From 3ff340e24c9dd5cff9fc07d67914c5adf67f80d6 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 11 Jun 2021 08:42:50 +0300 Subject: [PATCH 001/794] bus: ti-sysc: Fix gpt12 system timer issue with reserved status Jarkko Nikula reported that Beagleboard revision c2 stopped booting. Jarkko bisected the issue down to commit 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend and resume for am3 and am4"). Let's fix the issue by tagging system timers as reserved rather than ignoring them. And let's not probe any interconnect target module child devices for reserved modules. This allows PM runtime to keep track of clocks and clockdomains for the interconnect target module, and prevent the system timer from idling as we already have SYSC_QUIRK_NO_IDLE and SYSC_QUIRK_NO_IDLE_ON_INIT flags set for system timers. Fixes: 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend and resume for am3 and am4") Reported-by: Jarkko Nikula Tested-by: Jarkko Nikula Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 38cb116ed433..188cdb0a394e 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -100,6 +100,7 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = { * @cookie: data used by legacy platform callbacks * @name: name if available * @revision: interconnect target module revision + * @reserved: target module is reserved and already in use * @enabled: sysc runtime enabled status * @needs_resume: runtime resume needed on resume from suspend * @child_needs_resume: runtime resume needed for child on resume from suspend @@ -130,6 +131,7 @@ struct sysc { struct ti_sysc_cookie cookie; const char *name; u32 revision; + unsigned int reserved:1; unsigned int enabled:1; unsigned int needs_resume:1; unsigned int child_needs_resume:1; @@ -3093,8 +3095,8 @@ static int sysc_probe(struct platform_device *pdev) return 
error; error = sysc_check_active_timer(ddata); - if (error) - return error; + if (error == -EBUSY) + ddata->reserved = true; error = sysc_get_clocks(ddata); if (error) @@ -3130,11 +3132,15 @@ static int sysc_probe(struct platform_device *pdev) sysc_show_registers(ddata); ddata->dev->type = &sysc_device_type; - error = of_platform_populate(ddata->dev->of_node, sysc_match_table, - pdata ? pdata->auxdata : NULL, - ddata->dev); - if (error) - goto err; + + if (!ddata->reserved) { + error = of_platform_populate(ddata->dev->of_node, + sysc_match_table, + pdata ? pdata->auxdata : NULL, + ddata->dev); + if (error) + goto err; + } INIT_DELAYED_WORK(&ddata->idle_work, ti_sysc_idle); From 33c8516841ea4fa12fdb8961711bf95095c607ee Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 25 Jun 2021 15:50:39 -0500 Subject: [PATCH 002/794] ASoC: Intel: boards: fix xrun issue on platform with max98373 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On TGL platform with max98373 codec the trigger start sequence is fe first, then codec component and sdw link is the last. Recently a delay was introduced in max98373 codec driver and this resulted to the start of sdw stream transmission was delayed and the data transmitted by fw can't be consumed by sdw controller, so xrun happened. Adding delay in trigger function is a bad idea. This patch enable spk pin in prepare function and disable it in hw_free to avoid xrun issue caused by delay in trigger. 
Fixes: 3a27875e91fb ("ASoC: max98373: Added 30ms turn on/off time delay") BugLink: https://github.com/thesofproject/sof/issues/4066 Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Signed-off-by: Rander Wang Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210625205042.65181-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw_max98373.c | 77 +++++++++++++++-------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/sound/soc/intel/boards/sof_sdw_max98373.c b/sound/soc/intel/boards/sof_sdw_max98373.c index 0e7ed906b341..25daef910aee 100644 --- a/sound/soc/intel/boards/sof_sdw_max98373.c +++ b/sound/soc/intel/boards/sof_sdw_max98373.c @@ -55,43 +55,68 @@ static int spk_init(struct snd_soc_pcm_runtime *rtd) return ret; } -static int max98373_sdw_trigger(struct snd_pcm_substream *substream, int cmd) +static int mx8373_enable_spk_pin(struct snd_pcm_substream *substream, bool enable) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); + struct snd_soc_dai *codec_dai; + struct snd_soc_dai *cpu_dai; int ret; + int j; - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - /* enable max98373 first */ - ret = max_98373_trigger(substream, cmd); - if (ret < 0) - break; + /* set spk pin by playback only */ + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) + return 0; - ret = sdw_trigger(substream, cmd); - break; - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ret = sdw_trigger(substream, cmd); - if (ret < 0) - break; + cpu_dai = asoc_rtd_to_cpu(rtd, 0); + for_each_rtd_codec_dais(rtd, j, codec_dai) { + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(cpu_dai->component); + char pin_name[16]; - ret = max_98373_trigger(substream, cmd); - break; - default: - ret = -EINVAL; - break; + snprintf(pin_name, ARRAY_SIZE(pin_name), "%s Spk", + 
codec_dai->component->name_prefix); + + if (enable) + ret = snd_soc_dapm_enable_pin(dapm, pin_name); + else + ret = snd_soc_dapm_disable_pin(dapm, pin_name); + + if (!ret) + snd_soc_dapm_sync(dapm); } - return ret; + return 0; +} + +static int mx8373_sdw_prepare(struct snd_pcm_substream *substream) +{ + int ret = 0; + + /* according to soc_pcm_prepare dai link prepare is called first */ + ret = sdw_prepare(substream); + if (ret < 0) + return ret; + + return mx8373_enable_spk_pin(substream, true); +} + +static int mx8373_sdw_hw_free(struct snd_pcm_substream *substream) +{ + int ret = 0; + + /* according to soc_pcm_hw_free dai link free is called first */ + ret = sdw_hw_free(substream); + if (ret < 0) + return ret; + + return mx8373_enable_spk_pin(substream, false); } static const struct snd_soc_ops max_98373_sdw_ops = { .startup = sdw_startup, - .prepare = sdw_prepare, - .trigger = max98373_sdw_trigger, - .hw_free = sdw_hw_free, + .prepare = mx8373_sdw_prepare, + .trigger = sdw_trigger, + .hw_free = mx8373_sdw_hw_free, .shutdown = sdw_shutdown, }; From 0c4f8fd3ed9cb27228497f0ae495ea6cef7017b1 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 27 Jun 2021 11:59:55 +0100 Subject: [PATCH 003/794] ASoC: remove zte zx dangling kconfig In commit dc98f1d we removed the zte zx sound drivers but there was a dangling Kconfig left around for the codec so fix this. 
Fixes: dc98f1d655ca ("ASoC: remove zte zx drivers") Signed-off-by: Peter Robinson Cc: Arnd Bergmann Cc: Mark Brown Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210627105955.3410015-1-pbrobinson@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 3abdda48dc8e..bea7b47eddbe 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1813,11 +1813,6 @@ config SND_SOC_ZL38060 which consists of a Digital Signal Processor (DSP), several Digital Audio Interfaces (DAIs), analog outputs, and a block of 14 GPIOs. -config SND_SOC_ZX_AUD96P22 - tristate "ZTE ZX AUD96P22 CODEC" - depends on I2C - select REGMAP_I2C - # Amp config SND_SOC_LM4857 tristate From dd6fb8ff2210f74b056bf9234d0605e8c26a8ac0 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Sat, 26 Jun 2021 16:59:39 +0100 Subject: [PATCH 004/794] ASoC: wm_adsp: Correct wm_coeff_tlv_get handling When wm_coeff_tlv_get was updated it was accidentally switched to the _raw version of the helper causing it to ignore the current DSP state it should be checking. Switch the code back to the correct helper so that users can't read the controls when they aren't available.
Fixes: 73ecf1a673d3 ("ASoC: wm_adsp: Correct cache handling of new kernel control API") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210626155941.12251-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 37aa020f23f6..59d876d36cfd 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1213,7 +1213,7 @@ static int wm_coeff_tlv_get(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, size); + ret = wm_coeff_read_ctrl(ctl, ctl->cache, size); if (!ret && copy_to_user(bytes, ctl->cache, size)) ret = -EFAULT; From e588332271b9cde6252dac8973b77e580cd639bd Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Sat, 26 Jun 2021 16:59:40 +0100 Subject: [PATCH 005/794] ASoC: wm_adsp: Add CCM_CORE_RESET to Halo start core When starting the Halo core it is advised to also write the core reset bit, this ensures the part starts up in the appropriate state. Omitting this doesn't cause issues on most parts but cs40l25 requires it and it is advised on all Halo parts. 
Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210626155941.12251-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 59d876d36cfd..549d98241dae 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -282,6 +282,7 @@ /* * HALO_CCM_CORE_CONTROL */ +#define HALO_CORE_RESET 0x00000200 #define HALO_CORE_EN 0x00000001 /* @@ -3333,7 +3334,8 @@ static int wm_halo_start_core(struct wm_adsp *dsp) { return regmap_update_bits(dsp->regmap, dsp->base + HALO_CCM_CORE_CONTROL, - HALO_CORE_EN, HALO_CORE_EN); + HALO_CORE_RESET | HALO_CORE_EN, + HALO_CORE_RESET | HALO_CORE_EN); } static void wm_halo_stop_core(struct wm_adsp *dsp) From 2b6a761be079f9fa8abf3157b5679a6f38885db4 Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Sat, 26 Jun 2021 23:58:32 +0800 Subject: [PATCH 006/794] regulator: rtmv20: Fix wrong mask for strobe-polarity-high Fix wrong mask for strobe-polarity-high. 
Signed-off-by: ChiYuan Huang In-reply-to: Reviewed-by: Axel Lin Link: https://lore.kernel.org/r/1624723112-26653-1-git-send-email-u0084500@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/rtmv20-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/rtmv20-regulator.c b/drivers/regulator/rtmv20-regulator.c index 852fb2596ffd..d7372599385e 100644 --- a/drivers/regulator/rtmv20-regulator.c +++ b/drivers/regulator/rtmv20-regulator.c @@ -36,7 +36,7 @@ #define RTMV20_WIDTH2_MASK GENMASK(7, 0) #define RTMV20_LBPLVL_MASK GENMASK(3, 0) #define RTMV20_LBPEN_MASK BIT(7) -#define RTMV20_STROBEPOL_MASK BIT(1) +#define RTMV20_STROBEPOL_MASK BIT(0) #define RTMV20_VSYNPOL_MASK BIT(1) #define RTMV20_FSINEN_MASK BIT(7) #define RTMV20_ESEN_MASK BIT(6) From 6549c46af8551b346bcc0b9043f93848319acd5c Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 27 Jun 2021 16:04:18 +0800 Subject: [PATCH 007/794] regulator: rt5033: Fix n_voltages settings for BUCK and LDO For linear regulators, the n_voltages should be (max - min) / step + 1. Buck voltage from 1v to 3V, per step 100mV, and vout mask is 0x1f. If value is from 20 to 31, the voltage will all be fixed to 3V. And LDO also, just vout range is different from 1.2v to 3v, step is the same. If value is from 18 to 31, the voltage will also be fixed to 3v. 
Signed-off-by: Axel Lin Reviewed-by: ChiYuan Huang Link: https://lore.kernel.org/r/20210627080418.1718127-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- include/linux/mfd/rt5033-private.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mfd/rt5033-private.h b/include/linux/mfd/rt5033-private.h index 2d1895c3efbf..40a0c2dfb80f 100644 --- a/include/linux/mfd/rt5033-private.h +++ b/include/linux/mfd/rt5033-private.h @@ -200,13 +200,13 @@ enum rt5033_reg { #define RT5033_REGULATOR_BUCK_VOLTAGE_MIN 1000000U #define RT5033_REGULATOR_BUCK_VOLTAGE_MAX 3000000U #define RT5033_REGULATOR_BUCK_VOLTAGE_STEP 100000U -#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 32 +#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 21 /* RT5033 regulator LDO output voltage uV */ #define RT5033_REGULATOR_LDO_VOLTAGE_MIN 1200000U #define RT5033_REGULATOR_LDO_VOLTAGE_MAX 3000000U #define RT5033_REGULATOR_LDO_VOLTAGE_STEP 100000U -#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 32 +#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 19 /* RT5033 regulator SAFE LDO output voltage uV */ #define RT5033_REGULATOR_SAFE_LDO_VOLTAGE 4900000U From 7c1a80e80cde008f271bae630d28cf684351e807 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 25 Jun 2021 13:23:54 +0300 Subject: [PATCH 008/794] net: xfrm: fix memory leak in xfrm_user_rcv_msg Syzbot reported memory leak in xfrm_user_rcv_msg(). The problem was is non-freed skb's frag_list. In skb_release_all() skb_release_data() will be called only in case of skb->head != NULL, but netlink_skb_destructor() sets head to NULL. 
So, allocated frag_list skb should be freed manually, since consume_skb() won't take care of it. Fixes: 5106f4a8acff ("xfrm/compat: Add 32=>64-bit messages translator") Reported-and-tested-by: syzbot+fb347cf82c73a90efcca@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b47d613409b7..7aff641c717d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2811,6 +2811,16 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, err = link->doit(skb, nlh, attrs); + /* We need to free skb allocated in xfrm_alloc_compat() before + * returning from this function, because consume_skb() won't take + * care of frag_list since netlink destructor sets + * sbk->head to NULL. (see netlink_skb_destructor()) + */ + if (skb_has_frag_list(skb)) { + kfree_skb(skb_shinfo(skb)->frag_list); + skb_shinfo(skb)->frag_list = NULL; + } + err: kvfree(nlh64); return err; From 2c70ff56e49ae219640689a0c86041c0f656046f Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 28 Jun 2021 23:04:58 +0200 Subject: [PATCH 009/794] ASoC: codecs: allow SSM2518 to be selected by the user Allow the Analog SSM2518 driver to be enabled without a large bunch of other drivers. Signed-off-by: Lucas Stach Link: https://lore.kernel.org/r/20210628210458.2508973-1-l.stach@pengutronix.de Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index bea7b47eddbe..3a42c4611414 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1325,7 +1325,7 @@ config SND_SOC_SSM2305 high-efficiency mono Class-D audio power amplifiers.
config SND_SOC_SSM2518 - tristate + tristate "Analog Devices SSM2518 Class-D Amplifier" depends on I2C config SND_SOC_SSM2602 From 5db5dd5be70eaf808d9fd90174b957fc5c2912cb Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 30 Jun 2021 15:42:46 +0800 Subject: [PATCH 010/794] regulator: hi6421v600: Fix getting wrong drvdata that causes boot failure Since config.dev = pdev->dev.parent in current code, so dev_get_drvdata(rdev->dev.parent) actually returns the drvdata of the mfd device rather than the regulator. Fix it. Fixes: 9bc146acc331 ("regulator: hi6421v600: Fix setting wrong driver_data") Reported-by: Mauro Carvalho Chehab Signed-off-by: Axel Lin Tested-by: Mauro Carvalho Chehab Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210630074246.2305166-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/hi6421v600-regulator.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c index 48922704f0e1..a8501e8720d0 100644 --- a/drivers/regulator/hi6421v600-regulator.c +++ b/drivers/regulator/hi6421v600-regulator.c @@ -98,10 +98,9 @@ static const unsigned int ldo34_voltages[] = { static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev) { - struct hi6421_spmi_reg_priv *priv; + struct hi6421_spmi_reg_priv *priv = rdev_get_drvdata(rdev); int ret; - priv = dev_get_drvdata(rdev->dev.parent); /* cannot enable more than one regulator at one time */ mutex_lock(&priv->enable_mutex); @@ -119,9 +118,10 @@ static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev) static unsigned int hi6421_spmi_regulator_get_mode(struct regulator_dev *rdev) { - struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); + struct hi6421_spmi_reg_info *sreg; unsigned int reg_val; + sreg = container_of(rdev->desc, struct hi6421_spmi_reg_info, desc); regmap_read(rdev->regmap, rdev->desc->enable_reg, &reg_val); if (reg_val &
sreg->eco_mode_mask) @@ -133,9 +133,10 @@ static unsigned int hi6421_spmi_regulator_get_mode(struct regulator_dev *rdev) static int hi6421_spmi_regulator_set_mode(struct regulator_dev *rdev, unsigned int mode) { - struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); + struct hi6421_spmi_reg_info *sreg; unsigned int val; + sreg = container_of(rdev->desc, struct hi6421_spmi_reg_info, desc); switch (mode) { case REGULATOR_MODE_NORMAL: val = 0; @@ -159,7 +160,9 @@ hi6421_spmi_regulator_get_optimum_mode(struct regulator_dev *rdev, int input_uV, int output_uV, int load_uA) { - struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); + struct hi6421_spmi_reg_info *sreg; + + sreg = container_of(rdev->desc, struct hi6421_spmi_reg_info, desc); if (!sreg->eco_uA || ((unsigned int)load_uA > sreg->eco_uA)) return REGULATOR_MODE_NORMAL; @@ -252,13 +255,12 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev) return -ENOMEM; mutex_init(&priv->enable_mutex); - platform_set_drvdata(pdev, priv); for (i = 0; i < ARRAY_SIZE(regulator_info); i++) { info = &regulator_info[i]; config.dev = pdev->dev.parent; - config.driver_data = info; + config.driver_data = priv; config.regmap = pmic->regmap; rdev = devm_regulator_register(dev, &info->desc, &config); From e4a5c19888a5f8a9390860ca493e643be58c8791 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Wed, 30 Jun 2021 10:45:19 +0200 Subject: [PATCH 011/794] spi: stm32h7: fix full duplex irq handler handling In case of Full-Duplex mode, DXP flag is set when RXP and TXP flags are set. But to avoid 2 different handlings, just add TXP and RXP flag in the mask instead of DXP, and then keep the initial handling of TXP and RXP events. Also rephrase comment about EOTIE which is one of the interrupt enable bits. It is not triggered by any event.
Signed-off-by: Amelie Delaunay Signed-off-by: Alain Volmat Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/1625042723-661-3-git-send-email-alain.volmat@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 8ffcffbb8157..65b37c8dc49f 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -884,15 +884,18 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) ier = readl_relaxed(spi->base + STM32H7_SPI_IER); mask = ier; - /* EOTIE is triggered on EOT, SUSP and TXC events. */ + /* + * EOTIE enables irq from EOT, SUSP and TXC events. We need to set + * SUSP to acknowledge it later. TXC is automatically cleared + */ + mask |= STM32H7_SPI_SR_SUSP; /* - * When TXTF is set, DXPIE and TXPIE are cleared. So in case of - * Full-Duplex, need to poll RXP event to know if there are remaining - * data, before disabling SPI. + * DXPIE is set in Full-Duplex, one IT will be raised if TXP and RXP + * are set. So in case of Full-Duplex, need to poll TXP and RXP event. */ - if (spi->rx_buf && !spi->cur_usedma) - mask |= STM32H7_SPI_SR_RXP; + if ((spi->cur_comm == SPI_FULL_DUPLEX) && !spi->cur_usedma) + mask |= STM32H7_SPI_SR_TXP | STM32H7_SPI_SR_RXP; if (!(sr & mask)) { dev_warn(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n", From 9cf76a72af6ab81030dea6481b1d7bdd814fbdaf Mon Sep 17 00:00:00 2001 From: Kyle Russell Date: Mon, 21 Jun 2021 21:09:41 -0400 Subject: [PATCH 012/794] ASoC: tlv320aic31xx: fix reversed bclk/wclk master bits These are backwards from Table 7-71 of the TLV320AIC3100 spec [1]. This was broken in 12eb4d66ba2e when BCLK_MASTER and WCLK_MASTER were converted from 0x08 and 0x04 to BIT(2) and BIT(3), respectively. 
-#define AIC31XX_BCLK_MASTER 0x08 -#define AIC31XX_WCLK_MASTER 0x04 +#define AIC31XX_BCLK_MASTER BIT(2) +#define AIC31XX_WCLK_MASTER BIT(3) Probably just a typo since the defines were not listed in bit order. [1] https://www.ti.com/lit/gpn/tlv320aic3100 Signed-off-by: Kyle Russell Link: https://lore.kernel.org/r/20210622010941.241386-1-bkylerussell@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 81952984613d..2513922a0292 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -151,8 +151,8 @@ struct aic31xx_pdata { #define AIC31XX_WORD_LEN_24BITS 0x02 #define AIC31XX_WORD_LEN_32BITS 0x03 #define AIC31XX_IFACE1_MASTER_MASK GENMASK(3, 2) -#define AIC31XX_BCLK_MASTER BIT(2) -#define AIC31XX_WCLK_MASTER BIT(3) +#define AIC31XX_BCLK_MASTER BIT(3) +#define AIC31XX_WCLK_MASTER BIT(2) /* AIC31XX_DATA_OFFSET */ #define AIC31XX_DATA_OFFSET_MASK GENMASK(7, 0) From 8888ef2304d0ae78f3d5ec19653fa7cc4ffdbd7a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 23 Jun 2021 23:34:43 +0800 Subject: [PATCH 013/794] regulator: bd9576: Fix testing wrong flag in check_temp_flag_mismatch Fix trivial copy-paste typo. 
Signed-off-by: Axel Lin Reviewed-by: Matti Vaittinen Link: https://lore.kernel.org/r/20210623153443.623856-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/bd9576-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/bd9576-regulator.c b/drivers/regulator/bd9576-regulator.c index e16c3727db7a..aa42da4d141e 100644 --- a/drivers/regulator/bd9576-regulator.c +++ b/drivers/regulator/bd9576-regulator.c @@ -294,9 +294,9 @@ static bool check_temp_flag_mismatch(struct regulator_dev *rdev, int severity, struct bd957x_regulator_data *r) { if ((severity == REGULATOR_SEVERITY_ERR && - r->ovd_notif != REGULATOR_EVENT_OVER_TEMP) || + r->temp_notif != REGULATOR_EVENT_OVER_TEMP) || (severity == REGULATOR_SEVERITY_WARN && - r->ovd_notif != REGULATOR_EVENT_OVER_TEMP_WARN)) { + r->temp_notif != REGULATOR_EVENT_OVER_TEMP_WARN)) { dev_warn(rdev_get_dev(rdev), "Can't support both thermal WARN and ERR\n"); if (severity == REGULATOR_SEVERITY_WARN) From c36748ac545421d94a5091c754414c0f3664bf10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Thu, 1 Jul 2021 08:28:25 -0700 Subject: [PATCH 014/794] misc: eeprom: at24: Always append device id even if label property is set. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to append device id even if eeprom have a label property set as some platform can have multiple eeproms with same label and we can not register each of those with same label. Failing to register those eeproms trigger cascade failures on such platform (system is no longer working). 
This fix regression on such platform introduced with 4e302c3b568e Reported-by: Alexander Fomichev Fixes: 4e302c3b568e ("misc: eeprom: at24: fix NVMEM name with custom AT24 device name") Cc: stable@vger.kernel.org Signed-off-by: Jérôme Glisse Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 7a6f01ace78a..305ffad131a2 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -714,23 +714,20 @@ static int at24_probe(struct i2c_client *client) } /* - * If the 'label' property is not present for the AT24 EEPROM, - * then nvmem_config.id is initialised to NVMEM_DEVID_AUTO, - * and this will append the 'devid' to the name of the NVMEM - * device. This is purely legacy and the AT24 driver has always - * defaulted to this. However, if the 'label' property is - * present then this means that the name is specified by the - * firmware and this name should be used verbatim and so it is - * not necessary to append the 'devid'. + * We initialize nvmem_config.id to NVMEM_DEVID_AUTO even if the + * label property is set as some platform can have multiple eeproms + * with same label and we can not register each of those with same + * label. Failing to register those eeproms trigger cascade failure + * on such platform. 
*/ + nvmem_config.id = NVMEM_DEVID_AUTO; + if (device_property_present(dev, "label")) { - nvmem_config.id = NVMEM_DEVID_NONE; err = device_property_read_string(dev, "label", &nvmem_config.name); if (err) return err; } else { - nvmem_config.id = NVMEM_DEVID_AUTO; nvmem_config.name = dev_name(dev); } From eaf228263921cd15962654b539d916380a0f076e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 2 Jul 2021 09:20:22 +0200 Subject: [PATCH 015/794] Revert "xfrm: policy: Read seqcount outside of rcu-read side in xfrm_policy_lookup_bytype" This reverts commit d7b0408934c749f546b01f2b33d07421a49b6f3e. This commit tried to fix a locking bug introduced by commit 77cc278f7b20 ("xfrm: policy: Use sequence counters with associated lock"). As it turned out, this patch did not really fix the bug. A proper fix for this bug is applied on top of this revert. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e9d0df2a2ab1..ce500f847b99 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2092,15 +2092,12 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - retry: - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); rcu_read_lock(); - - chain = policy_hash_direct(net, daddr, saddr, family, dir); - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); - goto retry; - } + retry: + do { + sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + chain = policy_hash_direct(net, daddr, saddr, family, dir); + } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2131,15 +2128,11 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if 
(read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) goto retry; - } - if (ret && !xfrm_pol_hold_rcu(ret)) { - rcu_read_unlock(); + if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; - } fail: rcu_read_unlock(); From 2580d3f40022642452dd8422bfb8c22e54cf84bb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 28 Jun 2021 15:34:28 +0200 Subject: [PATCH 016/794] xfrm: Fix RCU vs hash_resize_mutex lock inversion xfrm_bydst_resize() calls synchronize_rcu() while holding hash_resize_mutex. But then on PREEMPT_RT configurations, xfrm_policy_lookup_bytype() may acquire that mutex while running in an RCU read side critical section. This results in a deadlock. In fact the scope of hash_resize_mutex is way beyond the purpose of xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy for a given destination/direction, along with other details. The lower level net->xfrm.xfrm_policy_lock, which among other things protects per destination/direction references to policy entries, is enough to serialize and benefit from priority inheritance against the write side. As a bonus, it makes it officially a per network namespace synchronization business where a policy table resize on namespace A shouldn't block a policy lookup on namespace B. Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock) Cc: stable@vger.kernel.org Cc: Ahmed S. Darwish Cc: Peter Zijlstra (Intel) Cc: Varad Gautam Cc: Steffen Klassert Cc: Herbert Xu Cc: David S. 
Miller Signed-off-by: Frederic Weisbecker Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 17 ++++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index e816b6a3ef2b..9b376b87bd54 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -74,6 +74,7 @@ struct netns_xfrm { #endif spinlock_t xfrm_state_lock; seqcount_spinlock_t xfrm_state_hash_generation; + seqcount_spinlock_t xfrm_policy_hash_generation; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ce500f847b99..46a6d15b66d6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __ro_after_init; -static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation; static struct rhashtable xfrm_policy_inexact_table; static const struct rhashtable_params xfrm_pol_inexact_params; @@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); @@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); 
spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); /* make sure that we can insert the indirect policies again before * we start with destructive action. @@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) out_unlock: __xfrm_policy_inexact_flush(net); - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); @@ -2095,9 +2094,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, rcu_read_lock(); retry: do { - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); chain = policy_hash_direct(net, daddr, saddr, family, dir); - } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); + } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2128,7 +2127,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) + if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)) goto retry; if (ret && !xfrm_pol_hold_rcu(ret)) @@ -4084,6 +4083,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_policy_lock); + seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); rv = xfrm_statistics_init(net); @@ -4128,7 +4128,6 @@ void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); - seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex); 
xfrm_input_init(); #ifdef CONFIG_XFRM_ESPINTCP From c9cd752d8f3a6b13afc5332a60bea3e68f141738 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 5 Jul 2021 15:34:41 +0200 Subject: [PATCH 017/794] regulator: fixed: Mark regulator-fixed-domain as deprecated A power domain should not be modelled as a regulator, not even for the simplest case as recent discussions have concluded around the existing regulator-fixed-domain DT binding. Fortunately, there is only one user of the binding that was recently added. Therefore, let's mark the binding as deprecated to prevent it from being further used. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20210705133441.11344-1-ulf.hansson@linaro.org Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/fixed-regulator.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml index 8850c01bd470..9b131c6facbc 100644 --- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml @@ -57,12 +57,14 @@ properties: maxItems: 1 power-domains: + deprecated: true description: Power domain to use for enable control. This binding is only available if the compatible is chosen to regulator-fixed-domain. maxItems: 1 required-opps: + deprecated: true description: Performance state to use for enable control. This binding is only available if the compatible is chosen to regulator-fixed-domain. The From ea986908ccfcc53204a03bb0841227e1b26578c4 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 2 Jul 2021 22:21:40 +0800 Subject: [PATCH 018/794] regulator: mtk-dvfsrc: Fix wrong dev pointer for devm_regulator_register If use dev->parent, the regulator_unregister will not be called when this driver is unloaded. Fix it by using dev instead. 
Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20210702142140.2678130-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/mtk-dvfsrc-regulator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/regulator/mtk-dvfsrc-regulator.c b/drivers/regulator/mtk-dvfsrc-regulator.c index d3d876198d6e..234af3a66c77 100644 --- a/drivers/regulator/mtk-dvfsrc-regulator.c +++ b/drivers/regulator/mtk-dvfsrc-regulator.c @@ -179,8 +179,7 @@ static int dvfsrc_vcore_regulator_probe(struct platform_device *pdev) for (i = 0; i < regulator_init_data->size; i++) { config.dev = dev->parent; config.driver_data = (mt_regulators + i); - rdev = devm_regulator_register(dev->parent, - &(mt_regulators + i)->desc, + rdev = devm_regulator_register(dev, &(mt_regulators + i)->desc, &config); if (IS_ERR(rdev)) { dev_err(dev, "failed to register %s\n", From 135cbd378eab336da15de9c84bbb22bf743b38a5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 3 Jul 2021 04:23:00 +0200 Subject: [PATCH 019/794] spi: imx: mx51-ecspi: Reinstate low-speed CONFIGREG delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 00b80ac935539 ("spi: imx: mx51-ecspi: Move some initialisation to prepare_message hook."), the MX51_ECSPI_CONFIG write no longer happens in prepare_transfer hook, but rather in prepare_message hook, however the MX51_ECSPI_CONFIG delay is still left in prepare_transfer hook and thus has no effect. This leads to low bus frequency operation problems described in 6fd8b8503a0dc ("spi: spi-imx: Fix out-of-order CS/SCLK operation at low speeds") again. Move the MX51_ECSPI_CONFIG write delay into the prepare_message hook as well, thus reinstating the low bus frequency fix. 
Fixes: 00b80ac935539 ("spi: imx: mx51-ecspi: Move some initialisation to prepare_message hook.") Signed-off-by: Marek Vasut Cc: Uwe Kleine-König Cc: Mark Brown Link: https://lore.kernel.org/r/20210703022300.296114-1-marex@denx.de Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 39dc02e366f4..4aee3db6d6df 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -506,7 +506,7 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, { struct spi_device *spi = msg->spi; u32 ctrl = MX51_ECSPI_CTRL_ENABLE; - u32 testreg; + u32 testreg, delay; u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG); /* set Master or Slave mode */ @@ -567,6 +567,23 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, writel(cfg, spi_imx->base + MX51_ECSPI_CONFIG); + /* + * Wait until the changes in the configuration register CONFIGREG + * propagate into the hardware. It takes exactly one tick of the + * SCLK clock, but we will wait two SCLK clock just to be sure. The + * effect of the delay it takes for the hardware to apply changes + * is noticable if the SCLK clock run very slow. In such a case, if + * the polarity of SCLK should be inverted, the GPIO ChipSelect might + * be asserted before the SCLK polarity changes, which would disrupt + * the SPI communication as the device on the other end would consider + * the change of SCLK polarity as a clock tick already. 
+ */ + delay = (2 * 1000000) / spi_imx->spi_bus_clk; + if (likely(delay < 10)) /* SCLK is faster than 100 kHz */ + udelay(delay); + else /* SCLK is _very_ slow */ + usleep_range(delay, delay + 10); + return 0; } @@ -574,7 +591,7 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, struct spi_device *spi) { u32 ctrl = readl(spi_imx->base + MX51_ECSPI_CTRL); - u32 clk, delay; + u32 clk; /* Clear BL field and set the right value */ ctrl &= ~MX51_ECSPI_CTRL_BL_MASK; @@ -596,23 +613,6 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, writel(ctrl, spi_imx->base + MX51_ECSPI_CTRL); - /* - * Wait until the changes in the configuration register CONFIGREG - * propagate into the hardware. It takes exactly one tick of the - * SCLK clock, but we will wait two SCLK clock just to be sure. The - * effect of the delay it takes for the hardware to apply changes - * is noticable if the SCLK clock run very slow. In such a case, if - * the polarity of SCLK should be inverted, the GPIO ChipSelect might - * be asserted before the SCLK polarity changes, which would disrupt - * the SPI communication as the device on the other end would consider - * the change of SCLK polarity as a clock tick already. - */ - delay = (2 * 1000000) / clk; - if (likely(delay < 10)) /* SCLK is faster than 100 kHz */ - udelay(delay); - else /* SCLK is _very_ slow */ - usleep_range(delay, delay + 10); - return 0; } From 7999d2555c9f879d006ea8469d74db9cdb038af0 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Wed, 7 Jul 2021 10:27:00 +0200 Subject: [PATCH 020/794] spi: stm32: fixes pm_runtime calls in probe/remove Add pm_runtime calls in probe/probe error path and remove in order to be consistent in all places in ordering and ensure that pm_runtime is disabled prior to resources used by the SPI controller. 
This patch also fixes the 2 following warnings on driver remove: WARNING: CPU: 0 PID: 743 at drivers/clk/clk.c:594 clk_core_disable_lock+0x18/0x24 WARNING: CPU: 0 PID: 743 at drivers/clk/clk.c:476 clk_unprepare+0x24/0x2c Fixes: 038ac869c9d2 ("spi: stm32: add runtime PM support") Signed-off-by: Amelie Delaunay Signed-off-by: Alain Volmat Link: https://lore.kernel.org/r/1625646426-5826-2-git-send-email-alain.volmat@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 65b37c8dc49f..05618a618939 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -1928,6 +1928,7 @@ static int stm32_spi_probe(struct platform_device *pdev) master->can_dma = stm32_spi_can_dma; pm_runtime_set_active(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); pm_runtime_enable(&pdev->dev); ret = spi_register_master(master); @@ -1943,6 +1944,8 @@ static int stm32_spi_probe(struct platform_device *pdev) err_pm_disable: pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); err_dma_release: if (spi->dma_tx) dma_release_channel(spi->dma_tx); @@ -1959,9 +1962,14 @@ static int stm32_spi_remove(struct platform_device *pdev) struct spi_master *master = platform_get_drvdata(pdev); struct stm32_spi *spi = spi_master_get_devdata(master); + pm_runtime_get_sync(&pdev->dev); + spi_unregister_master(master); spi->cfg->disable(spi); + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); if (master->dma_tx) dma_release_channel(master->dma_tx); if (master->dma_rx) @@ -1969,7 +1977,6 @@ static int stm32_spi_remove(struct platform_device *pdev) clk_disable_unprepare(spi->clk); - pm_runtime_disable(&pdev->dev); pinctrl_pm_select_sleep_state(&pdev->dev); From c90b4503ccf42d9d367e843c223df44aa550e82a Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Wed, 7 
Jul 2021 08:45:31 +0800 Subject: [PATCH 021/794] drm/i915/gvt: Clear d3_entered on elsp cmd submission. d3_entered flag is used to mark for vgpu_reset a previous power transition from D3->D0, typically for VM resume from S3, so that gvt could skip PPGTT invalidation in current vgpu_reset during resuming. In case S0ix exit, although there is D3->D0, guest driver continues to use vgpu as normal, with d3_entered set, until next shutdown/reboot or power transition. If a reboot follows a S0ix exit, device power state transitions as: D0->D3->D0->D0(reboot), while system power state transitions as: S0->S0 (reboot). There is no vgpu_reset until D0(reboot), thus d3_entered won't be cleared, the vgpu_reset will skip PPGTT invalidation however those PPGTT entries are no longer valid. Err appears like: gvt: vgpu 2: vfio_pin_pages failed for gfn 0xxxxx, ret -22 gvt: vgpu 2: fail: spt xxxx guest entry 0xxxxx type 2 gvt: vgpu 2: fail: shadow page xxxx guest entry 0xxxxx type 2. Give gvt a chance to clear d3_entered on elsp cmd submission so that the states before & after S0ix enter/exit are consistent. Fixes: ba25d977571e ("drm/i915/gvt: Do not destroy ppgtt_mm during vGPU D3->D0.") Reviewed-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20210707004531.4873-1-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index dda320749c65..2358c92733b0 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1977,6 +1977,21 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, if (drm_WARN_ON(&i915->drm, !engine)) return -EINVAL; + /* + * Because d3_entered is used to indicate skipping PPGTT invalidation on + * vGPU reset, it's set on D0->D3 on PCI config write, and cleared after + * vGPU reset if in resuming.
+ * In S0ix exit, the device power state also transitions from D3 to D0 as + * S3 resume, but no vGPU reset (triggered by QEMU device model). After + * S0ix exit, all engines continue to work. However the d3_entered + * remains set which will break next vGPU reset logic (miss the expected + * PPGTT invalidation). + * Engines can only work in D0. Thus the 1st elsp write gives GVT a + * chance to clear d3_entered. + */ + if (vgpu->d3_entered) + vgpu->d3_entered = false; + execlist = &vgpu->submission.execlist[engine->id]; execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data; From ecef6a9effe49e8e2635c839020b9833b71e934c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Jul 2021 15:02:37 +0200 Subject: [PATCH 022/794] libata: fix ata_pio_sector for CONFIG_HIGHMEM Data transfers are not required to be block aligned in memory, so they can span two pages. Fix this by splitting the call to ->sff_data_xfer into two for that case. This has been broken since the initial libata import before the dawn of git, but was uncovered by the legacy ide driver removal.
Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210709130237.3730959-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/ata/libata-sff.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index ae7189d1a568..b71ea4a680b0 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -637,6 +637,20 @@ unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf, } EXPORT_SYMBOL_GPL(ata_sff_data_xfer32); +static void ata_pio_xfer(struct ata_queued_cmd *qc, struct page *page, + unsigned int offset, size_t xfer_size) +{ + bool do_write = (qc->tf.flags & ATA_TFLAG_WRITE); + unsigned char *buf; + + buf = kmap_atomic(page); + qc->ap->ops->sff_data_xfer(qc, buf + offset, xfer_size, do_write); + kunmap_atomic(buf); + + if (!do_write && !PageSlab(page)) + flush_dcache_page(page); +} + /** * ata_pio_sector - Transfer a sector of data. * @qc: Command on going @@ -648,11 +662,9 @@ EXPORT_SYMBOL_GPL(ata_sff_data_xfer32); */ static void ata_pio_sector(struct ata_queued_cmd *qc) { - int do_write = (qc->tf.flags & ATA_TFLAG_WRITE); struct ata_port *ap = qc->ap; struct page *page; unsigned int offset; - unsigned char *buf; if (!qc->cursg) { qc->curbytes = qc->nbytes; @@ -670,13 +682,20 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read"); - /* do the actual data transfer */ - buf = kmap_atomic(page); - ap->ops->sff_data_xfer(qc, buf + offset, qc->sect_size, do_write); - kunmap_atomic(buf); + /* + * Split the transfer when it splits a page boundary. Note that the + * split still has to be dword aligned like all ATA data transfers. 
+ */ + WARN_ON_ONCE(offset % 4); + if (offset + qc->sect_size > PAGE_SIZE) { + unsigned int split_len = PAGE_SIZE - offset; - if (!do_write && !PageSlab(page)) - flush_dcache_page(page); + ata_pio_xfer(qc, page, offset, split_len); + ata_pio_xfer(qc, nth_page(page, 1), 0, + qc->sect_size - split_len); + } else { + ata_pio_xfer(qc, page, offset, qc->sect_size); + } qc->curbytes += qc->sect_size; qc->cursg_ofs += qc->sect_size; From 0dfc21c1a4cac321749a53c92da616d9546d00e3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 8 Jul 2021 12:34:31 +0200 Subject: [PATCH 023/794] ASoC: tegra: Use ADMAIF component for DMA allocations DMA memory is currently allocated for the soundcard device, which is a virtual device added for the sole purpose of "stitching" together the audio device. It is not a real device and therefore doesn't have a DMA mask or a description of the path to and from memory of accesses. Memory accesses really originate from the ADMA controller that provides the DMA channels used by the PCM component. However, since the DMA memory is allocated up-front and the DMA channels aren't known at that point, there is no way of knowing the DMA channel provider at allocation time. The next best physical device in the memory path is the ADMAIF. Use it as the device to allocate DMA memory to. iommus and interconnects device tree properties can thus be added to the ADMAIF device tree node to describe the memory access path for audio. 
Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20210708103432.1690385-2-thierry.reding@gmail.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_pcm.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 573374b89b10..d3276b4595af 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -213,19 +213,19 @@ snd_pcm_uframes_t tegra_pcm_pointer(struct snd_soc_component *component, } EXPORT_SYMBOL_GPL(tegra_pcm_pointer); -static int tegra_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream, +static int tegra_pcm_preallocate_dma_buffer(struct device *dev, struct snd_pcm *pcm, int stream, size_t size) { struct snd_pcm_substream *substream = pcm->streams[stream].substream; struct snd_dma_buffer *buf = &substream->dma_buffer; - buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL); + buf->area = dma_alloc_wc(dev, size, &buf->addr, GFP_KERNEL); if (!buf->area) return -ENOMEM; buf->private_data = NULL; buf->dev.type = SNDRV_DMA_TYPE_DEV; - buf->dev.dev = pcm->card->dev; + buf->dev.dev = dev; buf->bytes = size; return 0; @@ -244,31 +244,28 @@ static void tegra_pcm_deallocate_dma_buffer(struct snd_pcm *pcm, int stream) if (!buf->area) return; - dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr); + dma_free_wc(buf->dev.dev, buf->bytes, buf->area, buf->addr); buf->area = NULL; } -static int tegra_pcm_dma_allocate(struct snd_soc_pcm_runtime *rtd, +static int tegra_pcm_dma_allocate(struct device *dev, struct snd_soc_pcm_runtime *rtd, size_t size) { - struct snd_card *card = rtd->card->snd_card; struct snd_pcm *pcm = rtd->pcm; int ret; - ret = dma_set_mask_and_coherent(card->dev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret < 0) return ret; if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) { - ret = tegra_pcm_preallocate_dma_buffer(pcm, - 
SNDRV_PCM_STREAM_PLAYBACK, size); + ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_PLAYBACK, size); if (ret) goto err; } if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) { - ret = tegra_pcm_preallocate_dma_buffer(pcm, - SNDRV_PCM_STREAM_CAPTURE, size); + ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_CAPTURE, size); if (ret) goto err_free_play; } @@ -284,7 +281,16 @@ err: int tegra_pcm_construct(struct snd_soc_component *component, struct snd_soc_pcm_runtime *rtd) { - return tegra_pcm_dma_allocate(rtd, tegra_pcm_hardware.buffer_bytes_max); + struct device *dev = component->dev; + + /* + * Fallback for backwards-compatibility with older device trees that + * have the iommus property in the virtual, top-level "sound" node. + */ + if (!of_get_property(dev->of_node, "iommus", NULL)) + dev = rtd->card->snd_card->dev; + + return tegra_pcm_dma_allocate(dev, rtd, tegra_pcm_hardware.buffer_bytes_max); } EXPORT_SYMBOL_GPL(tegra_pcm_construct); From 2169d6a0f0721935410533281fc7e87e4e2322d1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 8 Jul 2021 11:12:55 +0200 Subject: [PATCH 024/794] ASoC: tlv320aic32x4: Fix TAS2505 volume controls None of the TAS2505 outputs are stereo, do not pretend they are by implementing them using SOC*DOUBLE* macros referencing the same register twice, use SOC*SINGLE* instead. Fix volume ranges and mute control for the codec according to datasheet. 
Fixes: b4525b6196cd7 ("ASoC: tlv320aic32x4: add support for TAS2505") Signed-off-by: Marek Vasut Cc: Claudius Heine Cc: Mark Brown Link: https://lore.kernel.org/r/20210708091255.56502-1-marex@denx.de Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index c63b717040ed..dcd8aeb45cb3 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -250,8 +250,8 @@ static DECLARE_TLV_DB_SCALE(tlv_pcm, -6350, 50, 0); static DECLARE_TLV_DB_SCALE(tlv_driver_gain, -600, 100, 0); /* -12dB min, 0.5dB steps */ static DECLARE_TLV_DB_SCALE(tlv_adc_vol, -1200, 50, 0); - -static DECLARE_TLV_DB_LINEAR(tlv_spk_vol, TLV_DB_GAIN_MUTE, 0); +/* -6dB min, 1dB steps */ +static DECLARE_TLV_DB_SCALE(tlv_tas_driver_gain, -5850, 50, 0); static DECLARE_TLV_DB_SCALE(tlv_amp_vol, 0, 600, 1); static const char * const lo_cm_text[] = { @@ -1063,21 +1063,20 @@ static const struct snd_soc_component_driver soc_component_dev_aic32x4 = { }; static const struct snd_kcontrol_new aic32x4_tas2505_snd_controls[] = { - SOC_DOUBLE_R_S_TLV("PCM Playback Volume", AIC32X4_LDACVOL, - AIC32X4_LDACVOL, 0, -0x7f, 0x30, 7, 0, tlv_pcm), + SOC_SINGLE_S8_TLV("PCM Playback Volume", + AIC32X4_LDACVOL, -0x7f, 0x30, tlv_pcm), SOC_ENUM("DAC Playback PowerTune Switch", l_ptm_enum), - SOC_DOUBLE_R_S_TLV("HP Driver Playback Volume", AIC32X4_HPLGAIN, - AIC32X4_HPLGAIN, 0, -0x6, 0x1d, 5, 0, - tlv_driver_gain), - SOC_DOUBLE_R("HP DAC Playback Switch", AIC32X4_HPLGAIN, - AIC32X4_HPLGAIN, 6, 0x01, 1), + + SOC_SINGLE_TLV("HP Driver Gain Volume", + AIC32X4_HPLGAIN, 0, 0x74, 1, tlv_tas_driver_gain), + SOC_SINGLE("HP DAC Playback Switch", AIC32X4_HPLGAIN, 6, 1, 1), + + SOC_SINGLE_TLV("Speaker Driver Playback Volume", + TAS2505_SPKVOL1, 0, 0x74, 1, tlv_tas_driver_gain), + SOC_SINGLE_TLV("Speaker Amplifier Playback Volume", + 
TAS2505_SPKVOL2, 4, 5, 0, tlv_amp_vol), SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0), - - SOC_SINGLE_RANGE_TLV("Speaker Driver Playback Volume", TAS2505_SPKVOL1, - 0, 0, 117, 1, tlv_spk_vol), - SOC_SINGLE_TLV("Speaker Amplifier Playback Volume", TAS2505_SPKVOL2, - 4, 5, 0, tlv_amp_vol), }; static const struct snd_kcontrol_new hp_output_mixer_controls[] = { From 6c621b811f99feb3941f04b386795b45f47cd771 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 7 Jul 2021 17:02:34 +0100 Subject: [PATCH 025/794] ASoC: tlv320aic31xx: Make regmap cache only on probe() Currently the tlv320aic31xx driver has regulator support but does not enable the regulators during probe, deferring this until something causes ASoC to make the card active. It does put the device into cache only mode but only when the component level probe is called, however if interrupts are in use the driver will access the regmap before then which if the regulators are not powered on would cause I/O problems. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210707160234.16253-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 51870d50f419..b504d63385b3 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -1604,6 +1604,8 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, ret); return ret; } + regcache_cache_only(aic31xx->regmap, true); + aic31xx->dev = &i2c->dev; aic31xx->irq = i2c->irq; From 1c73daee4bf30ccdff5e86dc400daa6f74735da5 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 30 Jun 2021 17:59:59 +0800 Subject: [PATCH 026/794] regulator: hi6421: Fix getting wrong drvdata Since config.dev = pdev->dev.parent in current code, so dev_get_drvdata(rdev->dev.parent) call in hi6421_regulator_enable returns the drvdata of the mfd device rather than the regulator. Fix it. 
This was broken while converting to use simplified DT parsing because the config.dev changed from pdev->dev to pdev->dev.parent for parsing the parent's of_node. Fixes: 29dc269a85ef ("regulator: hi6421: Convert to use simplified DT parsing") Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20210630095959.2411543-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/hi6421-regulator.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/regulator/hi6421-regulator.c b/drivers/regulator/hi6421-regulator.c index bff8c515dcde..d144a4bdb76d 100644 --- a/drivers/regulator/hi6421-regulator.c +++ b/drivers/regulator/hi6421-regulator.c @@ -366,9 +366,8 @@ static struct hi6421_regulator_info static int hi6421_regulator_enable(struct regulator_dev *rdev) { - struct hi6421_regulator_pdata *pdata; + struct hi6421_regulator_pdata *pdata = rdev_get_drvdata(rdev); - pdata = dev_get_drvdata(rdev->dev.parent); /* hi6421 spec requires regulator enablement must be serialized: * - Because when BUCK, LDO switching from off to on, it will have * a huge instantaneous current; so you can not turn on two or @@ -385,9 +384,10 @@ static int hi6421_regulator_enable(struct regulator_dev *rdev) static unsigned int hi6421_regulator_ldo_get_mode(struct regulator_dev *rdev) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; unsigned int reg_val; + info = container_of(rdev->desc, struct hi6421_regulator_info, desc); regmap_read(rdev->regmap, rdev->desc->enable_reg, &reg_val); if (reg_val & info->mode_mask) return REGULATOR_MODE_IDLE; @@ -397,9 +397,10 @@ static unsigned int hi6421_regulator_ldo_get_mode(struct regulator_dev *rdev) static unsigned int hi6421_regulator_buck_get_mode(struct regulator_dev *rdev) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; unsigned int reg_val; + info = container_of(rdev->desc, struct
hi6421_regulator_info, desc); regmap_read(rdev->regmap, rdev->desc->enable_reg, &reg_val); if (reg_val & info->mode_mask) return REGULATOR_MODE_STANDBY; @@ -410,9 +411,10 @@ static unsigned int hi6421_regulator_buck_get_mode(struct regulator_dev *rdev) static int hi6421_regulator_ldo_set_mode(struct regulator_dev *rdev, unsigned int mode) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; unsigned int new_mode; + info = container_of(rdev->desc, struct hi6421_regulator_info, desc); switch (mode) { case REGULATOR_MODE_NORMAL: new_mode = 0; @@ -434,9 +436,10 @@ static int hi6421_regulator_ldo_set_mode(struct regulator_dev *rdev, static int hi6421_regulator_buck_set_mode(struct regulator_dev *rdev, unsigned int mode) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; unsigned int new_mode; + info = container_of(rdev->desc, struct hi6421_regulator_info, desc); switch (mode) { case REGULATOR_MODE_NORMAL: new_mode = 0; @@ -459,7 +462,9 @@ static unsigned int hi6421_regulator_ldo_get_optimum_mode(struct regulator_dev *rdev, int input_uV, int output_uV, int load_uA) { - struct hi6421_regulator_info *info = rdev_get_drvdata(rdev); + struct hi6421_regulator_info *info; + + info = container_of(rdev->desc, struct hi6421_regulator_info, desc); if (load_uA > info->eco_microamp) return REGULATOR_MODE_NORMAL; @@ -543,14 +548,13 @@ static int hi6421_regulator_probe(struct platform_device *pdev) if (!pdata) return -ENOMEM; mutex_init(&pdata->lock); - platform_set_drvdata(pdev, pdata); for (i = 0; i < ARRAY_SIZE(hi6421_regulator_info); i++) { /* assign per-regulator data */ info = &hi6421_regulator_info[i]; config.dev = pdev->dev.parent; - config.driver_data = info; + config.driver_data = pdata; config.regmap = pmic->regmap; rdev = devm_regulator_register(&pdev->dev, &info->desc, From 69e1818ad27bae167eeaaf6829d4a08900ef5153 Mon Sep 17 00:00:00 2001 From: Dan Sneddon Date: Tue, 29 Jun
2021 12:22:18 -0700 Subject: [PATCH 027/794] spi: atmel: Fix CS and initialization bug Commit 5fa5e6dec762 ("spi: atmel: Switch to transfer_one transfer method") switched to using transfer_one and set_cs. The core doesn't call set_cs when the chip select lines are gpios. Add the SPI_MASTER_GPIO_SS flag to the driver to ensure the calls to set_cs happen since the driver programs configuration registers there. Fixes: 5fa5e6dec762 ("spi: atmel: Switch to transfer_one transfer method") Signed-off-by: Dan Sneddon Link: https://lore.kernel.org/r/20210629192218.32125-1-dan.sneddon@microchip.com Signed-off-by: Mark Brown --- drivers/spi/spi-atmel.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 2ef74885ffa2..788dcdf25f00 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -352,8 +352,6 @@ static void cs_activate(struct atmel_spi *as, struct spi_device *spi) } mr = spi_readl(as, MR); - if (spi->cs_gpiod) - gpiod_set_value(spi->cs_gpiod, 1); } else { u32 cpol = (spi->mode & SPI_CPOL) ? 
SPI_BIT(CPOL) : 0; int i; @@ -369,8 +367,6 @@ static void cs_activate(struct atmel_spi *as, struct spi_device *spi) mr = spi_readl(as, MR); mr = SPI_BFINS(PCS, ~(1 << chip_select), mr); - if (spi->cs_gpiod) - gpiod_set_value(spi->cs_gpiod, 1); spi_writel(as, MR, mr); } @@ -400,8 +396,6 @@ static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi) if (!spi->cs_gpiod) spi_writel(as, CR, SPI_BIT(LASTXFER)); - else - gpiod_set_value(spi->cs_gpiod, 0); } static void atmel_spi_lock(struct atmel_spi *as) __acquires(&as->lock) @@ -1483,7 +1477,8 @@ static int atmel_spi_probe(struct platform_device *pdev) master->bus_num = pdev->id; master->num_chipselect = 4; master->setup = atmel_spi_setup; - master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX); + master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX | + SPI_MASTER_GPIO_SS); master->transfer_one = atmel_spi_one_transfer; master->set_cs = atmel_spi_set_cs; master->cleanup = atmel_spi_cleanup; From 3a70dd2d050331ee4cf5ad9d5c0a32d83ead9a43 Mon Sep 17 00:00:00 2001 From: Peter Hess Date: Tue, 6 Jul 2021 14:16:09 +0200 Subject: [PATCH 028/794] spi: mediatek: fix fifo rx mode In FIFO mode were two problems: - RX mode was never handled and - in this case the tx_buf pointer was NULL and caused an exception fix this by handling RX mode in mtk_spi_fifo_transfer Fixes: a568231f4632 ("spi: mediatek: Add spi bus for Mediatek MT8173") Signed-off-by: Peter Hess Signed-off-by: Frank Wunderlich Link: https://lore.kernel.org/r/20210706121609.680534-1-linux@fw-web.de Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 976f73b9e299..8d5fa7f1e506 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -427,13 +427,23 @@ static int mtk_spi_fifo_transfer(struct spi_master *master, mtk_spi_setup_packet(master); cnt = xfer->len / 4; - iowrite32_rep(mdata->base + 
SPI_TX_DATA_REG, xfer->tx_buf, cnt); + if (xfer->tx_buf) + iowrite32_rep(mdata->base + SPI_TX_DATA_REG, xfer->tx_buf, cnt); + + if (xfer->rx_buf) + ioread32_rep(mdata->base + SPI_RX_DATA_REG, xfer->rx_buf, cnt); remainder = xfer->len % 4; if (remainder > 0) { reg_val = 0; - memcpy(&reg_val, xfer->tx_buf + (cnt * 4), remainder); - writel(reg_val, mdata->base + SPI_TX_DATA_REG); + if (xfer->tx_buf) { + memcpy(&reg_val, xfer->tx_buf + (cnt * 4), remainder); + writel(reg_val, mdata->base + SPI_TX_DATA_REG); + } + if (xfer->rx_buf) { + reg_val = readl(mdata->base + SPI_RX_DATA_REG); + memcpy(xfer->rx_buf + (cnt * 4), &reg_val, remainder); + } } mtk_spi_enable_transfer(master); From 94b619a07655805a1622484967754f5848640456 Mon Sep 17 00:00:00 2001 From: Marco De Marco Date: Mon, 5 Jul 2021 19:44:21 +0000 Subject: [PATCH 029/794] USB: serial: option: add support for u-blox LARA-R6 family The patch is meant to support LARA-R6 Cat 1 module family. Module USB ID: Vendor ID: 0x05c6 Product ID: 0x90fA Interface layout: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: QMI wwan (not available in all versions) Signed-off-by: Marco De Marco Link: https://lore.kernel.org/r/49260184.kfMIbaSn9k@mars Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7608584ef4fe..0fbe253dc570 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -238,6 +238,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_UC15 0x9090 /* These u-blox products use Qualcomm's vendor ID */ #define UBLOX_PRODUCT_R410M 0x90b2 +#define UBLOX_PRODUCT_R6XX 0x90fa /* These Yuga products use Qualcomm's vendor ID */ #define YUGA_PRODUCT_CLM920_NC5 0x9625 @@ -1101,6 +1102,8 @@ static const struct usb_device_id option_ids[] = { /* u-blox products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M),
.driver_info = RSVD(1) | RSVD(3) }, + { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX), + .driver_info = RSVD(3) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, From 2e2832562c877e6530b8480982d99a4ff90c6777 Mon Sep 17 00:00:00 2001 From: Alan Young Date: Fri, 9 Jul 2021 09:48:54 +0100 Subject: [PATCH 030/794] ALSA: pcm: Call substream ack() method upon compat mmap commit If a 32-bit application is being used with a 64-bit kernel and is using the mmap mechanism to write data, then the SNDRV_PCM_IOCTL_SYNC_PTR ioctl results in calling snd_pcm_ioctl_sync_ptr_compat(). Make this use pcm_lib_apply_appl_ptr() so that the substream's ack() method, if defined, is called. The snd_pcm_sync_ptr() function, used in the 64-bit ioctl case, already uses pcm_lib_apply_appl_ptr(). Fixes: 9027c4639ef1 ("ALSA: pcm: Call ack() whenever appl_ptr is updated") Signed-off-by: Alan Young Cc: Link: https://lore.kernel.org/r/c441f18c-eb2a-3bdd-299a-696ccca2de9c@gmail.com Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 14e32825c339..c88c4316c417 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3063,9 +3063,14 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, boundary = 0x7fffffff; snd_pcm_stream_lock_irq(substream); /* FIXME: we should consider the boundary for the sync from app */ - if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) - control->appl_ptr = scontrol.appl_ptr; - else + if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) { + err = pcm_lib_apply_appl_ptr(substream, + scontrol.appl_ptr); + if (err < 0) { + snd_pcm_stream_unlock_irq(substream); + return err; + } + } else scontrol.appl_ptr = control->appl_ptr % boundary; if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) control->avail_min
= scontrol.avail_min; From c71f78a662611fe2c67f3155da19b0eff0f29762 Mon Sep 17 00:00:00 2001 From: Maxim Schwalm Date: Mon, 12 Jul 2021 03:50:11 +0300 Subject: [PATCH 031/794] ASoC: rt5631: Fix regcache sync errors on resume The ALC5631 does not like multi-write accesses, avoid them. This fixes: rt5631 4-001a: Unable to sync registers 0x3a-0x3c. -121 errors on resume from suspend (and all registers after the registers in the error not being synced). Inspired by commit 2d30e9494f1e ("ASoC: rt5651: Fix regcache sync errors on resume") from Hans de Goede, which fixed the same errors on ALC5651. Signed-off-by: Maxim Schwalm Link: https://lore.kernel.org/r/20210712005011.28536-1-digetx@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5631.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5631.c b/sound/soc/codecs/rt5631.c index 3000bc128b5b..38356ea2bd6e 100644 --- a/sound/soc/codecs/rt5631.c +++ b/sound/soc/codecs/rt5631.c @@ -1695,6 +1695,8 @@ static const struct regmap_config rt5631_regmap_config = { .reg_defaults = rt5631_reg, .num_reg_defaults = ARRAY_SIZE(rt5631_reg), .cache_type = REGCACHE_RBTREE, + .use_single_read = true, + .use_single_write = true, }; static int rt5631_i2c_probe(struct i2c_client *i2c, From 98f7cd23aa9563c06503991a0cd41f0cacc99f5f Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 9 Jul 2021 16:49:43 +0200 Subject: [PATCH 032/794] s390/vdso32: add vdso32.lds to targets This fixes a permanent rebuild of the 32 bit vdso. The RPM build process was first calling 'make bzImage' and 'make modules' as a second step. This caused a recompilation of vdso32.so, which in turn also changed the build-id of vmlinux.
Fixes: 779df2248739 ("s390/vdso: add minimal compat vdso") Signed-off-by: Sven Schnelle Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/vdso32/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index b2349a3f4fa3..3457dcf10396 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -29,6 +29,7 @@ $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) obj-y += vdso32_wrapper.o +targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) # Disable gcov profiling, ubsan and kasan for VDSO code From c30e5e9ff0c695a8bac813ff4d5216fd7fb51e4e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 8 Jul 2021 11:41:21 +0000 Subject: [PATCH 033/794] s390/defconfig: allow early device mapper disks doing make install on an Ubuntu that is installed on an LVM will fail to boot. Turns out that defconfig misses 2 device mapper related configs for the Ubuntu initramfs. 
Signed-off-by: Christian Borntraeger Reviewed-by: Steffen Maier Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 3 ++- arch/s390/configs/defconfig | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 86afcc6b56bf..55cb846cda37 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -436,7 +436,7 @@ CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m -CONFIG_BLK_DEV_DM=m +CONFIG_BLK_DEV_DM=y CONFIG_DM_UNSTRIPED=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -453,6 +453,7 @@ CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m +CONFIG_DM_INIT=y CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 71b49ea5b058..5d847ab5feaa 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -427,7 +427,7 @@ CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m -CONFIG_BLK_DEV_DM=m +CONFIG_BLK_DEV_DM=y CONFIG_DM_UNSTRIPED=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m @@ -444,6 +444,7 @@ CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m +CONFIG_DM_INIT=y CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m From f35e0cc25280cb0063b0e4481f99268fbd872ff3 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 6 Jul 2021 08:44:00 +0300 Subject: [PATCH 034/794] doc, af_xdp: Fix bind flags option typo Fix XDP_ZERO_COPY flag typo since it is actually named XDP_ZEROCOPY instead as per if_xdp.h uapi header. 
Signed-off-by: Baruch Siach Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/1656fdf94704e9e735df0f8b97667d8f26dd098b.1625550240.git.baruch@tkos.co.il --- Documentation/networking/af_xdp.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index 42576880aa4a..60b217b436be 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -243,8 +243,8 @@ Configuration Flags and Socket Options These are the various configuration flags that can be used to control and monitor the behavior of AF_XDP sockets. -XDP_COPY and XDP_ZERO_COPY bind flags -------------------------------------- +XDP_COPY and XDP_ZEROCOPY bind flags +------------------------------------ When you bind to a socket, the kernel will first try to use zero-copy copy. If zero-copy is not supported, it will fall back on using copy @@ -252,7 +252,7 @@ mode, i.e. copying all packets out to user space. But if you would like to force a certain mode, you can use the following flags. If you pass the XDP_COPY flag to the bind call, the kernel will force the socket into copy mode. If it cannot use copy mode, the bind call will -fail with an error. Conversely, the XDP_ZERO_COPY flag will force the +fail with an error. Conversely, the XDP_ZEROCOPY flag will force the socket into zero-copy mode or fail. XDP_SHARED_UMEM bind flag From 5e21bb4e812566aef86fbb77c96a4ec0782286e4 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 8 Jul 2021 16:04:09 +0800 Subject: [PATCH 035/794] bpf, test: fix NULL pointer dereference on invalid expected_attach_type These two types of XDP progs (BPF_XDP_DEVMAP, BPF_XDP_CPUMAP) will not be executed directly in the driver, therefore we should also not directly run them from here. To run in these two situations, there must be further preparations done, otherwise these may cause a kernel panic. 
For more details, see also dev_xdp_attach(). [ 46.982479] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 46.984295] #PF: supervisor read access in kernel mode [ 46.985777] #PF: error_code(0x0000) - not-present page [ 46.987227] PGD 800000010dca4067 P4D 800000010dca4067 PUD 10dca6067 PMD 0 [ 46.989201] Oops: 0000 [#1] SMP PTI [ 46.990304] CPU: 7 PID: 562 Comm: a.out Not tainted 5.13.0+ #44 [ 46.992001] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/24 [ 46.995113] RIP: 0010:___bpf_prog_run+0x17b/0x1710 [ 46.996586] Code: 49 03 14 cc e8 76 f6 fe ff e9 ad fe ff ff 0f b6 43 01 48 0f bf 4b 02 48 83 c3 08 89 c2 83 e0 0f c0 ea 04 02 [ 47.001562] RSP: 0018:ffffc900005afc58 EFLAGS: 00010246 [ 47.003115] RAX: 0000000000000000 RBX: ffffc9000023f068 RCX: 0000000000000000 [ 47.005163] RDX: 0000000000000000 RSI: 0000000000000079 RDI: ffffc900005afc98 [ 47.007135] RBP: 0000000000000000 R08: ffffc9000023f048 R09: c0000000ffffdfff [ 47.009171] R10: 0000000000000001 R11: ffffc900005afb40 R12: ffffc900005afc98 [ 47.011172] R13: 0000000000000001 R14: 0000000000000001 R15: ffffffff825258a8 [ 47.013244] FS: 00007f04a5207580(0000) GS:ffff88842fdc0000(0000) knlGS:0000000000000000 [ 47.015705] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 47.017475] CR2: 0000000000000000 CR3: 0000000100182005 CR4: 0000000000770ee0 [ 47.019558] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 47.021595] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 47.023574] PKRU: 55555554 [ 47.024571] Call Trace: [ 47.025424] __bpf_prog_run32+0x32/0x50 [ 47.026296] ? printk+0x53/0x6a [ 47.027066] ? ktime_get+0x39/0x90 [ 47.027895] bpf_test_run.cold.28+0x23/0x123 [ 47.028866] ? 
printk+0x53/0x6a [ 47.029630] bpf_prog_test_run_xdp+0x149/0x1d0 [ 47.030649] __sys_bpf+0x1305/0x23d0 [ 47.031482] __x64_sys_bpf+0x17/0x20 [ 47.032316] do_syscall_64+0x3a/0x80 [ 47.033165] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 47.034254] RIP: 0033:0x7f04a51364dd [ 47.035133] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 48 [ 47.038768] RSP: 002b:00007fff8f9fc518 EFLAGS: 00000213 ORIG_RAX: 0000000000000141 [ 47.040344] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f04a51364dd [ 47.041749] RDX: 0000000000000048 RSI: 0000000020002a80 RDI: 000000000000000a [ 47.043171] RBP: 00007fff8f9fc530 R08: 0000000002049300 R09: 0000000020000100 [ 47.044626] R10: 0000000000000004 R11: 0000000000000213 R12: 0000000000401070 [ 47.046088] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 47.047579] Modules linked in: [ 47.048318] CR2: 0000000000000000 [ 47.049120] ---[ end trace 7ad34443d5be719a ]--- [ 47.050273] RIP: 0010:___bpf_prog_run+0x17b/0x1710 [ 47.051343] Code: 49 03 14 cc e8 76 f6 fe ff e9 ad fe ff ff 0f b6 43 01 48 0f bf 4b 02 48 83 c3 08 89 c2 83 e0 0f c0 ea 04 02 [ 47.054943] RSP: 0018:ffffc900005afc58 EFLAGS: 00010246 [ 47.056068] RAX: 0000000000000000 RBX: ffffc9000023f068 RCX: 0000000000000000 [ 47.057522] RDX: 0000000000000000 RSI: 0000000000000079 RDI: ffffc900005afc98 [ 47.058961] RBP: 0000000000000000 R08: ffffc9000023f048 R09: c0000000ffffdfff [ 47.060390] R10: 0000000000000001 R11: ffffc900005afb40 R12: ffffc900005afc98 [ 47.061803] R13: 0000000000000001 R14: 0000000000000001 R15: ffffffff825258a8 [ 47.063249] FS: 00007f04a5207580(0000) GS:ffff88842fdc0000(0000) knlGS:0000000000000000 [ 47.065070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 47.066307] CR2: 0000000000000000 CR3: 0000000100182005 CR4: 0000000000770ee0 [ 47.067747] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 47.069217] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 
0000000000000400 [ 47.070652] PKRU: 55555554 [ 47.071318] Kernel panic - not syncing: Fatal exception [ 47.072854] Kernel Offset: disabled [ 47.073683] ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: 9216477449f3 ("bpf: cpumap: Add the possibility to attach an eBPF program to cpumap") Fixes: fbee97feed9b ("bpf: Add support to attach bpf program to a devmap entry") Reported-by: Abaci Signed-off-by: Xuan Zhuo Signed-off-by: Daniel Borkmann Reviewed-by: Dust Li Acked-by: Jesper Dangaard Brouer Acked-by: David Ahern Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210708080409.73525-1-xuanzhuo@linux.alibaba.com --- net/bpf/test_run.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index aa47af349ba8..1cc75c811e24 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -701,6 +701,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, void *data; int ret; + if (prog->expected_attach_type == BPF_XDP_DEVMAP || + prog->expected_attach_type == BPF_XDP_CPUMAP) + return -EINVAL; if (kattr->test.ctx_in || kattr->test.ctx_out) return -EINVAL; From 0c23af52ccd1605926480b5dfd1dd857ef604611 Mon Sep 17 00:00:00 2001 From: Naresh Kumar PBS Date: Sun, 11 Jul 2021 06:31:36 -0700 Subject: [PATCH 036/794] RDMA/bnxt_re: Fix stats counters Statistical counters are not incrementing in some adapter versions with newer FW. This is due to the stats context length mismatch between FW and driver. Since the L2 driver updates the length correctly, use the stats length from L2 driver while allocating the DMA'able memory and creating the stats context. 
Fixes: 9d6b648c3112 ("bnxt_en: Update firmware interface spec to 1.10.1.65.") Link: https://lore.kernel.org/r/1626010296-6076-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 4 +++- drivers/infiniband/hw/bnxt_re/qplib_res.c | 10 ++++------ drivers/infiniband/hw/bnxt_re/qplib_res.h | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index d5674026512a..a8688a92c760 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -120,6 +120,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) if (!chip_ctx) return -ENOMEM; chip_ctx->chip_num = bp->chip_num; + chip_ctx->hw_stats_size = bp->hw_ring_stats_size; rdev->chip_ctx = chip_ctx; /* rest members to follow eventually */ @@ -550,6 +551,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, dma_addr_t dma_map, u32 *fw_stats_ctx_id) { + struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx; struct hwrm_stat_ctx_alloc_output resp = {0}; struct hwrm_stat_ctx_alloc_input req = {0}; struct bnxt_en_dev *en_dev = rdev->en_dev; @@ -566,7 +568,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); - req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext)); + req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 17f0701b3cee..44282a8cdd4f 100644 --- 
a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -56,6 +56,7 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, struct bnxt_qplib_stats *stats); static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, struct bnxt_qplib_stats *stats); /* PBL */ @@ -559,7 +560,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, goto fail; stats_alloc: /* Stats */ - rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats); + rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats); if (rc) goto fail; @@ -889,15 +890,12 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, } static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, struct bnxt_qplib_stats *stats) { memset(stats, 0, sizeof(*stats)); stats->fw_id = -1; - /* 128 byte aligned context memory is required only for 57500. - * However making this unconditional, it does not harm previous - * generation. 
- */ - stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128); + stats->size = cctx->hw_stats_size; stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, &stats->dma_map, GFP_KERNEL); if (!stats->dma) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c291f495ae91..91031502e8f5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -54,6 +54,7 @@ struct bnxt_qplib_chip_ctx { u16 chip_num; u8 chip_rev; u8 chip_metal; + u16 hw_stats_size; struct bnxt_qplib_drv_modes modes; }; From 6407c69dc51fbd7cf7b6760cd8aefb105d96ff5b Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 7 Jul 2021 14:14:55 -0700 Subject: [PATCH 037/794] RDMA/irdma: Fix unused variable total_size warning Fix the following unused variable warning: drivers/infiniband/hw/irdma/uk.c:934:6: warning: variable 'total_size' set but not used [-Wunused-but-set-variable] Link: https://lore.kernel.org/r/20210707211455.2076-1-tatyana.e.nikolova@intel.com Link: https://lkml.org/lkml/2021/7/1/726 Reported-by: kernel test robot Fixes: 551c46edc769 ("RDMA/irdma: Add user/kernel shared libraries") Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/uk.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index a6d52c20091c..5fb92de1f015 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -931,7 +931,7 @@ enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, struct irdma_post_rq_info *info) { - u32 total_size = 0, wqe_idx, i, byte_off; + u32 wqe_idx, i, byte_off; u32 addl_frag_cnt; __le64 *wqe; u64 hdr; @@ -939,9 +939,6 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, if 
(qp->max_rq_frag_cnt < info->num_sges) return IRDMA_ERR_INVALID_FRAG_COUNT; - for (i = 0; i < info->num_sges; i++) - total_size += info->sg_list[i].len; - wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); if (!wqe) return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; From 514305ee0a1dade95c6ff1eb5735de5a329d1f89 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 1 Jul 2021 12:41:27 +0200 Subject: [PATCH 038/794] RDMA/irdma: Make spdxcheck.py happy Commit 48d6b3336a9f ("RDMA/irdma: Add ABI definitions") adds ./include/uapi/rdma/irdma-abi.h with an additional unneeded closing bracket at the end of the SPDX-License-Identifier line. Hence, ./scripts/spdxcheck.py complains: include/uapi/rdma/irdma-abi.h: 1:77 Syntax error: ) Remove that closing bracket to make spdxcheck.py happy. Fixes: 48d6b3336a9f ("RDMA/irdma: Add ABI definitions") Link: https://lore.kernel.org/r/20210701104127.1877-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Acked-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/irdma-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index 26b638a7ad97..a7085e092d34 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB */ /* * Copyright (c) 2006 - 2021 Intel Corporation. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved. From c9538831b353b96cb37092c3d3e929d67fd43c5f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 8 Jul 2021 02:47:52 -0400 Subject: [PATCH 039/794] RDMA/irdma: Change the returned type to void Since the function irdma_sc_parse_fpm_commit_buf always returns 0, remove the returned value check and change the returned type to void. 
Fixes: 3f49d6842569 ("RDMA/irdma: Implement HW Admin Queue OPs") Link: https://lore.kernel.org/r/20210708064752.797520-1-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Reviewed-by: Majd Dibbiny Acked-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ctrl.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index b1023a7d0bd1..c3880a85e255 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2845,7 +2845,7 @@ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf, * parses fpm commit info and copy base value * of hmc objects in hmc_info */ -static enum irdma_status_code +static void irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, struct irdma_hmc_obj_info *info, u32 *sd) { @@ -2915,7 +2915,6 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, else *sd = (u32)(size >> 21); - return 0; } /** @@ -4434,9 +4433,9 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id, &commit_fpm_mem, true, wait_type); if (!ret_code) - ret_code = irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf, - hmc_info->hmc_obj, - &hmc_info->sd_table.sd_cnt); + irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf, + hmc_info->hmc_obj, + &hmc_info->sd_table.sd_cnt); print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16, 8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE, false); From b0863f1927323110e3d0d69f6adb6a91018a9a3c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 12 Jul 2021 14:54:36 -0400 Subject: [PATCH 040/794] USB: core: Fix incorrect pipe calculation in do_proc_control() When the user submits a control URB via usbfs, the user supplies the bRequestType value and the kernel uses it to compute the pipe value. 
However, do_proc_control() performs this computation incorrectly in the case where the bRequestType direction bit is set to USB_DIR_IN and the URB's transfer length is 0: The pipe's direction is also set to IN but it should be OUT, which is the direction the actual transfer will use regardless of bRequestType. Commit 5cc59c418fde ("USB: core: WARN if pipe direction != setup packet direction") added a check to compare the direction bit in the pipe value to a control URB's actual direction and to WARN if they are different. This can be triggered by the incorrect computation mentioned above, as found by syzbot. This patch fixes the computation, thus avoiding the WARNing. Reported-and-tested-by: syzbot+72af3105289dcb4c055b@syzkaller.appspotmail.com Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20210712185436.GB326369@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index b97464498763..9618ba622a2d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1133,7 +1133,7 @@ static int do_proc_control(struct usb_dev_state *ps, "wIndex=%04x wLength=%04x\n", ctrl->bRequestType, ctrl->bRequest, ctrl->wValue, ctrl->wIndex, ctrl->wLength); - if (ctrl->bRequestType & 0x80) { + if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) { pipe = usb_rcvctrlpipe(dev, 0); snoop_urb(dev, NULL, pipe, ctrl->wLength, tmo, SUBMIT, NULL, 0); From e48a12e546ecbfb0718176037eae0ad60598a29a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Jul 2021 06:16:05 +0200 Subject: [PATCH 041/794] jump_labels: Mark __jump_label_transform() as __always_inlined to work around aggressive compiler un-inlining In randconfig testing, certain UBSAN and CC Kconfig combinations with GCC 10.3.0: CONFIG_X86_32=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_UBSAN=y # CONFIG_UBSAN_TRAP is not set # CONFIG_UBSAN_BOUNDS is not set 
CONFIG_UBSAN_SHIFT=y # CONFIG_UBSAN_DIV_ZERO is not set CONFIG_UBSAN_UNREACHABLE=y CONFIG_UBSAN_BOOL=y # CONFIG_UBSAN_ENUM is not set # CONFIG_UBSAN_ALIGNMENT is not set # CONFIG_UBSAN_SANITIZE_ALL is not set ... produce this build warning (and build error if CONFIG_SECTION_MISMATCH_WARN_ONLY is not set): WARNING: modpost: vmlinux.o(.text+0x4c1cc): Section mismatch in reference from the function __jump_label_transform() to the function .init.text:text_poke_early() The function __jump_label_transform() references the function __init text_poke_early(). This is often because __jump_label_transform lacks a __init annotation or the annotation of text_poke_early is wrong. ERROR: modpost: Section mismatches detected. The problem is that __jump_label_transform() gets uninlined by GCC, despite there being only a single local scope user of the 'static inline' function. Mark the function __always_inline instead, to work around this compiler bug/artifact. Signed-off-by: Ingo Molnar --- arch/x86/kernel/jump_label.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 674906fad43b..68f091ba8443 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -79,9 +79,10 @@ __jump_label_patch(struct jump_entry *entry, enum jump_label_type type) return (struct jump_label_patch){.code = code, .size = size}; } -static inline void __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - int init) +static __always_inline void +__jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) { const struct jump_label_patch jlp = __jump_label_patch(entry, type); From e9ba16e68cce2f85e9f5d2eba5c0453f1a741fd2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 11 Jul 2021 08:26:45 +0200 Subject: [PATCH 042/794] smpboot: Mark idle_init() as __always_inlined to work around aggressive compiler un-inlining While this function is a static inline,
and is only used once in local scope, certain Kconfig variations may cause it to be compiled as a standalone function: 89231bf0 : 89231bf0: 83 05 60 d9 45 89 01 addl $0x1,0x8945d960 89231bf7: 55 push %ebp Resulting in this build failure: WARNING: modpost: vmlinux.o(.text.unlikely+0x7fd5): Section mismatch in reference from the function idle_init() to the function .init.text:fork_idle() The function idle_init() references the function __init fork_idle(). This is often because idle_init lacks a __init annotation or the annotation of fork_idle is wrong. ERROR: modpost: Section mismatches detected. Certain UBSAN options in x86-32 builds with CONFIG_CC_OPTIMIZE_FOR_SIZE=y seem to be causing this. So mark idle_init() as __always_inline to work around this compiler bug/feature. Signed-off-by: Ingo Molnar --- kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/smpboot.c b/kernel/smpboot.c index e4163042c4d6..21b7953f8242 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -47,7 +47,7 @@ void __init idle_thread_set_boot_cpu(void) * * Creates the thread if it does not exist. */ -static inline void idle_init(unsigned int cpu) +static __always_inline void idle_init(unsigned int cpu) { struct task_struct *tsk = per_cpu(idle_threads, cpu); From 5dd0a6b8582ffbfa88351949d50eccd5b6694ade Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 12 Jul 2021 22:57:35 +0200 Subject: [PATCH 043/794] bpf: Fix tail_call_reachable rejection for interpreter when jit failed During testing of f263a81451c1 ("bpf: Track subprog poke descriptors correctly and fix use-after-free") under various failure conditions, for example, when jit_subprogs() fails and tries to clean up the program to be run under the interpreter, we ran into the following freeze: [...] #127/8 tailcall_bpf2bpf_3:FAIL [...]
[ 92.041251] BUG: KASAN: slab-out-of-bounds in ___bpf_prog_run+0x1b9d/0x2e20 [ 92.042408] Read of size 8 at addr ffff88800da67f68 by task test_progs/682 [ 92.043707] [ 92.044030] CPU: 1 PID: 682 Comm: test_progs Tainted: G O 5.13.0-53301-ge6c08cb33a30-dirty #87 [ 92.045542] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 [ 92.046785] Call Trace: [ 92.047171] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.047773] ? __bpf_prog_run_args32+0x8b/0xb0 [ 92.048389] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.049019] ? ktime_get+0x117/0x130 [...] // few hundred [similar] lines more [ 92.659025] ? ktime_get+0x117/0x130 [ 92.659845] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.660738] ? __bpf_prog_run_args32+0x8b/0xb0 [ 92.661528] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.662378] ? print_usage_bug+0x50/0x50 [ 92.663221] ? print_usage_bug+0x50/0x50 [ 92.664077] ? bpf_ksym_find+0x9c/0xe0 [ 92.664887] ? ktime_get+0x117/0x130 [ 92.665624] ? kernel_text_address+0xf5/0x100 [ 92.666529] ? __kernel_text_address+0xe/0x30 [ 92.667725] ? unwind_get_return_address+0x2f/0x50 [ 92.668854] ? ___bpf_prog_run+0x15d4/0x2e20 [ 92.670185] ? ktime_get+0x117/0x130 [ 92.671130] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.672020] ? __bpf_prog_run_args32+0x8b/0xb0 [ 92.672860] ? __bpf_prog_run_args64+0xc0/0xc0 [ 92.675159] ? ktime_get+0x117/0x130 [ 92.677074] ? lock_is_held_type+0xd5/0x130 [ 92.678662] ? ___bpf_prog_run+0x15d4/0x2e20 [ 92.680046] ? ktime_get+0x117/0x130 [ 92.681285] ? __bpf_prog_run32+0x6b/0x90 [ 92.682601] ? __bpf_prog_run64+0x90/0x90 [ 92.683636] ? lock_downgrade+0x370/0x370 [ 92.684647] ? mark_held_locks+0x44/0x90 [ 92.685652] ? ktime_get+0x117/0x130 [ 92.686752] ? lockdep_hardirqs_on+0x79/0x100 [ 92.688004] ? ktime_get+0x117/0x130 [ 92.688573] ? __cant_migrate+0x2b/0x80 [ 92.689192] ? bpf_test_run+0x2f4/0x510 [ 92.689869] ? bpf_test_timer_continue+0x1c0/0x1c0 [ 92.690856] ? rcu_read_lock_bh_held+0x90/0x90 [ 92.691506] ? __kasan_slab_alloc+0x61/0x80 [ 92.692128] ? 
eth_type_trans+0x128/0x240 [ 92.692737] ? __build_skb+0x46/0x50 [ 92.693252] ? bpf_prog_test_run_skb+0x65e/0xc50 [ 92.693954] ? bpf_prog_test_run_raw_tp+0x2d0/0x2d0 [ 92.694639] ? __fget_light+0xa1/0x100 [ 92.695162] ? bpf_prog_inc+0x23/0x30 [ 92.695685] ? __sys_bpf+0xb40/0x2c80 [ 92.696324] ? bpf_link_get_from_fd+0x90/0x90 [ 92.697150] ? mark_held_locks+0x24/0x90 [ 92.698007] ? lockdep_hardirqs_on_prepare+0x124/0x220 [ 92.699045] ? finish_task_switch+0xe6/0x370 [ 92.700072] ? lockdep_hardirqs_on+0x79/0x100 [ 92.701233] ? finish_task_switch+0x11d/0x370 [ 92.702264] ? __switch_to+0x2c0/0x740 [ 92.703148] ? mark_held_locks+0x24/0x90 [ 92.704155] ? __x64_sys_bpf+0x45/0x50 [ 92.705146] ? do_syscall_64+0x35/0x80 [ 92.706953] ? entry_SYSCALL_64_after_hwframe+0x44/0xae [...] Turns out that the program rejection from e411901c0b77 ("bpf: allow for tailcalls in BPF subprograms for x64 JIT") is buggy since env->prog->aux->tail_call_reachable is never true. Commit ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") added a tracker into check_max_stack_depth() which propagates the tail_call_reachable condition throughout the subprograms. This info is then assigned to the subprogram's func[i]->aux->tail_call_reachable. However, in the case of the rejection check upon JIT failure, env->prog->aux->tail_call_reachable is used. func[0]->aux->tail_call_reachable which represents the main program's information did not propagate this to the outer env->prog->aux, though. Add this propagation into check_max_stack_depth() where it needs to belong so that the check can be done reliably. 
Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") Fixes: e411901c0b77 ("bpf: allow for tailcalls in BPF subprograms for x64 JIT") Co-developed-by: John Fastabend Signed-off-by: Daniel Borkmann Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/618c34e3163ad1a36b1e82377576a6081e182f25.1626123173.git.daniel@iogearbox.net --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 42a4063de7cd..9de3c9c3267c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3677,6 +3677,8 @@ continue_func: if (tail_call_reachable) for (j = 0; j < frame; j++) subprog[ret_prog[j]].tail_call_reachable = true; + if (subprog[0].tail_call_reachable) + env->prog->aux->tail_call_reachable = true; /* end of for() loop means the last insn of the 'subprog' * was reached. Doesn't matter whether it was JA or EXIT From 5acc7d3e8d342858405fbbc671221f676b547ce7 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Sat, 10 Jul 2021 11:16:35 +0800 Subject: [PATCH 044/794] xdp, net: Fix use-after-free in bpf_xdp_link_release The problem occurs between dev_get_by_index() and dev_xdp_attach_link(). At this point, dev_xdp_uninstall() is called. Then xdp link will not be detached automatically when dev is released. But link->dev already points to dev, when xdp link is released, dev will still be accessed, but dev has been released. dev_get_by_index() | link->dev = dev | | rtnl_lock() | unregister_netdevice_many() | dev_xdp_uninstall() | rtnl_unlock() rtnl_lock(); | dev_xdp_attach_link() | rtnl_unlock(); | | netdev_run_todo() // dev released bpf_xdp_link_release() | /* access dev. 
| use-after-free */ | [ 45.966867] BUG: KASAN: use-after-free in bpf_xdp_link_release+0x3b8/0x3d0 [ 45.967619] Read of size 8 at addr ffff00000f9980c8 by task a.out/732 [ 45.968297] [ 45.968502] CPU: 1 PID: 732 Comm: a.out Not tainted 5.13.0+ #22 [ 45.969222] Hardware name: linux,dummy-virt (DT) [ 45.969795] Call trace: [ 45.970106] dump_backtrace+0x0/0x4c8 [ 45.970564] show_stack+0x30/0x40 [ 45.970981] dump_stack_lvl+0x120/0x18c [ 45.971470] print_address_description.constprop.0+0x74/0x30c [ 45.972182] kasan_report+0x1e8/0x200 [ 45.972659] __asan_report_load8_noabort+0x2c/0x50 [ 45.973273] bpf_xdp_link_release+0x3b8/0x3d0 [ 45.973834] bpf_link_free+0xd0/0x188 [ 45.974315] bpf_link_put+0x1d0/0x218 [ 45.974790] bpf_link_release+0x3c/0x58 [ 45.975291] __fput+0x20c/0x7e8 [ 45.975706] ____fput+0x24/0x30 [ 45.976117] task_work_run+0x104/0x258 [ 45.976609] do_notify_resume+0x894/0xaf8 [ 45.977121] work_pending+0xc/0x328 [ 45.977575] [ 45.977775] The buggy address belongs to the page: [ 45.978369] page:fffffc00003e6600 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4f998 [ 45.979522] flags: 0x7fffe0000000000(node=0|zone=0|lastcpupid=0x3ffff) [ 45.980349] raw: 07fffe0000000000 fffffc00003e6708 ffff0000dac3c010 0000000000000000 [ 45.981309] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 45.982259] page dumped because: kasan: bad access detected [ 45.982948] [ 45.983153] Memory state around the buggy address: [ 45.983753] ffff00000f997f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 45.984645] ffff00000f998000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 45.985533] >ffff00000f998080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 45.986419] ^ [ 45.987112] ffff00000f998100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 45.988006] ffff00000f998180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 45.988895] ================================================================== [ 45.989773] Disabling lock debugging due 
to kernel taint [ 45.990552] Kernel panic - not syncing: panic_on_warn set ... [ 45.991166] CPU: 1 PID: 732 Comm: a.out Tainted: G B 5.13.0+ #22 [ 45.991929] Hardware name: linux,dummy-virt (DT) [ 45.992448] Call trace: [ 45.992753] dump_backtrace+0x0/0x4c8 [ 45.993208] show_stack+0x30/0x40 [ 45.993627] dump_stack_lvl+0x120/0x18c [ 45.994113] dump_stack+0x1c/0x34 [ 45.994530] panic+0x3a4/0x7d8 [ 45.994930] end_report+0x194/0x198 [ 45.995380] kasan_report+0x134/0x200 [ 45.995850] __asan_report_load8_noabort+0x2c/0x50 [ 45.996453] bpf_xdp_link_release+0x3b8/0x3d0 [ 45.997007] bpf_link_free+0xd0/0x188 [ 45.997474] bpf_link_put+0x1d0/0x218 [ 45.997942] bpf_link_release+0x3c/0x58 [ 45.998429] __fput+0x20c/0x7e8 [ 45.998833] ____fput+0x24/0x30 [ 45.999247] task_work_run+0x104/0x258 [ 45.999731] do_notify_resume+0x894/0xaf8 [ 46.000236] work_pending+0xc/0x328 [ 46.000697] SMP: stopping secondary CPUs [ 46.001226] Dumping ftrace buffer: [ 46.001663] (ftrace buffer empty) [ 46.002110] Kernel Offset: disabled [ 46.002545] CPU features: 0x00000001,23202c00 [ 46.003080] Memory Limit: none Fixes: aa8d3a716b59db6c ("bpf, xdp: Add bpf_link-based XDP attachment API") Reported-by: Abaci Signed-off-by: Xuan Zhuo Signed-off-by: Alexei Starovoitov Reviewed-by: Dust Li Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210710031635.41649-1-xuanzhuo@linux.alibaba.com --- net/core/dev.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 64b21f0a2048..7da8d1215328 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9700,14 +9700,17 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) struct net_device *dev; int err, fd; + rtnl_lock(); dev = dev_get_by_index(net, attr->link_create.target_ifindex); - if (!dev) + if (!dev) { + rtnl_unlock(); return -EINVAL; + } link = kzalloc(sizeof(*link), GFP_USER); if (!link) { err = -ENOMEM; - goto out_put_dev; + goto unlock; } 
bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); @@ -9717,14 +9720,14 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); - goto out_put_dev; + goto unlock; } - rtnl_lock(); err = dev_xdp_attach_link(dev, NULL, link); rtnl_unlock(); if (err) { + link->dev = NULL; bpf_link_cleanup(&link_primer); goto out_put_dev; } @@ -9734,6 +9737,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) dev_put(dev); return fd; +unlock: + rtnl_unlock(); + out_put_dev: dev_put(dev); return err; From 7e71b85473f863a29eb1c69265ef025389b4091d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 8 Jun 2021 14:26:58 +0300 Subject: [PATCH 045/794] arm64: dts: ls1028a: fix node name for the sysclk U-Boot attempts to fix up the "clock-frequency" property of the "/sysclk" node: https://elixir.bootlin.com/u-boot/v2021.04/source/arch/arm/cpu/armv8/fsl-layerscape/fdt.c#L512 but fails to do so: ## Booting kernel from Legacy Image at a1000000 ... Image Name: Created: 2021-06-08 10:31:38 UTC Image Type: AArch64 Linux Kernel Image (gzip compressed) Data Size: 15431370 Bytes = 14.7 MiB Load Address: 80080000 Entry Point: 80080000 Verifying Checksum ... OK ## Flattened Device Tree blob at a0000000 Booting using the fdt blob at 0xa0000000 Uncompressing Kernel Image Loading Device Tree to 00000000fbb19000, end 00000000fbb22717 ... OK Unable to update property /sysclk:clock-frequency, err=FDT_ERR_NOTFOUND Starting kernel ... All Layerscape SoCs except LS1028A use "sysclk" as the node name, and not "clock-sysclk". So change the node name of LS1028A accordingly. 
Fixes: 8897f3255c9c ("arm64: dts: Add support for NXP LS1028A SoC") Signed-off-by: Vladimir Oltean Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index b2e3e5d2a108..343ecf0e8973 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -66,7 +66,7 @@ }; }; - sysclk: clock-sysclk { + sysclk: sysclk { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <100000000>; From 7dd2dd4ff9f3abda601f22b9d01441a0869d20d7 Mon Sep 17 00:00:00 2001 From: Adrian Larumbe Date: Wed, 7 Jul 2021 00:43:38 +0100 Subject: [PATCH 046/794] dmaengine: xilinx_dma: Fix read-after-free bug when terminating transfers When user calls dmaengine_terminate_sync, the driver will clean up any remaining descriptors for all the pending or active transfers that had previously been submitted. However, this might happen whilst the tasklet is invoking the DMA callback for the last finished transfer, so by the time it returns and takes over the channel's spinlock, the list of completed descriptors it was traversing is no longer valid. This leads to a read-after-free situation. Fix it by signalling whether a user-triggered termination has happened by means of a boolean variable. 
Signed-off-by: Adrian Larumbe Link: https://lore.kernel.org/r/20210706234338.7696-3-adrian.martinezlarumbe@imgtec.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 75c0b8e904e5..4b9530a7bf65 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -394,6 +394,7 @@ struct xilinx_dma_tx_descriptor { * @genlock: Support genlock mode * @err: Channel has errors * @idle: Check for channel idle + * @terminating: Check for channel being synchronized by user * @tasklet: Cleanup work after irq * @config: Device configuration info * @flush_on_fsync: Flush on Frame sync @@ -431,6 +432,7 @@ struct xilinx_dma_chan { bool genlock; bool err; bool idle; + bool terminating; struct tasklet_struct tasklet; struct xilinx_vdma_config config; bool flush_on_fsync; @@ -1049,6 +1051,13 @@ static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan) /* Run any dependencies, then free the descriptor */ dma_run_dependencies(&desc->async_tx); xilinx_dma_free_tx_descriptor(chan, desc); + + /* + * While we ran a callback the user called a terminate function, + * which takes care of cleaning up any remaining descriptors + */ + if (chan->terminating) + break; } spin_unlock_irqrestore(&chan->lock, flags); @@ -1965,6 +1974,8 @@ static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx) if (desc->cyclic) chan->cyclic = true; + chan->terminating = false; + spin_unlock_irqrestore(&chan->lock, flags); return cookie; @@ -2436,6 +2447,7 @@ static int xilinx_dma_terminate_all(struct dma_chan *dchan) xilinx_dma_chan_reset(chan); /* Remove and free all of the descriptors in the lists */ + chan->terminating = true; xilinx_dma_free_descriptors(chan); chan->idle = true; From 1da569fa7ec8cb0591c74aa3050d4ea1397778b4 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 6 Jul 2021 20:45:21 +0800 Subject: [PATCH 
047/794] dmaengine: usb-dmac: Fix PM reference leak in usb_dmac_probe() pm_runtime_get_sync() increments the PM usage counter even when it fails. Forgetting the corresponding put operation results in a reference leak here. Fix it by moving the error_pm label above the pm_runtime_put() in the error path. Reported-by: Hulk Robot Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20210706124521.1371901-1-yukuai3@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/sh/usb-dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index 8f7ceb698226..1cc06900153e 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -855,8 +855,8 @@ static int usb_dmac_probe(struct platform_device *pdev) error: of_dma_controller_free(pdev->dev.of_node); - pm_runtime_put(&pdev->dev); error_pm: + pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); return ret; } From da435aedb00a4ef61019ff11ae0c08ffb9b1fb18 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 24 Jun 2021 12:09:29 -0700 Subject: [PATCH 048/794] dmaengine: idxd: fix array index when int_handles are being used The index to the irq vector should be local and has no relation to the assigned interrupt handle. Assign the MSIX interrupt index that is programmed for the descriptor. The interrupt handle only matters when it comes to hardware descriptor programming.
Fixes: eb15e7154fbf ("dmaengine: idxd: add interrupt handle request and release support") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162456176939.1121476.3366256009925001897.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 19afb62abaff..e29887528077 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -128,19 +128,8 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) { - int vec; - - /* - * If the driver is on host kernel, it would be the value - * assigned to interrupt handle, which is index for MSIX - * vector. If it's guest then can't use the int_handle since - * that is the index to IMS for the entire device. The guest - * device local index will be used. - */ - vec = !idxd->int_handles ? desc->hw->int_handle : desc->vector; - llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist); - } + if (desc->hw->flags & IDXD_OP_FLAG_RCI) + llist_add(&desc->llnode, &idxd->irq_entries[desc->vector].pending_llist); return 0; } From d5c10e0fc8645342fe5c9796b00c84ab078cd713 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 24 Jun 2021 13:43:32 -0700 Subject: [PATCH 049/794] dmaengine: idxd: fix setup sequence for MSIXPERM table The MSIX permission table should be programmed BEFORE request_irq() happens. This prevents any possibility of an interrupt happening before the MSIX perm table is setup, however slight. 
Fixes: 6df0e6c57dfc ("dmaengine: idxd: clear MSIX permission entry on shutdown") Sign-off-by: Dave Jiang Link: https://lore.kernel.org/r/162456741222.1138073.1298447364671237896.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c8ae41d36040..4e32a4dcc3ab 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -102,6 +102,8 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) spin_lock_init(&idxd->irq_entries[i].list_lock); } + idxd_msix_perm_setup(idxd); + irq_entry = &idxd->irq_entries[0]; rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread, 0, "idxd-misc", irq_entry); @@ -148,7 +150,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } idxd_unmask_error_interrupts(idxd); - idxd_msix_perm_setup(idxd); return 0; err_wq_irqs: @@ -162,6 +163,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) err_misc_irq: /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); + idxd_msix_perm_clear(idxd); err_irq_entries: pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); From f9613aa07f16d6042e74208d1b40a6104d72964a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 15 Jun 2021 20:52:38 +0800 Subject: [PATCH 050/794] ARM: imx: add missing iounmap() Commit e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver") introduced imx_mmdc_remove(), the mmdc_base need be unmapped in it if config PERF_EVENTS is enabled. If imx_mmdc_perf_init() fails, the mmdc_base also need be unmapped. 
Fixes: e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Dong Aisheng Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mmdc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 0dfd0ae7a63d..8e57691aafe2 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -462,6 +462,7 @@ static int imx_mmdc_remove(struct platform_device *pdev) cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); perf_pmu_unregister(&pmu_mmdc->pmu); + iounmap(pmu_mmdc->mmdc_base); kfree(pmu_mmdc); return 0; } @@ -567,7 +568,11 @@ static int imx_mmdc_probe(struct platform_device *pdev) val &= ~(1 << BP_MMDC_MAPSR_PSD); writel_relaxed(val, reg); - return imx_mmdc_perf_init(pdev, mmdc_base); + err = imx_mmdc_perf_init(pdev, mmdc_base); + if (err) + iounmap(mmdc_base); + + return err; } int imx_mmdc_get_ddr_type(void) From f07ec85365807b3939f32d0094a6dd5ce065d1b9 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 15 Jun 2021 20:52:39 +0800 Subject: [PATCH 051/794] ARM: imx: add missing clk_disable_unprepare() clock source is prepared and enabled by clk_prepare_enable() in probe function, but no disable or unprepare in remove and error path. 
Fixes: 9454a0caff6a ("ARM: imx: add mmdc ipg clock operation for mmdc") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Dong Aisheng Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mmdc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 8e57691aafe2..4a6f1359e1e9 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -103,6 +103,7 @@ struct mmdc_pmu { struct perf_event *mmdc_events[MMDC_NUM_COUNTERS]; struct hlist_node node; struct fsl_mmdc_devtype_data *devtype_data; + struct clk *mmdc_ipg_clk; }; /* @@ -463,11 +464,13 @@ static int imx_mmdc_remove(struct platform_device *pdev) cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); perf_pmu_unregister(&pmu_mmdc->pmu); iounmap(pmu_mmdc->mmdc_base); + clk_disable_unprepare(pmu_mmdc->mmdc_ipg_clk); kfree(pmu_mmdc); return 0; } -static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base) +static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base, + struct clk *mmdc_ipg_clk) { struct mmdc_pmu *pmu_mmdc; char *name; @@ -495,6 +498,7 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b } mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev); + pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; if (mmdc_num == 0) name = "mmdc"; else @@ -568,9 +572,11 @@ static int imx_mmdc_probe(struct platform_device *pdev) val &= ~(1 << BP_MMDC_MAPSR_PSD); writel_relaxed(val, reg); - err = imx_mmdc_perf_init(pdev, mmdc_base); - if (err) + err = imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk); + if (err) { iounmap(mmdc_base); + clk_disable_unprepare(mmdc_ipg_clk); + } return err; } From fb1425b436bcf936065edbbe8d092465a53185b6 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 17 Jun 2021 11:54:15 -0300 Subject: [PATCH 052/794] ARM: imx: common: Move prototype outside the SMP block Currently the 
imx_gpcv2_set_core1_pdn_pup_by_software() prototype is guarded by the CONFIG_SMP symbol. This causes W=1 build warnings when CONFIG_SMP is not selected: arch/arm/mach-imx/src.c:103:6: warning: no previous prototype for 'imx_gpcv2_set_core1_pdn_pup_by_software' [-Wmissing-prototypes] Fix it by moving the imx_gpcv2_set_core1_pdn_pup_by_software() prototype outside of the CONFIG_SMP block. Fixes: e34645f45805 ("ARM: imx: add smp support for imx7d") Reported-by: kernel test robot Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index f0a073a71401..13f3068e9845 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -68,7 +68,6 @@ void imx_set_cpu_arg(int cpu, u32 arg); void v7_secondary_startup(void); void imx_scu_map_io(void); void imx_smp_prepare(void); -void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn); #else static inline void imx_scu_map_io(void) {} static inline void imx_smp_prepare(void) {} @@ -81,6 +80,7 @@ void imx_gpc_mask_all(void); void imx_gpc_restore_all(void); void imx_gpc_hwirq_mask(unsigned int hwirq); void imx_gpc_hwirq_unmask(unsigned int hwirq); +void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn); void imx_anatop_init(void); void imx_anatop_pre_suspend(void); void imx_anatop_post_resume(void); From fd8e83884fdd7b5fc411f201a58d8d01890198a2 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 25 Jun 2021 14:13:53 +0200 Subject: [PATCH 053/794] ARM: dts: imx6qdl-sr-som: Increase the PHY reset duration to 10ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AR803x PHY used on this modules seems to require the reset line to be asserted for around 10ms in order to avoid rare cases where the PHY gets stuck in an incoherent state that prevents it to function correctly. 
The previous value of 2ms was found to be problematic on some setups, causing intermittent issues where the PHY would be unresponsive every once in a while on some systems, with a low occurrence (it typically took around 30 consecutive reboots to encounter the issue). Bumping the delay to 10ms makes the issue disappear, with more than 2500 consecutive reboots performed without the issue showing up. Fixes: 208d7baf8085 ("ARM: imx: initial SolidRun HummingBoard support") Signed-off-by: Maxime Chevallier Tested-by: Hervé Codina Reviewed-by: Russell King (Oracle) Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-sr-som.dtsi | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi index 0ad8ccde0cf8..f86efd0ccc40 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi @@ -54,7 +54,13 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_microsom_enet_ar8035>; phy-mode = "rgmii-id"; - phy-reset-duration = <2>; + + /* + * The PHY seems to require a long-enough reset duration to avoid + * some rare issues where the PHY gets stuck in an inconsistent and + * non-functional state at boot-up. 10ms proved to be fine . + */ + phy-reset-duration = <10>; phy-reset-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>; status = "okay"; From 80d9ac9bd7b9366c2a89d2716a397749299728e7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Jul 2021 12:36:41 +0100 Subject: [PATCH 054/794] KVM: arm64: Fix detection of shared VMAs on guest fault When merging the KVM MTE support, the blob that was interposed between the chair and the keyboard experienced a neuronal accident (also known as a brain fart), turning a check for VM_SHARED into VM_PFNMAP as it was reshuffling some of the code. The blob having now come back to its senses, let's restore the initial check that the original author got right in the first place.
Fixes: ea7fc1bb1cd1 ("KVM: arm64: Introduce MTE VM feature") Reviewed-by: Steven Price Signed-off-by: Marc Zyngier Cc: Catalin Marinas Link: https://lore.kernel.org/r/20210713114804.594993-1-maz@kernel.org --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3155c9e778f0..0625bf2353c2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -947,7 +947,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = get_vma_page_shift(vma, hva); } - shared = (vma->vm_flags & VM_PFNMAP); + shared = (vma->vm_flags & VM_SHARED); switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED From bac0b135907855e9f8c032877c3df3c60885a08f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 13 Jul 2021 22:37:41 +0200 Subject: [PATCH 055/794] KVM: selftests: change pthread_yield to sched_yield MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With later GCC we get steal_time.c: In function ‘main’: steal_time.c:323:25: warning: ‘pthread_yield’ is deprecated: pthread_yield is deprecated, use sched_yield instead [-Wdeprecated-declarations] Let's follow the instructions and use sched_yield instead. 
Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210713203742.29680-2-drjones@redhat.com --- tools/testing/selftests/kvm/steal_time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index b0031f2d38fd..ecec30865a74 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -320,7 +320,7 @@ int main(int ac, char **av) run_delay = get_run_delay(); pthread_create(&thread, &attr, do_steal_time, NULL); do - pthread_yield(); + sched_yield(); while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS); pthread_join(thread, NULL); run_delay = get_run_delay() - run_delay; From 5cf17746b302aa32a4f200cc6ce38865bfe4cf94 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 13 Jul 2021 22:37:42 +0200 Subject: [PATCH 056/794] KVM: arm64: selftests: get-reg-list: actually enable pmu regs in pmu sublist We reworked get-reg-list to make it easier to enable optional register sublists by parametrizing their vcpu feature flags as well as making other generalizations. That was all to make sure we enable the PMU registers when we want to test them. Somehow we forgot to actually include the PMU feature flag in the PMU sublist description though! Do that now. 
Fixes: 313673bad871 ("KVM: arm64: selftests: get-reg-list: Split base and pmu registers") Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210713203742.29680-3-drjones@redhat.com --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index a16c8f05366c..cc898181faab 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -1019,7 +1019,8 @@ static __u64 sve_rejects_set[] = { #define VREGS_SUBLIST \ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), } #define PMU_SUBLIST \ - { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } + { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \ + .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } #define SVE_SUBLIST \ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ From aa21548e34c19c12e924c736f3fd9e6a4d0f5419 Mon Sep 17 00:00:00 2001 From: Sathya Prakash M R Date: Mon, 12 Jul 2021 15:16:20 -0500 Subject: [PATCH 057/794] ASoC: SOF: Intel: Update ADL descriptor to use ACPI power states The ADL descriptor was missing an ACPI power setting, causing the DSP to enter D3 even with a D0i1-compatible wake-on-voice/hotwording capture stream. 
Fixes: 4ad03f894b3c ('ASoC: SOF: Intel: Update ADL P to use its own descriptor') Reviewed-by: Ranjani Sridharan Signed-off-by: Sathya Prakash M R Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210712201620.44311-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index a00262184efa..d04ce84fe7cc 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -89,6 +89,7 @@ static const struct sof_dev_desc adls_desc = { static const struct sof_dev_desc adl_desc = { .machines = snd_soc_acpi_intel_adl_machines, .alt_machines = snd_soc_acpi_intel_adl_sdw_machines, + .use_acpi_target_states = true, .resindex_lpe_base = 0, .resindex_pcicfg_base = -1, .resindex_imr_base = -1, From 9431f8df233f808baa5fcc62b520cc6503fdf022 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 13 Jul 2021 15:04:17 +0100 Subject: [PATCH 058/794] ASoC: codecs: wcd938x: make sdw dependency explicit in Kconfig Currently wcd938x has only a SoundWire interface and depends on symbols from the wcd938x SoundWire module, so make this dependency explicit in Kconfig. Without this, one of the randconfigs ends up setting CONFIG_SND_SOC_WCD938X=y CONFIG_SND_SOC_WCD938X_SDW=m resulting in undefined references to wcd938x_sdw* symbols. Reported-by: kernel test robot Fixes: 045442228868 ("ASoC: codecs: wcd938x: add audio routing and Kconfig") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210713140417.23693-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 3a42c4611414..032c87637f63 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1557,6 +1557,7 @@ config SND_SOC_WCD934X Qualcomm SoCs like SDM845.
config SND_SOC_WCD938X + depends on SND_SOC_WCD938X_SDW tristate config SND_SOC_WCD938X_SDW From c934fec1c32840224fd975544c347823962193b2 Mon Sep 17 00:00:00 2001 From: Mason Zhang Date: Tue, 13 Jul 2021 19:42:48 +0800 Subject: [PATCH 059/794] spi: mediatek: move devm_spi_register_master position This patch moves devm_spi_register_master to the end of mtk_spi_probe. If slaves call spi_sync in their probe function, the master should have finished probing. Signed-off-by: Mason Zhang Link: https://lore.kernel.org/r/20210713114247.1536-1-mason.zhang@mediatek.com Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 8d5fa7f1e506..68dca8ceb3ad 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -803,12 +803,6 @@ static int mtk_spi_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - ret = devm_spi_register_master(&pdev->dev, master); - if (ret) { - dev_err(&pdev->dev, "failed to register master (%d)\n", ret); - goto err_disable_runtime_pm; - } - if (mdata->dev_comp->need_pad_sel) { if (mdata->pad_num != master->num_chipselect) { dev_err(&pdev->dev, @@ -848,6 +842,12 @@ static int mtk_spi_probe(struct platform_device *pdev) dev_notice(&pdev->dev, "SPI dma_set_mask(%d) failed, ret:%d\n", addr_bits, ret); + ret = devm_spi_register_master(&pdev->dev, master); + if (ret) { + dev_err(&pdev->dev, "failed to register master (%d)\n", ret); + goto err_disable_runtime_pm; + } + return 0; err_disable_runtime_pm: From 95d429206c97cf109591009fa386004191c62c47 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 28 Jun 2021 18:28:46 -0400 Subject: [PATCH 060/794] platform/x86: think-lmi: Add pending_reboot support The Think-lmi driver was missing pending_reboot support as it wasn't available from the BIOS. Turns out this is really useful to have from user space so implementing from a purely SW point of view.
Thanks to Mario Limonciello for guidance on how fwupd would use this. Suggested-by: Mario Limonciello Signed-off-by: Mark Pearson Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210628222846.8830-1-markpearson@lenovo.com Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 19 +++++++++++++++++++ drivers/platform/x86/think-lmi.h | 1 + 2 files changed, 20 insertions(+) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 3671b5d20613..64dcec53a7a0 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -571,6 +571,11 @@ static ssize_t current_value_store(struct kobject *kobj, else ret = tlmi_save_bios_settings(""); + if (!ret && !tlmi_priv.pending_changes) { + tlmi_priv.pending_changes = true; + /* let userland know it may need to check reboot pending again */ + kobject_uevent(&tlmi_priv.class_dev->kobj, KOBJ_CHANGE); + } out: kfree(auth_str); kfree(set_str); @@ -647,6 +652,14 @@ static struct kobj_type tlmi_pwd_setting_ktype = { .sysfs_ops = &tlmi_kobj_sysfs_ops, }; +static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", tlmi_priv.pending_changes); +} + +static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot); + /* ---- Initialisation --------------------------------------------------------- */ static void tlmi_release_attr(void) { @@ -667,6 +680,7 @@ static void tlmi_release_attr(void) sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); kobject_put(&tlmi_priv.pwd_power->kobj); kset_unregister(tlmi_priv.authentication_kset); + sysfs_remove_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr); } static int tlmi_sysfs_init(void) @@ -746,6 +760,11 @@ static int tlmi_sysfs_init(void) if (ret) goto fail_create_attr; + /* Create global sysfs files */ + ret = sysfs_create_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr); + if (ret) + goto fail_create_attr; + return 
ret; fail_create_attr: diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h index 6fa8da7af6c7..eb598846628a 100644 --- a/drivers/platform/x86/think-lmi.h +++ b/drivers/platform/x86/think-lmi.h @@ -60,6 +60,7 @@ struct think_lmi { bool can_get_bios_selections; bool can_set_bios_password; bool can_get_password_settings; + bool pending_changes; struct tlmi_attr_setting *setting[TLMI_SETTINGS_COUNT]; struct device *class_dev; From 95e1b60f8dc8f225b14619e9aca9bdd7d99167db Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:17:57 +0530 Subject: [PATCH 061/794] platform/x86: amd-pmc: Fix command completion code The protocol to submit a job request to SMU is to wait for AMD_PMC_REGISTER_RESPONSE to return 1,meaning SMU is ready to take requests. PMC driver has to make sure that the response code is always AMD_PMC_RESULT_OK before making any command submissions. When we submit a message to SMU, we have to wait until it processes the request. Adding a read_poll_timeout() check as this was missing in the existing code. Also, add a mutex to protect amd_pmc_send_cmd() calls to SMU. 
Fixes: 156ec4731cb2 ("platform/x86: amd-pmc: Add AMD platform support for S2Idle") Signed-off-by: Shyam Sundar S K Acked-by: Raul E Rangel Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-2-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 38 ++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index b9da58ee9b1e..1b5f149932c1 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -68,6 +68,7 @@ struct amd_pmc_dev { u32 base_addr; u32 cpu_id; struct device *dev; + struct mutex lock; /* generic mutex lock */ #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbgfs_dir; #endif /* CONFIG_DEBUG_FS */ @@ -138,9 +139,10 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) u8 msg; u32 val; + mutex_lock(&dev->lock); /* Wait until we get a valid response */ rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE, - val, val > 0, PMC_MSG_DELAY_MIN_US, + val, val != 0, PMC_MSG_DELAY_MIN_US, PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX); if (rc) { dev_err(dev->dev, "failed to talk to SMU\n"); @@ -156,7 +158,37 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) /* Write message ID to message ID register */ msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO; amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg); - return 0; + /* Wait until we get a valid response */ + rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE, + val, val != 0, PMC_MSG_DELAY_MIN_US, + PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX); + if (rc) { + dev_err(dev->dev, "SMU response timed out\n"); + goto out_unlock; + } + + switch (val) { + case AMD_PMC_RESULT_OK: + break; + case AMD_PMC_RESULT_CMD_REJECT_BUSY: + dev_err(dev->dev, "SMU not ready. 
err: 0x%x\n", val); + rc = -EBUSY; + goto out_unlock; + case AMD_PMC_RESULT_CMD_UNKNOWN: + dev_err(dev->dev, "SMU cmd unknown. err: 0x%x\n", val); + rc = -EINVAL; + goto out_unlock; + case AMD_PMC_RESULT_CMD_REJECT_PREREQ: + case AMD_PMC_RESULT_FAILED: + default: + dev_err(dev->dev, "SMU cmd failed. err: 0x%x\n", val); + rc = -EIO; + goto out_unlock; + } + +out_unlock: + mutex_unlock(&dev->lock); + return rc; } static int __maybe_unused amd_pmc_suspend(struct device *dev) @@ -259,6 +291,7 @@ static int amd_pmc_probe(struct platform_device *pdev) amd_pmc_dump_registers(dev); + mutex_init(&dev->lock); platform_set_drvdata(pdev, dev); amd_pmc_dbgfs_register(dev); return 0; @@ -269,6 +302,7 @@ static int amd_pmc_remove(struct platform_device *pdev) struct amd_pmc_dev *dev = platform_get_drvdata(pdev); amd_pmc_dbgfs_unregister(dev); + mutex_destroy(&dev->lock); return 0; } From 4c06d35dfedf4c1fd03702e0f05292a69d020e21 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:17:58 +0530 Subject: [PATCH 062/794] platform/x86: amd-pmc: Fix SMU firmware reporting mechanism It was lately understood that the current mechanism available in the driver to get SMU firmware info works only on internal SMU builds and there is a separate way to get all the SMU logging counters (addressed in the next patch). Hence remove all the smu info shown via debugfs as it is no more useful. 
Fixes: 156ec4731cb2 ("platform/x86: amd-pmc: Add AMD platform support for S2Idle") Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-3-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 1b5f149932c1..b1d6175a13b2 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -52,7 +52,6 @@ #define AMD_CPU_ID_PCO AMD_CPU_ID_RV #define AMD_CPU_ID_CZN AMD_CPU_ID_RN -#define AMD_SMU_FW_VERSION 0x0 #define PMC_MSG_DELAY_MIN_US 100 #define RESPONSE_REGISTER_LOOP_MAX 200 @@ -89,11 +88,6 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3 #ifdef CONFIG_DEBUG_FS static int smu_fw_info_show(struct seq_file *s, void *unused) { - struct amd_pmc_dev *dev = s->private; - u32 value; - - value = ioread32(dev->smu_base + AMD_SMU_FW_VERSION); - seq_printf(s, "SMU FW Info: %x\n", value); return 0; } DEFINE_SHOW_ATTRIBUTE(smu_fw_info); @@ -280,10 +274,6 @@ static int amd_pmc_probe(struct platform_device *pdev) pci_dev_put(rdev); base_addr = ((u64)base_addr_hi << 32 | base_addr_lo); - dev->smu_base = devm_ioremap(dev->dev, base_addr, AMD_PMC_MAPPING_SIZE); - if (!dev->smu_base) - return -ENOMEM; - dev->regbase = devm_ioremap(dev->dev, base_addr + AMD_PMC_BASE_ADDR_OFFSET, AMD_PMC_MAPPING_SIZE); if (!dev->regbase) From 162b937a8064029ed22cd1039d4dcf7f1721f940 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:17:59 +0530 Subject: [PATCH 063/794] platform/x86: amd-pmc: call dump registers only once Currently amd_pmc_dump_registers() routine is being called at multiple places. The best to call it is after command submission to SMU. 
Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-4-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index b1d6175a13b2..e5107e3b1911 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -182,6 +182,7 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) out_unlock: mutex_unlock(&dev->lock); + amd_pmc_dump_registers(dev); return rc; } @@ -194,7 +195,6 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev) if (rc) dev_err(pdev->dev, "suspend failed\n"); - amd_pmc_dump_registers(pdev); return 0; } @@ -207,7 +207,6 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) if (rc) dev_err(pdev->dev, "resume failed\n"); - amd_pmc_dump_registers(pdev); return 0; } @@ -279,8 +278,6 @@ static int amd_pmc_probe(struct platform_device *pdev) if (!dev->regbase) return -ENOMEM; - amd_pmc_dump_registers(dev); - mutex_init(&dev->lock); platform_set_drvdata(pdev, dev); amd_pmc_dbgfs_register(dev); From 76620567496237f1f1f54683ec7da1755ee501d7 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:18:00 +0530 Subject: [PATCH 064/794] platform/x86: amd-pmc: Add support for logging SMU metrics SMU provides a way to dump the s0ix debug statistics in the form of a metrics table via a set of special mailbox commands. Add support to the driver which can send these commands to SMU and expose the information received via debugfs. The information contains the s0ix entry/exit, active time of each IP block etc. As a side note, SMU subsystem logging is not supported on Picasso based SoCs.
Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-5-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 147 +++++++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index e5107e3b1911..0ebb2732c46a 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -46,6 +46,14 @@ #define AMD_PMC_RESULT_CMD_UNKNOWN 0xFE #define AMD_PMC_RESULT_FAILED 0xFF +/* SMU Message Definations */ +#define SMU_MSG_GETSMUVERSION 0x02 +#define SMU_MSG_LOG_GETDRAM_ADDR_HI 0x04 +#define SMU_MSG_LOG_GETDRAM_ADDR_LO 0x05 +#define SMU_MSG_LOG_START 0x06 +#define SMU_MSG_LOG_RESET 0x07 +#define SMU_MSG_LOG_DUMP_DATA 0x08 +#define SMU_MSG_GET_SUP_CONSTRAINTS 0x09 /* List of supported CPU ids */ #define AMD_CPU_ID_RV 0x15D0 #define AMD_CPU_ID_RN 0x1630 @@ -55,17 +63,42 @@ #define PMC_MSG_DELAY_MIN_US 100 #define RESPONSE_REGISTER_LOOP_MAX 200 +#define SOC_SUBSYSTEM_IP_MAX 12 +#define DELAY_MIN_US 2000 +#define DELAY_MAX_US 3000 enum amd_pmc_def { MSG_TEST = 0x01, MSG_OS_HINT_PCO, MSG_OS_HINT_RN, }; +struct amd_pmc_bit_map { + const char *name; + u32 bit_mask; +}; + +static const struct amd_pmc_bit_map soc15_ip_blk[] = { + {"DISPLAY", BIT(0)}, + {"CPU", BIT(1)}, + {"GFX", BIT(2)}, + {"VDD", BIT(3)}, + {"ACP", BIT(4)}, + {"VCN", BIT(5)}, + {"ISP", BIT(6)}, + {"NBIO", BIT(7)}, + {"DF", BIT(8)}, + {"USB0", BIT(9)}, + {"USB1", BIT(10)}, + {"LAPIC", BIT(11)}, + {} +}; + struct amd_pmc_dev { void __iomem *regbase; - void __iomem *smu_base; + void __iomem *smu_virt_addr; u32 base_addr; u32 cpu_id; + u32 active_ips; struct device *dev; struct mutex lock; /* generic mutex lock */ #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -74,6 +107,7 @@ struct amd_pmc_dev { }; static struct amd_pmc_dev pmc; +static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret); 
static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset) { @@ -85,9 +119,49 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3 iowrite32(val, dev->regbase + reg_offset); } +struct smu_metrics { + u32 table_version; + u32 hint_count; + u32 s0i3_cyclecount; + u32 timein_s0i2; + u64 timeentering_s0i3_lastcapture; + u64 timeentering_s0i3_totaltime; + u64 timeto_resume_to_os_lastcapture; + u64 timeto_resume_to_os_totaltime; + u64 timein_s0i3_lastcapture; + u64 timein_s0i3_totaltime; + u64 timein_swdrips_lastcapture; + u64 timein_swdrips_totaltime; + u64 timecondition_notmet_lastcapture[SOC_SUBSYSTEM_IP_MAX]; + u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX]; +} __packed; + #ifdef CONFIG_DEBUG_FS static int smu_fw_info_show(struct seq_file *s, void *unused) { + struct amd_pmc_dev *dev = s->private; + struct smu_metrics table; + int idx; + + if (dev->cpu_id == AMD_CPU_ID_PCO) + return -EINVAL; + + memcpy_fromio(&table, dev->smu_virt_addr, sizeof(struct smu_metrics)); + + seq_puts(s, "\n=== SMU Statistics ===\n"); + seq_printf(s, "Table Version: %d\n", table.table_version); + seq_printf(s, "Hint Count: %d\n", table.hint_count); + seq_printf(s, "S0i3 Cycle Count: %d\n", table.s0i3_cyclecount); + seq_printf(s, "Time (in us) to S0i3: %lld\n", table.timeentering_s0i3_lastcapture); + seq_printf(s, "Time (in us) in S0i3: %lld\n", table.timein_s0i3_lastcapture); + + seq_puts(s, "\n=== Active time (in us) ===\n"); + for (idx = 0 ; idx < SOC_SUBSYSTEM_IP_MAX ; idx++) { + if (soc15_ip_blk[idx].bit_mask & dev->active_ips) + seq_printf(s, "%-8s : %lld\n", soc15_ip_blk[idx].name, + table.timecondition_notmet_lastcapture[idx]); + } + return 0; } DEFINE_SHOW_ATTRIBUTE(smu_fw_info); @@ -113,6 +187,32 @@ static inline void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev) } #endif /* CONFIG_DEBUG_FS */ +static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev) +{ + u32 phys_addr_low, phys_addr_hi; + u64 smu_phys_addr; + + if 
(dev->cpu_id == AMD_CPU_ID_PCO) + return -EINVAL; + + /* Get Active devices list from SMU */ + amd_pmc_send_cmd(dev, 0, &dev->active_ips, SMU_MSG_GET_SUP_CONSTRAINTS, 1); + + /* Get dram address */ + amd_pmc_send_cmd(dev, 0, &phys_addr_low, SMU_MSG_LOG_GETDRAM_ADDR_LO, 1); + amd_pmc_send_cmd(dev, 0, &phys_addr_hi, SMU_MSG_LOG_GETDRAM_ADDR_HI, 1); + smu_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low); + + dev->smu_virt_addr = devm_ioremap(dev->dev, smu_phys_addr, sizeof(struct smu_metrics)); + if (!dev->smu_virt_addr) + return -ENOMEM; + + /* Start the logging */ + amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0); + + return 0; +} + static void amd_pmc_dump_registers(struct amd_pmc_dev *dev) { u32 value; @@ -127,10 +227,9 @@ static void amd_pmc_dump_registers(struct amd_pmc_dev *dev) dev_dbg(dev->dev, "AMD_PMC_REGISTER_MESSAGE:%x\n", value); } -static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) +static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret) { int rc; - u8 msg; u32 val; mutex_lock(&dev->lock); @@ -150,8 +249,8 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) amd_pmc_reg_write(dev, AMD_PMC_REGISTER_ARGUMENT, set); /* Write message ID to message ID register */ - msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO; amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg); + /* Wait until we get a valid response */ rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE, val, val != 0, PMC_MSG_DELAY_MIN_US, @@ -163,6 +262,11 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) switch (val) { case AMD_PMC_RESULT_OK: + if (ret) { + /* PMFW may take longer time to return back the data */ + usleep_range(DELAY_MIN_US, 10 * DELAY_MAX_US); + *data = amd_pmc_reg_read(dev, AMD_PMC_REGISTER_ARGUMENT); + } break; case AMD_PMC_RESULT_CMD_REJECT_BUSY: dev_err(dev->dev, "SMU not ready. 
err: 0x%x\n", val); @@ -186,12 +290,29 @@ out_unlock: return rc; } +static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) +{ + switch (dev->cpu_id) { + case AMD_CPU_ID_PCO: + return MSG_OS_HINT_PCO; + case AMD_CPU_ID_RN: + return MSG_OS_HINT_RN; + } + return -EINVAL; +} + static int __maybe_unused amd_pmc_suspend(struct device *dev) { struct amd_pmc_dev *pdev = dev_get_drvdata(dev); int rc; + u8 msg; - rc = amd_pmc_send_cmd(pdev, 1); + /* Reset and Start SMU logging - to monitor the s0i3 stats */ + amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0); + amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0); + + msg = amd_pmc_get_os_hint(pdev); + rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0); if (rc) dev_err(pdev->dev, "suspend failed\n"); @@ -202,8 +323,13 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) { struct amd_pmc_dev *pdev = dev_get_drvdata(dev); int rc; + u8 msg; - rc = amd_pmc_send_cmd(pdev, 0); + /* Let SMU know that we are looking for stats */ + amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_DUMP_DATA, 0); + + msg = amd_pmc_get_os_hint(pdev); + rc = amd_pmc_send_cmd(pdev, 0, NULL, msg, 0); if (rc) dev_err(pdev->dev, "resume failed\n"); @@ -226,8 +352,7 @@ static int amd_pmc_probe(struct platform_device *pdev) { struct amd_pmc_dev *dev = &pmc; struct pci_dev *rdev; - u32 base_addr_lo; - u32 base_addr_hi; + u32 base_addr_lo, base_addr_hi; u64 base_addr; int err; u32 val; @@ -279,6 +404,12 @@ static int amd_pmc_probe(struct platform_device *pdev) return -ENOMEM; mutex_init(&dev->lock); + + /* Use SMU to get the s0i3 debug stats */ + err = amd_pmc_setup_smu_logging(dev); + if (err) + dev_err(dev->dev, "SMU debugging info not supported on this platform\n"); + platform_set_drvdata(pdev, dev); amd_pmc_dbgfs_register(dev); return 0; From b9a4fa6978bef902409858737fa180fa7b9346ac Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:18:01 +0530 Subject: [PATCH 065/794] platform/x86: amd-pmc: Add support for logging s0ix counters 
Even the FCH SSC registers provides certain level of information about the s0ix entry and exit times which comes handy when the SMU fails to report the statistics via the mailbox communication. This information is captured via a new debugfs file "s0ix_stats". A non-zero entry in this counters would mean that the system entered the s0ix state. If s0ix entry time and exit time don't change during suspend to idle, the silicon has not entered the deepest state. Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-6-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 45 +++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 0ebb2732c46a..7f011c3f60f2 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -46,6 +46,15 @@ #define AMD_PMC_RESULT_CMD_UNKNOWN 0xFE #define AMD_PMC_RESULT_FAILED 0xFF +/* FCH SSC Registers */ +#define FCH_S0I3_ENTRY_TIME_L_OFFSET 0x30 +#define FCH_S0I3_ENTRY_TIME_H_OFFSET 0x34 +#define FCH_S0I3_EXIT_TIME_L_OFFSET 0x38 +#define FCH_S0I3_EXIT_TIME_H_OFFSET 0x3C +#define FCH_SSC_MAPPING_SIZE 0x800 +#define FCH_BASE_PHY_ADDR_LOW 0xFED81100 +#define FCH_BASE_PHY_ADDR_HIGH 0x00000000 + /* SMU Message Definations */ #define SMU_MSG_GETSMUVERSION 0x02 #define SMU_MSG_LOG_GETDRAM_ADDR_HI 0x04 @@ -96,6 +105,7 @@ static const struct amd_pmc_bit_map soc15_ip_blk[] = { struct amd_pmc_dev { void __iomem *regbase; void __iomem *smu_virt_addr; + void __iomem *fch_virt_addr; u32 base_addr; u32 cpu_id; u32 active_ips; @@ -166,6 +176,29 @@ static int smu_fw_info_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(smu_fw_info); +static int s0ix_stats_show(struct seq_file *s, void *unused) +{ + struct amd_pmc_dev *dev = s->private; + u64 entry_time, exit_time, residency; + + entry_time = ioread32(dev->fch_virt_addr + 
FCH_S0I3_ENTRY_TIME_H_OFFSET); + entry_time = entry_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_L_OFFSET); + + exit_time = ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_H_OFFSET); + exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET); + + /* It's in 48MHz. We need to convert it */ + residency = (exit_time - entry_time) / 48; + + seq_puts(s, "=== S0ix statistics ===\n"); + seq_printf(s, "S0ix Entry Time: %lld\n", entry_time); + seq_printf(s, "S0ix Exit Time: %lld\n", exit_time); + seq_printf(s, "Residency Time: %lld\n", residency); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(s0ix_stats); + static void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev) { debugfs_remove_recursive(dev->dbgfs_dir); @@ -176,6 +209,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev) dev->dbgfs_dir = debugfs_create_dir("amd_pmc", NULL); debugfs_create_file("smu_fw_info", 0644, dev->dbgfs_dir, dev, &smu_fw_info_fops); + debugfs_create_file("s0ix_stats", 0644, dev->dbgfs_dir, dev, + &s0ix_stats_fops); } #else static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev) @@ -353,7 +388,7 @@ static int amd_pmc_probe(struct platform_device *pdev) struct amd_pmc_dev *dev = &pmc; struct pci_dev *rdev; u32 base_addr_lo, base_addr_hi; - u64 base_addr; + u64 base_addr, fch_phys_addr; int err; u32 val; @@ -405,6 +440,14 @@ static int amd_pmc_probe(struct platform_device *pdev) mutex_init(&dev->lock); + /* Use FCH registers to get the S0ix stats */ + base_addr_lo = FCH_BASE_PHY_ADDR_LOW; + base_addr_hi = FCH_BASE_PHY_ADDR_HIGH; + fch_phys_addr = ((u64)base_addr_hi << 32 | base_addr_lo); + dev->fch_virt_addr = devm_ioremap(dev->dev, fch_phys_addr, FCH_SSC_MAPPING_SIZE); + if (!dev->fch_virt_addr) + return -ENOMEM; + /* Use SMU to get the s0i3 debug stats */ err = amd_pmc_setup_smu_logging(dev); if (err) From 9422584a601ae8e4af51e890a14a936b2b689628 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:18:02 
+0530 Subject: [PATCH 066/794] platform/x86: amd-pmc: Add support for ACPI ID AMDI0006 Some newer BIOSes have added another ACPI ID for the uPEP device. SMU statistics behave identically on this device. Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-7-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 7f011c3f60f2..c5054fa2aed9 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -469,6 +469,7 @@ static int amd_pmc_remove(struct platform_device *pdev) static const struct acpi_device_id amd_pmc_acpi_ids[] = { {"AMDI0005", 0}, + {"AMDI0006", 0}, {"AMD0004", 0}, { } }; From 83cbaf14275a30f14cf558b09389a1664b173858 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:18:03 +0530 Subject: [PATCH 067/794] platform/x86: amd-pmc: Add new acpi id for future PMC controllers The upcoming PMC controller would have a newer ACPI ID; add that to the list of supported ACPI device IDs.
Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-8-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index c5054fa2aed9..d2f9a62e1166 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -68,6 +68,7 @@ #define AMD_CPU_ID_RN 0x1630 #define AMD_CPU_ID_PCO AMD_CPU_ID_RV #define AMD_CPU_ID_CZN AMD_CPU_ID_RN +#define AMD_CPU_ID_YC 0x14B5 #define PMC_MSG_DELAY_MIN_US 100 #define RESPONSE_REGISTER_LOOP_MAX 200 @@ -331,6 +332,7 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) case AMD_CPU_ID_PCO: return MSG_OS_HINT_PCO; case AMD_CPU_ID_RN: + case AMD_CPU_ID_YC: return MSG_OS_HINT_RN; } return -EINVAL; @@ -376,6 +378,7 @@ static const struct dev_pm_ops amd_pmc_pm_ops = { }; static const struct pci_device_id pmc_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PCO) }, @@ -470,6 +473,7 @@ static int amd_pmc_remove(struct platform_device *pdev) static const struct acpi_device_id amd_pmc_acpi_ids[] = { {"AMDI0005", 0}, {"AMDI0006", 0}, + {"AMDI0007", 0}, {"AMD0004", 0}, { } }; From a973c983375c37301645d4fea056b1f4bff77bf7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 7 Jul 2021 09:16:47 -0500 Subject: [PATCH 068/794] platform/x86: amd-pmc: Use return code on suspend Right now the driver will still return success even if the OS_HINT command failed to send to the SMU. In the rare event of a failure, the suspend should really be aborted here so that relevant logs can be captured.
Signed-off-by: Mario Limonciello Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20210707141647.8871-1-mario.limonciello@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index d2f9a62e1166..680f94c7e075 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -353,7 +353,7 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev) if (rc) dev_err(pdev->dev, "suspend failed\n"); - return 0; + return rc; } static int __maybe_unused amd_pmc_resume(struct device *dev) From 64752a95b702817602d72f109ceaf5ec0780e283 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jul 2021 10:48:36 +0200 Subject: [PATCH 069/794] ALSA: usb-audio: Add missing proc text entry for BESPOKEN type Recently we've added a new usb_mixer element type, USB_MIXER_BESPOKEN, but it wasn't added in the table in snd_usb_mixer_dump_cval(). This is no big problem since each bespoken type should have its own dump method, but it still isn't disallowed to use the standard one, so we should cover it as well. Along with it, define the table with the explicit array initializer for avoiding other pitfalls. 
Fixes: 785b6f29a795 ("ALSA: usb-audio: scarlett2: Fix wrong resume call") Reported-by: Pavel Machek Cc: Link: https://lore.kernel.org/r/20210714084836.1977-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 30b3e128e28d..f4cdaf1ba44a 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -3295,7 +3295,15 @@ static void snd_usb_mixer_dump_cval(struct snd_info_buffer *buffer, { struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); static const char * const val_types[] = { - "BOOLEAN", "INV_BOOLEAN", "S8", "U8", "S16", "U16", "S32", "U32", + [USB_MIXER_BOOLEAN] = "BOOLEAN", + [USB_MIXER_INV_BOOLEAN] = "INV_BOOLEAN", + [USB_MIXER_S8] = "S8", + [USB_MIXER_U8] = "U8", + [USB_MIXER_S16] = "S16", + [USB_MIXER_U16] = "U16", + [USB_MIXER_S32] = "S32", + [USB_MIXER_U32] = "U32", + [USB_MIXER_BESPOKEN] = "BESPOKEN", }; snd_iprintf(buffer, " Info: id=%i, control=%i, cmask=0x%x, " "channels=%i, type=\"%s\"\n", cval->head.id, From edb25572fc7058db5a98223e11d2d50497178553 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 23 Jun 2021 00:50:01 -0700 Subject: [PATCH 070/794] mmc: core: Use kref in place of struct mmc_blk_data::usage Ulf reported the following KASAN splat after adding some manual hacks into mmc-utils[1]. DEBUG: mmc_blk_open: Let's sleep for 10s.. mmc1: card 0007 removed BUG: KASAN: use-after-free in mmc_blk_get+0x58/0xb8 Read of size 4 at addr ffff00000a394a28 by task mmc/180 CPU: 2 PID: 180 Comm: mmc Not tainted 5.10.0-rc4-00069-gcc758c8c7127-dirty #5 Hardware name: Qualcomm Technologies, Inc. 
APQ 8016 SBC (DT) Call trace: dump_backtrace+0x0/0x2b4 show_stack+0x18/0x6c dump_stack+0xfc/0x168 print_address_description.constprop.0+0x6c/0x488 kasan_report+0x118/0x210 __asan_load4+0x94/0xd0 mmc_blk_get+0x58/0xb8 mmc_blk_open+0x7c/0xdc __blkdev_get+0x3b4/0x964 blkdev_get+0x64/0x100 blkdev_open+0xe8/0x104 do_dentry_open+0x234/0x61c vfs_open+0x54/0x64 path_openat+0xe04/0x1584 do_filp_open+0xe8/0x1e4 do_sys_openat2+0x120/0x230 __arm64_sys_openat+0xf0/0x15c el0_svc_common.constprop.0+0xac/0x234 do_el0_svc+0x84/0xa0 el0_sync_handler+0x264/0x270 el0_sync+0x174/0x180 Allocated by task 33: stack_trace_save+0x9c/0xdc kasan_save_stack+0x28/0x60 __kasan_kmalloc.constprop.0+0xc8/0xf0 kasan_kmalloc+0x10/0x20 mmc_blk_alloc_req+0x94/0x4b0 mmc_blk_probe+0x2d4/0xaa4 mmc_bus_probe+0x34/0x4c really_probe+0x148/0x6e0 driver_probe_device+0x78/0xec __device_attach_driver+0x108/0x16c bus_for_each_drv+0xf4/0x15c __device_attach+0x168/0x240 device_initial_probe+0x14/0x20 bus_probe_device+0xec/0x100 device_add+0x55c/0xaf0 mmc_add_card+0x288/0x380 mmc_attach_sd+0x18c/0x22c mmc_rescan+0x444/0x4f0 process_one_work+0x3b8/0x650 worker_thread+0xa0/0x724 kthread+0x218/0x220 ret_from_fork+0x10/0x38 Freed by task 33: stack_trace_save+0x9c/0xdc kasan_save_stack+0x28/0x60 kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x4c __kasan_slab_free+0x100/0x180 kasan_slab_free+0x14/0x20 kfree+0xb8/0x46c mmc_blk_put+0xe4/0x11c mmc_blk_remove_req.part.0+0x6c/0xe4 mmc_blk_remove+0x368/0x370 mmc_bus_remove+0x34/0x50 __device_release_driver+0x228/0x31c device_release_driver+0x2c/0x44 bus_remove_device+0x1e4/0x200 device_del+0x2b0/0x770 mmc_remove_card+0xf0/0x150 mmc_sd_detect+0x9c/0x150 mmc_rescan+0x110/0x4f0 process_one_work+0x3b8/0x650 worker_thread+0xa0/0x724 kthread+0x218/0x220 ret_from_fork+0x10/0x38 The buggy address belongs to the object at ffff00000a394800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 552 bytes inside of 1024-byte region [ffff00000a394800, 
ffff00000a394c00) The buggy address belongs to the page: page:00000000ff84ed53 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x8a390 head:00000000ff84ed53 order:3 compound_mapcount:0 compound_pincount:0 flags: 0x3fffc0000010200(slab|head) raw: 03fffc0000010200 dead000000000100 dead000000000122 ffff000009f03800 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff00000a394900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff00000a394980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff00000a394a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff00000a394a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff00000a394b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Looking closer at the problem, it looks like a classic dangling pointer bug. The 'struct mmc_blk_data' that is used after being freed in mmc_blk_put() is stashed away in 'md->disk->private_data' via mmc_blk_alloc_req() but used in mmc_blk_get() because the 'usage' count isn't properly aligned with the lifetime of the pointer. You'd expect the 'usage' member to be in sync with the kfree(), and it mostly is, except that mmc_blk_get() needs to dereference the potentially freed memory storage for the 'struct mmc_blk_data' stashed away in the private_data member to look at 'usage' before it actually figures out if it wants to consider it a valid pointer or not. That's not going to work if the freed memory has been overwritten by something else after the free, and KASAN rightly complains here. To fix the immediate problem, let's set the private_data member to NULL in mmc_blk_put() so that mmc_blk_get() can consider the object "on the way out" if the pointer is NULL and not even try to look at 'usage' if the object isn't going to be around much longer. 
With that set to NULL on the last mmc_blk_put(), optimize the get path further and use a kref underneath the 'open_lock' mutex to only up the reference count if it's non-zero, i.e. alive, and otherwise make mmc_blk_get() return NULL, without actually testing the reference count if we're in the process of removing the object from the system. Finally, tighten the locking region on the put side to only be around the parts that are removing the 'mmc_blk_data' from the system and publishing that fact to the gendisk and then drop the lock as soon as we can to avoid holding the lock around code that doesn't need it. This fixes the KASAN issue. Cc: Matthias Schiffer Cc: Sujit Kautkar Cc: Zubin Mithra Reported-by: Ulf Hansson Link: https://lore.kernel.org/linux-mmc/CAPDyKFryT63Jc7+DXWSpAC19qpZRqFr1orxwYGMuSqx247O8cQ@mail.gmail.com/ [1] Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20210623075002.1746924-2-swboyd@chromium.org Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 9890a1532cb0..ce8aed562929 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -111,7 +112,7 @@ struct mmc_blk_data { #define MMC_BLK_CMD23 (1 << 0) /* Can do SET_BLOCK_COUNT for multiblock */ #define MMC_BLK_REL_WR (1 << 1) /* MMC Reliable write support */ - unsigned int usage; + struct kref kref; unsigned int read_only; unsigned int part_type; unsigned int reset_done; @@ -181,10 +182,8 @@ static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk) mutex_lock(&open_lock); md = disk->private_data; - if (md && md->usage == 0) + if (md && !kref_get_unless_zero(&md->kref)) md = NULL; - if (md) - md->usage++; mutex_unlock(&open_lock); return md; @@ -196,18 +195,25 @@ static inline int 
mmc_get_devidx(struct gendisk *disk) return devidx; } +static void mmc_blk_kref_release(struct kref *ref) +{ + struct mmc_blk_data *md = container_of(ref, struct mmc_blk_data, kref); + int devidx; + + devidx = mmc_get_devidx(md->disk); + ida_simple_remove(&mmc_blk_ida, devidx); + + mutex_lock(&open_lock); + md->disk->private_data = NULL; + mutex_unlock(&open_lock); + + put_disk(md->disk); + kfree(md); +} + static void mmc_blk_put(struct mmc_blk_data *md) { - mutex_lock(&open_lock); - md->usage--; - if (md->usage == 0) { - int devidx = mmc_get_devidx(md->disk); - - ida_simple_remove(&mmc_blk_ida, devidx); - put_disk(md->disk); - kfree(md); - } - mutex_unlock(&open_lock); + kref_put(&md->kref, mmc_blk_kref_release); } static ssize_t power_ro_lock_show(struct device *dev, @@ -2327,7 +2333,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, INIT_LIST_HEAD(&md->part); INIT_LIST_HEAD(&md->rpmbs); - md->usage = 1; + kref_init(&md->kref); + md->queue.blkdata = md; md->disk->major = MMC_BLOCK_MAJOR; From 10252bae863d09b9648bed2e035572d207200ca1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 23 Jun 2021 00:50:02 -0700 Subject: [PATCH 071/794] mmc: core: Don't allocate IDA for OF aliases There's a chance that the IDA allocated in mmc_alloc_host() is not freed for some time because it's freed as part of a class' release function (see mmc_host_classdev_release() where the IDA is freed). If another thread is holding a reference to the class, then only once all balancing device_put() calls (in turn calling kobject_put()) have been made will the IDA be released and usable again. Normally this isn't a problem because the kobject is released before anything else that may want to use the same number tries to again, but with CONFIG_DEBUG_KOBJECT_RELEASE=y and OF aliases it becomes pretty easy to try to allocate an alias from the IDA twice while the first time it was allocated is still pending a call to ida_simple_remove(). 
It's also possible to trigger it by using CONFIG_DEBUG_KOBJECT_RELEASE and probe deferring a driver at boot that calls mmc_alloc_host() before trying to get resources that may defer, like clks or regulators. Instead of allocating from the IDA in this scenario, let's just skip it if we know this is an OF alias. The number is already "claimed" and devices that aren't using OF aliases won't try to use the claimed numbers anyway (see mmc_first_nonreserved_index()). This should avoid any issues with mmc_alloc_host() returning failures from the ida_simple_get() in the case that we're using an OF alias. Cc: Matthias Schiffer Cc: Sujit Kautkar Reported-by: Zubin Mithra Fixes: fa2d0aa96941 ("mmc: core: Allow setting slot index via device tree alias") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20210623075002.1746924-3-swboyd@chromium.org Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/host.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index eda4a1892c33..0475d96047c4 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -75,7 +75,8 @@ static void mmc_host_classdev_release(struct device *dev) { struct mmc_host *host = cls_dev_to_mmc_host(dev); wakeup_source_unregister(host->ws); - ida_simple_remove(&mmc_host_ida, host->index); + if (of_alias_get_id(host->parent->of_node, "mmc") < 0) + ida_simple_remove(&mmc_host_ida, host->index); kfree(host); } @@ -502,7 +503,7 @@ static int mmc_first_nonreserved_index(void) */ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) { - int err; + int index; struct mmc_host *host; int alias_id, min_idx, max_idx; @@ -515,20 +516,19 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) alias_id = of_alias_get_id(dev->of_node, "mmc"); if (alias_id >= 0) { - min_idx = alias_id; - max_idx = alias_id + 1; + index = alias_id; } else { min_idx = mmc_first_nonreserved_index();
max_idx = 0; + + index = ida_simple_get(&mmc_host_ida, min_idx, max_idx, GFP_KERNEL); + if (index < 0) { + kfree(host); + return NULL; + } } - err = ida_simple_get(&mmc_host_ida, min_idx, max_idx, GFP_KERNEL); - if (err < 0) { - kfree(host); - return NULL; - } - - host->index = err; + host->index = index; dev_set_name(&host->class_dev, "mmc%d", host->index); host->ws = wakeup_source_register(NULL, dev_name(&host->class_dev)); From 23e9592b06b43cea4d6799843795beca13437907 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Sat, 10 Jul 2021 13:08:10 -0600 Subject: [PATCH 072/794] platform/x86: wireless-hotkey: remove hardcoded "hp" from the error message This driver is no longer specific to HP laptops so "hp" in the error message is no longer applicable. Signed-off-by: Alex Hung Link: https://lore.kernel.org/r/20210710190810.313104-1-alex.hung@canonical.com Signed-off-by: Hans de Goede --- drivers/platform/x86/wireless-hotkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c index b010e4ca3383..11c60a273446 100644 --- a/drivers/platform/x86/wireless-hotkey.c +++ b/drivers/platform/x86/wireless-hotkey.c @@ -78,7 +78,7 @@ static int wl_add(struct acpi_device *device) err = wireless_input_setup(); if (err) - pr_err("Failed to setup hp wireless hotkeys\n"); + pr_err("Failed to setup wireless hotkeys\n"); return err; } From 9c23aa51477a37f8b56c3c40192248db0663c196 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jul 2021 19:00:21 +0200 Subject: [PATCH 073/794] r8152: Fix potential PM refcount imbalance rtl8152_close() takes the refcount via usb_autopm_get_interface() but it doesn't release when RTL8152_UNPLUG test hits. This may lead to the imbalance of PM refcount. This patch addresses it. Link: https://bugzilla.suse.com/show_bug.cgi?id=1186194 Signed-off-by: Takashi Iwai Signed-off-by: David S. 
Miller --- drivers/net/usb/r8152.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 1692d3b1b6e1..4096e20e9725 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6763,10 +6763,11 @@ static int rtl8152_close(struct net_device *netdev) tp->rtl_ops.down(tp); mutex_unlock(&tp->control); - - usb_autopm_put_interface(tp->intf); } + if (!res) + usb_autopm_put_interface(tp->intf); + free_all_mem(tp); return res; From 776ac63a986d211286230c4fd70f85390eabedcd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jul 2021 19:00:22 +0200 Subject: [PATCH 074/794] r8152: Fix a deadlock by doubly PM resume r8152 driver sets up the MAC address at reset-resume, while rtl8152_set_mac_address() has the temporary autopm get/put. This may lead to a deadlock as the PM lock has been already taken for the execution of the runtime PM callback. This patch adds the workaround to avoid the superfluous autopm when called from rtl8152_reset_resume(). Link: https://bugzilla.suse.com/show_bug.cgi?id=1186194 Signed-off-by: Takashi Iwai Signed-off-by: David S.
Miller --- drivers/net/usb/r8152.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4096e20e9725..e09b107b5c99 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1552,7 +1552,8 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, u32 advertising); -static int rtl8152_set_mac_address(struct net_device *netdev, void *p) +static int __rtl8152_set_mac_address(struct net_device *netdev, void *p, + bool in_resume) { struct r8152 *tp = netdev_priv(netdev); struct sockaddr *addr = p; @@ -1561,9 +1562,11 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) goto out1; - ret = usb_autopm_get_interface(tp->intf); - if (ret < 0) - goto out1; + if (!in_resume) { + ret = usb_autopm_get_interface(tp->intf); + if (ret < 0) + goto out1; + } mutex_lock(&tp->control); @@ -1575,11 +1578,17 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p) mutex_unlock(&tp->control); - usb_autopm_put_interface(tp->intf); + if (!in_resume) + usb_autopm_put_interface(tp->intf); out1: return ret; } +static int rtl8152_set_mac_address(struct net_device *netdev, void *p) +{ + return __rtl8152_set_mac_address(netdev, p, false); +} + /* Devices containing proper chips can support a persistent * host system provided MAC address. 
* Examples of this are Dell TB15 and Dell WD15 docks @@ -1698,7 +1707,7 @@ static int determine_ethernet_addr(struct r8152 *tp, struct sockaddr *sa) return ret; } -static int set_ethernet_addr(struct r8152 *tp) +static int set_ethernet_addr(struct r8152 *tp, bool in_resume) { struct net_device *dev = tp->netdev; struct sockaddr sa; @@ -1711,7 +1720,7 @@ static int set_ethernet_addr(struct r8152 *tp) if (tp->version == RTL_VER_01) ether_addr_copy(dev->dev_addr, sa.sa_data); else - ret = rtl8152_set_mac_address(dev, &sa); + ret = __rtl8152_set_mac_address(dev, &sa, in_resume); return ret; } @@ -8444,7 +8453,7 @@ static int rtl8152_reset_resume(struct usb_interface *intf) clear_bit(SELECTIVE_SUSPEND, &tp->flags); tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - set_ethernet_addr(tp); + set_ethernet_addr(tp, true); return rtl8152_resume(intf); } @@ -9645,7 +9654,7 @@ static int rtl8152_probe(struct usb_interface *intf, tp->rtl_fw.retry = true; #endif queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - set_ethernet_addr(tp); + set_ethernet_addr(tp, false); usb_set_intfdata(intf, tp); From 1a3402d93c73bf6bb4df6d7c2aac35abfc3c50e2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 3 Jun 2021 01:15:59 +0200 Subject: [PATCH 075/794] posix-cpu-timers: Fix rearm racing against process tick Since the process wide cputime counter is started locklessly from posix_cpu_timer_rearm(), it can be concurrently stopped by operations on other timers from the same thread group, such as in the following unlucky scenario: CPU 0 CPU 1 ----- ----- timer_settime(TIMER B) posix_cpu_timer_rearm(TIMER A) cpu_clock_sample_group() (pct->timers_active already true) handle_posix_cpu_timers() check_process_timers() stop_process_timers() pct->timers_active = false arm_timer(TIMER A) tick -> run_posix_cpu_timers() // sees !pct->timers_active, ignore // our TIMER A Fix this with simply locking process wide cputime counting start and timer arm in the same 
block. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Frederic Weisbecker Fixes: 60f2ceaa8111 ("posix-cpu-timers: Remove unnecessary locking around cpu_clock_sample_group") Cc: stable@vger.kernel.org Cc: Oleg Nesterov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Eric W. Biederman --- kernel/time/posix-cpu-timers.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 29a5e54e6e10..517be7fd175e 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -991,6 +991,11 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) if (!p) goto out; + /* Protect timer list r/w in arm_timer() */ + sighand = lock_task_sighand(p, &flags); + if (unlikely(sighand == NULL)) + goto out; + /* * Fetch the current sample and update the timer's expiry time. */ @@ -1001,11 +1006,6 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) bump_cpu_timer(timer, now); - /* Protect timer list r/w in arm_timer() */ - sighand = lock_task_sighand(p, &flags); - if (unlikely(sighand == NULL)) - goto out; - /* * Now re-arm for the new expiry time. */ From aebacb7f6ca1926918734faae14d1f0b6fae5cb7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 9 Jul 2021 16:13:25 +0200 Subject: [PATCH 076/794] timers: Fix get_next_timer_interrupt() with no timers pending 31cd0e119d50 ("timers: Recalculate next timer interrupt only when necessary") subtly altered get_next_timer_interrupt()'s behaviour. The function no longer consistently returns KTIME_MAX with no timers pending. In order to decide if there are any timers pending we check whether the next expiry will happen NEXT_TIMER_MAX_DELTA jiffies from now. Unfortunately, the next expiry time and the timer base clock are no longer updated in unison. The former changes upon certain timer operations (enqueue, expire, detach), whereas the latter keeps track of jiffies as they move forward. Ultimately breaking the logic above. 
A simplified example: - Upon entering get_next_timer_interrupt() with: jiffies = 1 base->clk = 0; base->next_expiry = NEXT_TIMER_MAX_DELTA; 'base->next_expiry == base->clk + NEXT_TIMER_MAX_DELTA', the function returns KTIME_MAX. - 'base->clk' is updated to the jiffies value. - The next time we enter get_next_timer_interrupt(), taking into account no timer operations happened: base->clk = 1; base->next_expiry = NEXT_TIMER_MAX_DELTA; 'base->next_expiry != base->clk + NEXT_TIMER_MAX_DELTA', the function returns a valid expire time, which is incorrect. This ultimately might unnecessarily rearm sched's timer on nohz_full setups, and add latency to the system[1]. So, introduce 'base->timers_pending'[2], update it every time 'base->next_expiry' changes, and use it in get_next_timer_interrupt(). [1] See tick_nohz_stop_tick(). [2] A quick pahole check on x86_64 and arm64 shows it doesn't make 'struct timer_base' any bigger. Fixes: 31cd0e119d50 ("timers: Recalculate next timer interrupt only when necessary") Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Frederic Weisbecker --- kernel/time/timer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 3fadb58fc9d7..9eb11c2209e5 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -207,6 +207,7 @@ struct timer_base { unsigned int cpu; bool next_expiry_recalc; bool is_idle; + bool timers_pending; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; @@ -595,6 +596,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer, * can reevaluate the wheel: */ base->next_expiry = bucket_expiry; + base->timers_pending = true; base->next_expiry_recalc = false; trigger_dyntick_cpu(base, timer); } @@ -1582,6 +1584,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base) } base->next_expiry_recalc = false; + base->timers_pending = !(next == base->clk + 
NEXT_TIMER_MAX_DELTA); return next; } @@ -1633,7 +1636,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); u64 expires = KTIME_MAX; unsigned long nextevt; - bool is_max_delta; /* * Pretend that there is no timer pending if the cpu is offline. @@ -1646,7 +1648,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (base->next_expiry_recalc) base->next_expiry = __next_timer_interrupt(base); nextevt = base->next_expiry; - is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); /* * We have a fresh next event. Check whether we can forward the @@ -1664,7 +1665,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) expires = basem; base->is_idle = false; } else { - if (!is_max_delta) + if (base->timers_pending) expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* * If we expect to sleep more than a tick, mark the base idle. @@ -1947,6 +1948,7 @@ int timers_prepare_cpu(unsigned int cpu) base = per_cpu_ptr(&timer_bases[b], cpu); base->clk = jiffies; base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; + base->timers_pending = false; base->is_idle = false; } return 0; From 95edbbf78c3bdbd1daa921dd4a2e61c751e469ba Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 15 Jul 2021 15:43:27 +0800 Subject: [PATCH 077/794] platform/x86: amd-pmc: Fix missing unlock on error in amd_pmc_send_cmd() Add the missing unlock before return from function amd_pmc_send_cmd() in the error handling case. 
Fixes: 95e1b60f8dc8 ("platform/x86: amd-pmc: Fix command completion code") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20210715074327.1966083-1-yangyingliang@huawei.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 680f94c7e075..663a4ca0580d 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -275,7 +275,7 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX); if (rc) { dev_err(dev->dev, "failed to talk to SMU\n"); - return rc; + goto out_unlock; } /* Write zero to response register */ From 32a19de21ae40f0601f48575b610dde4f518ccc6 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 7 Jul 2021 11:51:10 +0200 Subject: [PATCH 078/794] drm/vc4: hdmi: Drop devm interrupt handler for CEC interrupts The CEC interrupt handlers are registered through the devm_request_threaded_irq function. However, while free_irq is indeed called properly when the device is unbound or bind fails, it's called after unbind or bind is done. In our particular case, it means that on failure it creates a window where our interrupt handler can be called, but we're freeing every resource (CEC adapter, DRM objects, etc.) it might need. In order to address this, let's switch to the non-devm variant to control better when the handler will be unregistered and allow us to make it safe. 
Fixes: 15b4511a4af6 ("drm/vc4: add HDMI CEC support") Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Link: https://patchwork.freedesktop.org/patch/msgid/20210707095112.1469670-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_hdmi.c | 49 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index aab1b36ceb3c..c2876731ee2d 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1857,38 +1857,46 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) vc4_hdmi_cec_update_clk_div(vc4_hdmi); if (vc4_hdmi->variant->external_irq_controller) { - ret = devm_request_threaded_irq(&pdev->dev, - platform_get_irq_byname(pdev, "cec-rx"), - vc4_cec_irq_handler_rx_bare, - vc4_cec_irq_handler_rx_thread, 0, - "vc4 hdmi cec rx", vc4_hdmi); + ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-rx"), + vc4_cec_irq_handler_rx_bare, + vc4_cec_irq_handler_rx_thread, 0, + "vc4 hdmi cec rx", vc4_hdmi); if (ret) goto err_delete_cec_adap; - ret = devm_request_threaded_irq(&pdev->dev, - platform_get_irq_byname(pdev, "cec-tx"), - vc4_cec_irq_handler_tx_bare, - vc4_cec_irq_handler_tx_thread, 0, - "vc4 hdmi cec tx", vc4_hdmi); + ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-tx"), + vc4_cec_irq_handler_tx_bare, + vc4_cec_irq_handler_tx_thread, 0, + "vc4 hdmi cec tx", vc4_hdmi); if (ret) - goto err_delete_cec_adap; + goto err_remove_cec_rx_handler; } else { HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, 0xffffffff); - ret = devm_request_threaded_irq(&pdev->dev, platform_get_irq(pdev, 0), - vc4_cec_irq_handler, - vc4_cec_irq_handler_thread, 0, - "vc4 hdmi cec", vc4_hdmi); + ret = request_threaded_irq(platform_get_irq(pdev, 0), + vc4_cec_irq_handler, + vc4_cec_irq_handler_thread, 0, + "vc4 hdmi cec", vc4_hdmi); if (ret) goto err_delete_cec_adap; } ret = cec_register_adapter(vc4_hdmi->cec_adap, &pdev->dev); if (ret < 0) - goto 
err_delete_cec_adap; + goto err_remove_handlers; return 0; +err_remove_handlers: + if (vc4_hdmi->variant->external_irq_controller) + free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi); + else + free_irq(platform_get_irq(pdev, 0), vc4_hdmi); + +err_remove_cec_rx_handler: + if (vc4_hdmi->variant->external_irq_controller) + free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi); + err_delete_cec_adap: cec_delete_adapter(vc4_hdmi->cec_adap); @@ -1897,6 +1905,15 @@ err_delete_cec_adap: static void vc4_hdmi_cec_exit(struct vc4_hdmi *vc4_hdmi) { + struct platform_device *pdev = vc4_hdmi->pdev; + + if (vc4_hdmi->variant->external_irq_controller) { + free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi); + free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi); + } else { + free_irq(platform_get_irq(pdev, 0), vc4_hdmi); + } + cec_unregister_adapter(vc4_hdmi->cec_adap); } #else From f8c2602733c953ed7a16e060640b8e96f9d94b9b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Jun 2021 23:50:07 +0200 Subject: [PATCH 079/794] s390/ftrace: fix ftrace_update_ftrace_func implementation s390 enforces DYNAMIC_FTRACE if FUNCTION_TRACER is selected. At the same time implementation of ftrace_caller is not compliant with HAVE_DYNAMIC_FTRACE since it doesn't provide implementation of ftrace_update_ftrace_func() and calls ftrace_trace_function() directly. The subtle difference is that during ftrace code patching ftrace replaces function tracer via ftrace_update_ftrace_func() and activates it back afterwards. Unexpected direct calls to ftrace_trace_function() during ftrace code patching leads to nullptr-dereferences when tracing is activated for one of functions which are used during code patching. 
Those function currently are: copy_from_kernel_nofault() copy_from_kernel_nofault_allowed() preempt_count_sub() [with debug_defconfig] preempt_count_add() [with debug_defconfig] Corresponding KASAN report: BUG: KASAN: nullptr-dereference in function_trace_call+0x316/0x3b0 Read of size 4 at addr 0000000000001e08 by task migration/0/15 CPU: 0 PID: 15 Comm: migration/0 Tainted: G B 5.13.0-41423-g08316af3644d Hardware name: IBM 3906 M04 704 (LPAR) Stopper: multi_cpu_stop+0x0/0x3e0 <- stop_machine_cpuslocked+0x1e4/0x218 Call Trace: [<0000000001f77caa>] show_stack+0x16a/0x1d0 [<0000000001f8de42>] dump_stack+0x15a/0x1b0 [<0000000001f81d56>] print_address_description.constprop.0+0x66/0x2e0 [<000000000082b0ca>] kasan_report+0x152/0x1c0 [<00000000004cfd8e>] function_trace_call+0x316/0x3b0 [<0000000001fb7082>] ftrace_caller+0x7a/0x7e [<00000000006bb3e6>] copy_from_kernel_nofault_allowed+0x6/0x10 [<00000000006bb42e>] copy_from_kernel_nofault+0x3e/0xd0 [<000000000014605c>] ftrace_make_call+0xb4/0x1f8 [<000000000047a1b4>] ftrace_replace_code+0x134/0x1d8 [<000000000047a6e0>] ftrace_modify_all_code+0x120/0x1d0 [<000000000047a7ec>] __ftrace_modify_code+0x5c/0x78 [<000000000042395c>] multi_cpu_stop+0x224/0x3e0 [<0000000000423212>] cpu_stopper_thread+0x33a/0x5a0 [<0000000000243ff2>] smpboot_thread_fn+0x302/0x708 [<00000000002329ea>] kthread+0x342/0x408 [<00000000001066b2>] __ret_from_fork+0x92/0xf0 [<0000000001fb57fa>] ret_from_fork+0xa/0x30 The buggy address belongs to the page: page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1 flags: 0x1ffff00000001000(reserved|node=0|zone=0|lastcpupid=0x1ffff) raw: 1ffff00000001000 0000040000000048 0000040000000048 0000000000000000 raw: 0000000000000000 0000000000000000 ffffffff00000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: 0000000000001d00: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 0000000000001d80: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 
f7 f7 f7 >0000000000001e00: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 ^ 0000000000001e80: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 0000000000001f00: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 ================================================================== To fix that introduce ftrace_func callback to be called from ftrace_caller and update it in ftrace_update_ftrace_func(). Fixes: 4cc9bed034d1 ("[S390] cleanup ftrace backend functions") Cc: stable@vger.kernel.org Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ftrace.h | 1 + arch/s390/kernel/ftrace.c | 2 ++ arch/s390/kernel/mcount.S | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 695c61989f97..345cbe982a8b 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -19,6 +19,7 @@ void ftrace_caller(void); extern char ftrace_graph_caller_end; extern unsigned long ftrace_plt; +extern void *ftrace_func; struct dyn_arch_ftrace { }; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index c6ddeb5029b4..2d8f595d9196 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -40,6 +40,7 @@ * trampoline (ftrace_plt), which clobbers also r1. 
*/ +void *ftrace_func __read_mostly = ftrace_stub; unsigned long ftrace_plt; int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, @@ -85,6 +86,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_update_ftrace_func(ftrace_func_t func) { + ftrace_func = func; return 0; } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index faf64c2f90f5..6b13797143a7 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -59,13 +59,13 @@ ENTRY(ftrace_caller) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES aghik %r2,%r0,-MCOUNT_INSN_SIZE lgrl %r4,function_trace_op - lgrl %r1,ftrace_trace_function + lgrl %r1,ftrace_func #else lgr %r2,%r0 aghi %r2,-MCOUNT_INSN_SIZE larl %r4,function_trace_op lg %r4,0(%r4) - larl %r1,ftrace_trace_function + larl %r1,ftrace_func lg %r1,0(%r1) #endif lgr %r3,%r14 From ac34de14ac30ba4484d68f8845a54b6b6c23db42 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 28 Jun 2021 23:09:55 +0200 Subject: [PATCH 080/794] Revert "soc: imx8m: change to use platform driver" With the SoC matching changed to a platform driver the match data is available only after other drivers, which may rely on it, are already probed. This breaks at least the CAAM driver on i.MX8M. Revert the change until all those drivers have been audited and changed to be able to deal with match data being available later in the boot process.
Fixes: 7d981405d0fd ("soc: imx8m: change to use platform driver") Signed-off-by: Lucas Stach Tested-by: Frieder Schrempf Acked-by: Peng Fan Signed-off-by: Shawn Guo --- drivers/soc/imx/soc-imx8m.c | 84 ++++++------------------------------- 1 file changed, 12 insertions(+), 72 deletions(-) diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c index 071e14496e4b..cc57a384d74d 100644 --- a/drivers/soc/imx/soc-imx8m.c +++ b/drivers/soc/imx/soc-imx8m.c @@ -5,8 +5,6 @@ #include #include -#include -#include #include #include #include @@ -31,7 +29,7 @@ struct imx8_soc_data { char *name; - u32 (*soc_revision)(struct device *dev); + u32 (*soc_revision)(void); }; static u64 soc_uid; @@ -52,7 +50,7 @@ static u32 imx8mq_soc_revision_from_atf(void) static inline u32 imx8mq_soc_revision_from_atf(void) { return 0; }; #endif -static u32 __init imx8mq_soc_revision(struct device *dev) +static u32 __init imx8mq_soc_revision(void) { struct device_node *np; void __iomem *ocotp_base; @@ -77,20 +75,9 @@ static u32 __init imx8mq_soc_revision(struct device *dev) rev = REV_B1; } - if (dev) { - int ret; - - ret = nvmem_cell_read_u64(dev, "soc_unique_id", &soc_uid); - if (ret) { - iounmap(ocotp_base); - of_node_put(np); - return ret; - } - } else { - soc_uid = readl_relaxed(ocotp_base + OCOTP_UID_HIGH); - soc_uid <<= 32; - soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW); - } + soc_uid = readl_relaxed(ocotp_base + OCOTP_UID_HIGH); + soc_uid <<= 32; + soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW); iounmap(ocotp_base); of_node_put(np); @@ -120,7 +107,7 @@ static void __init imx8mm_soc_uid(void) of_node_put(np); } -static u32 __init imx8mm_soc_revision(struct device *dev) +static u32 __init imx8mm_soc_revision(void) { struct device_node *np; void __iomem *anatop_base; @@ -138,15 +125,7 @@ static u32 __init imx8mm_soc_revision(struct device *dev) iounmap(anatop_base); of_node_put(np); - if (dev) { - int ret; - - ret = nvmem_cell_read_u64(dev, "soc_unique_id", &soc_uid); 
- if (ret) - return ret; - } else { - imx8mm_soc_uid(); - } + imx8mm_soc_uid(); return rev; } @@ -171,7 +150,7 @@ static const struct imx8_soc_data imx8mp_soc_data = { .soc_revision = imx8mm_soc_revision, }; -static __maybe_unused const struct of_device_id imx8_machine_match[] = { +static __maybe_unused const struct of_device_id imx8_soc_match[] = { { .compatible = "fsl,imx8mq", .data = &imx8mq_soc_data, }, { .compatible = "fsl,imx8mm", .data = &imx8mm_soc_data, }, { .compatible = "fsl,imx8mn", .data = &imx8mn_soc_data, }, @@ -179,20 +158,12 @@ static __maybe_unused const struct of_device_id imx8_machine_match[] = { { } }; -static __maybe_unused const struct of_device_id imx8_soc_match[] = { - { .compatible = "fsl,imx8mq-soc", .data = &imx8mq_soc_data, }, - { .compatible = "fsl,imx8mm-soc", .data = &imx8mm_soc_data, }, - { .compatible = "fsl,imx8mn-soc", .data = &imx8mn_soc_data, }, - { .compatible = "fsl,imx8mp-soc", .data = &imx8mp_soc_data, }, - { } -}; - #define imx8_revision(soc_rev) \ soc_rev ? 
\ kasprintf(GFP_KERNEL, "%d.%d", (soc_rev >> 4) & 0xf, soc_rev & 0xf) : \ "unknown" -static int imx8_soc_info(struct platform_device *pdev) +static int __init imx8_soc_init(void) { struct soc_device_attribute *soc_dev_attr; struct soc_device *soc_dev; @@ -211,10 +182,7 @@ static int imx8_soc_info(struct platform_device *pdev) if (ret) goto free_soc; - if (pdev) - id = of_match_node(imx8_soc_match, pdev->dev.of_node); - else - id = of_match_node(imx8_machine_match, of_root); + id = of_match_node(imx8_soc_match, of_root); if (!id) { ret = -ENODEV; goto free_soc; @@ -223,16 +191,8 @@ static int imx8_soc_info(struct platform_device *pdev) data = id->data; if (data) { soc_dev_attr->soc_id = data->name; - if (data->soc_revision) { - if (pdev) { - soc_rev = data->soc_revision(&pdev->dev); - ret = soc_rev; - if (ret < 0) - goto free_soc; - } else { - soc_rev = data->soc_revision(NULL); - } - } + if (data->soc_revision) + soc_rev = data->soc_revision(); } soc_dev_attr->revision = imx8_revision(soc_rev); @@ -270,24 +230,4 @@ free_soc: kfree(soc_dev_attr); return ret; } - -/* Retain device_initcall is for backward compatibility with DTS. */ -static int __init imx8_soc_init(void) -{ - if (of_find_matching_node_and_match(NULL, imx8_soc_match, NULL)) - return 0; - - return imx8_soc_info(NULL); -} device_initcall(imx8_soc_init); - -static struct platform_driver imx8_soc_info_driver = { - .probe = imx8_soc_info, - .driver = { - .name = "imx8_soc_info", - .of_match_table = imx8_soc_match, - }, -}; - -module_platform_driver(imx8_soc_info_driver); -MODULE_LICENSE("GPL v2"); From e44fbdb68049539de9923ce4bad2d277aef54892 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 12 Jul 2021 11:36:50 +1000 Subject: [PATCH 081/794] KVM: PPC: Book3S HV P9: Fix guest TM support The conversion to C introduced several bugs in TM handling that can cause host crashes with TM bad thing interrupts. 
Mostly just simple typos or missed logic in the conversion that got through due to my not testing TM in the guest sufficiently. - Early TM emulation for the softpatch interrupt should be done if fake suspend mode is _not_ active. - Early TM emulation wants to return immediately to the guest so as to not doom transactions unnecessarily. - And if exiting from the guest, the host MSR should include the TM[S] bit if the guest was T/S, before it is treclaimed. After this fix, all the TM selftests pass when running on a P9 processor that implements TM with softpatch interrupt. Fixes: 89d35b2391015 ("KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C") Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210712013650.376325-1-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_p9_entry.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 83f592eadcd2..961b3d70483c 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -317,6 +317,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc */ mtspr(SPRN_HDEC, hdec); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +tm_return_to_guest: +#endif mtspr(SPRN_DAR, vcpu->arch.shregs.dar); mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); @@ -415,11 +418,23 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * is in real suspend mode and is trying to transition to * transactional mode. */ - if (local_paca->kvm_hstate.fake_suspend && + if (!local_paca->kvm_hstate.fake_suspend && (vcpu->arch.shregs.msr & MSR_TS_S)) { if (kvmhv_p9_tm_emulation_early(vcpu)) { - /* Prevent it being handled again. */ - trap = 0; + /* + * Go straight back into the guest with the + * new NIP/MSR as set by TM emulation. 
+ */ + mtspr(SPRN_HSRR0, vcpu->arch.regs.nip); + mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr); + + /* + * tm_return_to_guest re-loads SRR0/1, DAR, + * DSISR after RI is cleared, in case they had + * been clobbered by a MCE. + */ + __mtmsrd(0, 1); /* clear RI */ + goto tm_return_to_guest; } } #endif @@ -499,6 +514,10 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * If we are in real mode, only switch MMU on after the MMU is * switched to host, to avoid the P9_RADIX_PREFETCH_BUG. */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + vcpu->arch.shregs.msr & MSR_TS_MASK) + msr |= MSR_TS_S; + __mtmsrd(msr, 0); end_timing(vcpu); From 99bb2ebab953435852340cdb198c5abbf0bb5dd3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 14 Jul 2021 11:58:12 +0200 Subject: [PATCH 082/794] net: dsa: mv88e6xxx: NET_DSA_MV88E6XXX_PTP should depend on NET_DSA_MV88E6XXX Making global2 support mandatory removed the Kconfig symbol NET_DSA_MV88E6XXX_GLOBAL2. This symbol also served as an intermediate symbol to make NET_DSA_MV88E6XXX_PTP depend on NET_DSA_MV88E6XXX. With the symbol removed, the user is always asked about PTP support for Marvell 88E6xxx switches, even if the latter support is not enabled. Fix this by reinstating the dependency. Fixes: 63368a7416df144b ("net: dsa: mv88e6xxx: Make global2 support mandatory") Signed-off-by: Geert Uytterhoeven Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/dsa/mv88e6xxx/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 05af632b0f59..634a48e6616b 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -12,7 +12,7 @@ config NET_DSA_MV88E6XXX config NET_DSA_MV88E6XXX_PTP bool "PTP support for Marvell 88E6xxx" default n - depends on PTP_1588_CLOCK + depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK help Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch chips that support it. From e7efc2ce3d0789cd7c21b70ff00cd7838d382639 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 14 Jul 2021 16:23:43 +0100 Subject: [PATCH 083/794] liquidio: Fix unintentional sign extension issue on left shift of u16 Shifting the u16 integer oct->pcie_port by CN23XX_PKT_INPUT_CTL_MAC_NUM_POS (29) bits will be promoted to a 32 bit signed int and then sign-extended to a u64. In the cases where bit 2 of oct->pcie_port is set (e.g. 4..7) the shifted value will be sign extended and the top 32 bits of the result will be set. Fix this by casting the u16 values to a u64 before the 29 bit left shift. Addresses-Coverity: ("Unintended sign extension") Fixes: 3451b97cce2d ("liquidio: CN23XX register setup") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 4cddd628d41b..9ed3d1ab2ca5 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -420,7 +420,7 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) * bits 32:47 indicate the PVF num.
*/ for (q_no = 0; q_no < ern; q_no++) { - reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; + reg_val = (u64)oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; /* for VF assigned queues. */ if (q_no < oct->sriov_info.pf_srn) { From 65875073eddd24d7b3968c1501ef29277398dc7b Mon Sep 17 00:00:00 2001 From: Qitao Xu Date: Wed, 14 Jul 2021 22:59:23 -0700 Subject: [PATCH 084/794] net: use %px to print skb address in trace_netif_receive_skb The print format of skb address in tracepoint class net_dev_template is changed to %px from %p, because we want to use skb address as a quick way to identify a packet. Note, trace ring buffer is only accessible to privileged users, it is safe to use a real kernel address here. Reviewed-by: Cong Wang Signed-off-by: Qitao Xu Signed-off-by: David S. Miller --- include/trace/events/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 2399073c3afc..78c448c6ab4c 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -136,7 +136,7 @@ DECLARE_EVENT_CLASS(net_dev_template, __assign_str(name, skb->dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u", + TP_printk("dev=%s skbaddr=%px len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) From 851f36e40962408309ad2665bf0056c19a97881c Mon Sep 17 00:00:00 2001 From: Qitao Xu Date: Wed, 14 Jul 2021 23:00:21 -0700 Subject: [PATCH 085/794] net_sched: use %px to print skb address in trace_qdisc_dequeue() Print format of skbaddr is changed to %px from %p, because we want to use skb address as a quick way to identify a packet. Note, trace ring buffer is only accessible to privileged users, it is safe to use a real kernel address here. Reviewed-by: Cong Wang Signed-off-by: Qitao Xu Signed-off-by: David S.
Miller --- include/trace/events/qdisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 330d32d84485..58209557cb3a 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -41,7 +41,7 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state = txq->state; ), - TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", + TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%px", __entry->ifindex, __entry->handle, __entry->parent, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); From 70713dddf3d25a02d1952f8c5d2688c986d2f2fb Mon Sep 17 00:00:00 2001 From: Qitao Xu Date: Wed, 14 Jul 2021 23:03:24 -0700 Subject: [PATCH 086/794] net_sched: introduce tracepoint trace_qdisc_enqueue() Tracepoint trace_qdisc_enqueue() is introduced to trace skb at the entrance of TC layer on TX side. This is similar to trace_qdisc_dequeue(): 1. For both we only trace successful cases. The failure cases can be traced via trace_kfree_skb(). 2. They are called at entrance or exit of TC layer, not for each ->enqueue() or ->dequeue(). This is intentional, because we want to make trace_qdisc_enqueue() symmetric to trace_qdisc_dequeue(), which is easier to use. The return value of qdisc_enqueue() is not interesting here, we have Qdisc's drop packets in ->dequeue(), it is impossible to trace them even if we have the return value, the only way to trace them is tracing kfree_skb(). We only add information we need to trace ring buffer. If any other information is needed, it is easy to extend it without breaking ABI, see commit 3dd344ea84e1 ("net: tracepoint: exposing sk_family in all tcp:tracepoints"). Reviewed-by: Cong Wang Signed-off-by: Qitao Xu Signed-off-by: David S. 
Miller --- include/trace/events/qdisc.h | 26 ++++++++++++++++++++++++++ net/core/dev.c | 20 ++++++++++++++++---- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 58209557cb3a..c3006c6b4a87 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -46,6 +46,32 @@ TRACE_EVENT(qdisc_dequeue, __entry->txq_state, __entry->packets, __entry->skbaddr ) ); +TRACE_EVENT(qdisc_enqueue, + + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, struct sk_buff *skb), + + TP_ARGS(qdisc, txq, skb), + + TP_STRUCT__entry( + __field(struct Qdisc *, qdisc) + __field(void *, skbaddr) + __field(int, ifindex) + __field(u32, handle) + __field(u32, parent) + ), + + TP_fast_assign( + __entry->qdisc = qdisc; + __entry->skbaddr = skb; + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; + __entry->handle = qdisc->handle; + __entry->parent = qdisc->parent; + ), + + TP_printk("enqueue ifindex=%d qdisc handle=0x%X parent=0x%X skbaddr=%px", + __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr) +); + TRACE_EVENT(qdisc_reset, TP_PROTO(struct Qdisc *q), diff --git a/net/core/dev.c b/net/core/dev.c index 64b21f0a2048..7aeefc467ddc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -131,6 +131,7 @@ #include #include #include +#include #include #include #include @@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) } } +static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, + struct sk_buff **to_free, + struct netdev_queue *txq) +{ + int rc; + + rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; + if (rc == NET_XMIT_SUCCESS) + trace_qdisc_enqueue(q, txq, skb); + return rc; +} + static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) @@ -3862,8 +3875,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * of q->seqlock to protect from racing with requeuing. 
*/ if (unlikely(!nolock_qdisc_is_empty(q))) { - rc = q->enqueue(skb, q, &to_free) & - NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); __qdisc_run(q); qdisc_run_end(q); @@ -3879,7 +3891,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return NET_XMIT_SUCCESS; } - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); qdisc_run(q); no_lock_out: @@ -3923,7 +3935,7 @@ no_lock_out: qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); From b18c7da63fcb46e2f9a093cc18d7c219e13a887c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 5 Jul 2021 11:41:54 -0500 Subject: [PATCH 087/794] RDMA/rxe: Fix memory leak in error path code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In rxe_mr_init_user() at the third error the driver fails to free the memory at mr->map. This patch adds code to do that. This error only occurs if page_address() fails to return a non zero address which should never happen for 64 bit architectures. 
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20210705164153.17652-1-rpearsonhpe@gmail.com Reported by: Haakon Bugge Signed-off-by: Bob Pearson Reviewed-by: Zhu Yanjun Reviewed-by: Håkon Bugge Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 6aabcb4de235..be4bcb420fab 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -113,13 +113,14 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; + int i; umem = ib_umem_get(pd->ibpd.device, start, length, access); if (IS_ERR(umem)) { - pr_warn("err %d from rxe_umem_get\n", - (int)PTR_ERR(umem)); + pr_warn("%s: Unable to pin memory region err = %d\n", + __func__, (int)PTR_ERR(umem)); err = PTR_ERR(umem); - goto err1; + goto err_out; } mr->umem = umem; @@ -129,9 +130,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, err = rxe_mr_alloc(mr, num_buf); if (err) { - pr_warn("err %d from rxe_mr_alloc\n", err); - ib_umem_release(umem); - goto err1; + pr_warn("%s: Unable to allocate memory for map\n", + __func__); + goto err_release_umem; } mr->page_shift = PAGE_SHIFT; @@ -151,10 +152,10 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { - pr_warn("null vaddr\n"); - ib_umem_release(umem); + pr_warn("%s: Unable to get virtual address\n", + __func__); err = -ENOMEM; - goto err1; + goto err_cleanup_map; } buf->addr = (uintptr_t)vaddr; @@ -177,7 +178,13 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, return 0; -err1: +err_cleanup_map: + for (i = 0; i < mr->num_map; i++) + kfree(mr->map[i]); + kfree(mr->map); +err_release_umem: + ib_umem_release(umem); +err_out: return err; } From 
91091656252f5d6d8c476e0c92776ce9fae7b445 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 15 Jul 2021 13:57:12 +0100 Subject: [PATCH 088/794] s390/bpf: Perform r1 range checking before accessing jit->seen_reg[r1] Currently array jit->seen_reg[r1] is being accessed before the range checking of index r1. The range checking on r1 should be performed first since it will avoid any potential out-of-range accesses on the array seen_reg[] and also it is more optimal to perform checks on r1 before fetching data from the array. Fix this by swapping the order of the checks before the array access. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Tested-by: Ilya Leoshkevich Acked-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20210715125712.24690-1-colin.king@canonical.com --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 63cae0476bb4..2ae419f5115a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -112,7 +112,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) { u32 r1 = reg2hex[b1]; - if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15) + if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1]) jit->seen_reg[r1] = 1; } From 7e6b27a69167f97c56b5437871d29e9722c3e470 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 12 Jul 2021 12:55:45 -0700 Subject: [PATCH 089/794] bpf, sockmap: Fix potential memory leak on unlikely error case If skb_linearize is needed and fails we could leak a msg on the error handling. To fix, ensure we kfree the msg block before returning error. Found during code review.
Fixes: 4363023d2668e ("bpf, sockmap: Avoid failures from skb_to_sgvec when skb has frag_list") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Link: https://lore.kernel.org/bpf/20210712195546.423990-2-john.fastabend@gmail.com --- net/core/skmsg.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 9b6160a191f8..15d71288e741 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -508,10 +508,8 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, if (skb_linearize(skb)) return -EAGAIN; num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); - if (unlikely(num_sge < 0)) { - kfree(msg); + if (unlikely(num_sge < 0)) return num_sge; - } copied = skb->len; msg->sg.start = 0; @@ -530,6 +528,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) { struct sock *sk = psock->sk; struct sk_msg *msg; + int err; /* If we are receiving on the same sock skb->sk is already assigned, * skip memory accounting and owner transition seeing it already set @@ -548,7 +547,10 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * into user buffers. 
*/ skb_set_owner_r(skb, sk); - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + if (err < 0) + kfree(msg); + return err; } /* Puts an skb on the ingress queue of the socket already assigned to the @@ -559,12 +561,16 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb { struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); struct sock *sk = psock->sk; + int err; if (unlikely(!msg)) return -EAGAIN; sk_msg_init(msg); skb_set_owner_r(skb, sk); - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + if (err < 0) + kfree(msg); + return err; } static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, From 228a4a7ba8e99bb9ef980b62f71e3be33f4aae69 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 12 Jul 2021 12:55:46 -0700 Subject: [PATCH 090/794] bpf, sockmap, tcp: sk_prot needs inuse_idx set for proc stats The proc socket stats use sk_prot->inuse_idx value to record inuse sock stats. We currently do not set this correctly from sockmap side. The result is reading sock stats '/proc/net/sockstat' gives incorrect values. The socket counter is incremented correctly, but because we don't set the counter correctly when we replace sk_prot we may omit the decrement. To get the correct inuse_idx value move the core_initcall that initializes the TCP proto handlers to late_initcall. This way it is initialized after TCP has the chance to assign the inuse_idx value from the register protocol handler. 
Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Suggested-by: Jakub Sitnicki Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Link: https://lore.kernel.org/bpf/20210712195546.423990-3-john.fastabend@gmail.com --- net/ipv4/tcp_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index f26916a62f25..d3e9386b493e 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -503,7 +503,7 @@ static int __init tcp_bpf_v4_build_proto(void) tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot); return 0; } -core_initcall(tcp_bpf_v4_build_proto); +late_initcall(tcp_bpf_v4_build_proto); static int tcp_bpf_assert_proto_ops(struct proto *ops) { From 54ea2f49fd9400dd698c25450be3352b5613b3b4 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 14 Jul 2021 17:47:50 +0200 Subject: [PATCH 091/794] bpf, sockmap, udp: sk_prot needs inuse_idx set for proc stats The proc socket stats use sk_prot->inuse_idx value to record inuse sock stats. We currently do not set this correctly from sockmap side. The result is reading sock stats '/proc/net/sockstat' gives incorrect values. The socket counter is incremented correctly, but because we don't set the counter correctly when we replace sk_prot we may omit the decrement. To get the correct inuse_idx value move the core_initcall that initializes the UDP proto handlers to late_initcall. This way it is initialized after UDP has the chance to assign the inuse_idx value from the register protocol handler. 
Fixes: edc6741cc660 ("bpf: Add sockmap hooks for UDP sockets") Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Reviewed-by: Cong Wang Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210714154750.528206-1-jakub@cloudflare.com --- net/ipv4/udp_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 45b8782aec0c..9f5a5cdc38e6 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -134,7 +134,7 @@ static int __init udp_bpf_v4_build_proto(void) udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot); return 0; } -core_initcall(udp_bpf_v4_build_proto); +late_initcall(udp_bpf_v4_build_proto); int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { From d444b06e40855219ef38b5e9286db16d435f06dc Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 15 Jul 2021 13:06:09 +0200 Subject: [PATCH 092/794] bpftool: Check malloc return value in mount_bpffs_for_pin Fix and add a missing NULL check for the prior malloc() call. 
Fixes: 49a086c201a9 ("bpftool: implement prog load command") Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20210715110609.29364-1-tklauser@distanz.ch --- tools/bpf/bpftool/common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 1828bba19020..dc6daa193557 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -222,6 +222,11 @@ int mount_bpffs_for_pin(const char *name) int err = 0; file = malloc(strlen(name) + 1); + if (!file) { + p_err("mem alloc failed"); + return -1; + } + strcpy(file, name); dir = dirname(file); From 0dc2d6ff40364a00cd66cae3ed327894dcd11c82 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 8 Jul 2021 14:35:21 -0700 Subject: [PATCH 093/794] RDMA/irdma: Check vsi pointer before using it Fix a coverity warning about NULL pointer dereference: Dereferencing "vsi", which is known to be "NULL". 
Link: https://lore.kernel.org/r/20210708213521.438-1-tatyana.e.nikolova@intel.com Reported-by: coverity-bot Addresses-Coverity-ID: 1505164 ("Null pointer dereferences") Fixes: 8498a30e1b94 ("RDMA/irdma: Register auxiliary driver and implement private channel OPs") Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index ea59432351fb..51a41359e0b4 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -215,10 +215,10 @@ static void irdma_remove(struct auxiliary_device *aux_dev) pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } -static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf) +static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf, + struct ice_vsi *vsi) { struct irdma_pci_f *rf = iwdev->rf; - struct ice_vsi *vsi = ice_get_main_vsi(pf); rf->cdev = pf; rf->gen_ops.register_qset = irdma_lan_register_qset; @@ -253,12 +253,15 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ struct iidc_auxiliary_dev, adev); struct ice_pf *pf = iidc_adev->pf; + struct ice_vsi *vsi = ice_get_main_vsi(pf); struct iidc_qos_params qos_info = {}; struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_l2params l2params = {}; int err; + if (!vsi) + return -EIO; iwdev = ib_alloc_device(irdma_device, ibdev); if (!iwdev) return -ENOMEM; @@ -268,7 +271,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ return -ENOMEM; } - irdma_fill_device_info(iwdev, pf); + irdma_fill_device_info(iwdev, pf, vsi); rf = iwdev->rf; if (irdma_ctrl_init_hw(rf)) { From 991e634360f2622a683b48dfe44fe6d9cb765a09 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 15 Jul 2021 20:22:04 +0800 
Subject: [PATCH 094/794] net: fix uninit-value in caif_seqpkt_sendmsg When nr_segs equals zero in iovec_from_user, the object msg->msg_iter.iov is uninit stack memory in caif_seqpkt_sendmsg which is defined in ___sys_sendmsg. So we can't just check msg->msg_iter.iov->base directly. We can use nr_segs in caif_seqpkt_sendmsg to check whether msg has data buffers. ===================================================== BUG: KMSAN: uninit-value in caif_seqpkt_sendmsg+0x693/0xf60 net/caif/caif_socket.c:542 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 caif_seqpkt_sendmsg+0x693/0xf60 net/caif/caif_socket.c:542 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg net/socket.c:672 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2343 ___sys_sendmsg net/socket.c:2397 [inline] __sys_sendmmsg+0x808/0xc90 net/socket.c:2480 __compat_sys_sendmmsg net/compat.c:656 [inline] Reported-by: syzbot+09a5d591c1f98cf5efcb@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=1ace85e8fc9b0d5a45c08c2656c3e91762daa9b8 Fixes: bece7b2398d0 ("caif: Rewritten socket implementation") Signed-off-by: Ziyang Xuan Signed-off-by: David S.
Miller --- net/caif/caif_socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 647554c9813b..e12fd3cad619 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -539,7 +539,8 @@ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg, goto err; ret = -EINVAL; - if (unlikely(msg->msg_iter.iov->iov_base == NULL)) + if (unlikely(msg->msg_iter.nr_segs == 0) || + unlikely(msg->msg_iter.iov->iov_base == NULL)) goto err; noblock = msg->msg_flags & MSG_DONTWAIT; From a323da0b73b89b3ecabd661c56978a271e1911b6 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 13 Jul 2021 23:11:28 -0400 Subject: [PATCH 095/794] RDMA/irdma: change the returned type of irdma_sc_repost_aeq_entries to void The function irdma_sc_repost_aeq_entries always returns zero. So the returned type is changed to void. Link: https://lore.kernel.org/r/20210714031130.1511109-2-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ctrl.c | 4 +--- drivers/infiniband/hw/irdma/type.h | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index c3880a85e255..f1e5515256e0 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -4186,11 +4186,9 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, * @dev: sc device struct * @count: allocate count */ -enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) +void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) { writel(count, dev->hw_regs[IRDMA_AEQALLOC]); - - return 0; } /** diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 7387b83e826d..874bc25a938b 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -1222,8 +1222,7 @@ enum 
irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, struct irdma_aeq_init_info *info); enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, struct irdma_aeqe_info *info); -enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, - u32 count); +void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count); void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id, int abi_ver); From 41f5fa9fa75cebd48b5ce9ec244ee25390ac3b89 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 13 Jul 2021 23:11:29 -0400 Subject: [PATCH 096/794] RDMA/irdma: Change the returned type of irdma_set_hw_rsrc to void Since the function irdma_set_hw_rsrc always returns zero, change the returned type to void and remove all the related source code. Link: https://lore.kernel.org/r/20210714031130.1511109-3-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/hw.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 7afb8a6a0526..00de5ee9a260 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1920,7 +1920,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf) * irdma_set_hw_rsrc - set hw memory resources. 
* @rf: RDMA PCI function */ -static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf) +static void irdma_set_hw_rsrc(struct irdma_pci_f *rf) { rf->allocated_qps = (void *)(rf->mem_rsrc + (sizeof(struct irdma_arp_entry) * rf->arp_table_size)); @@ -1937,8 +1937,6 @@ static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf) spin_lock_init(&rf->arp_lock); spin_lock_init(&rf->qptable_lock); spin_lock_init(&rf->qh_list_lock); - - return 0; } /** @@ -2000,9 +1998,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc; - ret = irdma_set_hw_rsrc(rf); - if (ret) - goto set_hw_rsrc_fail; + irdma_set_hw_rsrc(rf); set_bit(0, rf->allocated_mrs); set_bit(0, rf->allocated_qps); @@ -2025,9 +2021,6 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) return 0; -set_hw_rsrc_fail: - kfree(rf->mem_rsrc); - rf->mem_rsrc = NULL; mem_rsrc_kzalloc_fail: kfree(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; From dc6afef7e14252c5ca5b8a8444946cb4b75b0aa0 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 13 Jul 2021 23:11:30 -0400 Subject: [PATCH 097/794] RDMA/irdma: Change returned type of irdma_setup_virt_qp to void Since the returned value of the function irdma_setup_virt_qp is always 0, remove the returned value check and change the returned type to void. 
Link: https://lore.kernel.org/r/20210714031130.1511109-4-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 9712f6902ba8..717147ed0519 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -557,7 +557,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) * @iwqp: qp ptr * @init_info: initialize info to return */ -static int irdma_setup_virt_qp(struct irdma_device *iwdev, +static void irdma_setup_virt_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *init_info) { @@ -574,8 +574,6 @@ static int irdma_setup_virt_qp(struct irdma_device *iwdev, init_info->sq_pa = qpmr->sq_pbl.addr; init_info->rq_pa = qpmr->rq_pbl.addr; } - - return 0; } /** @@ -914,7 +912,7 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd, } } init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; - err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info); + irdma_setup_virt_qp(iwdev, iwqp, &init_info); } else { init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); From e48bf29cf9d6d60d810e2af71e54b71a324094e0 Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Sun, 13 Jun 2021 11:25:07 +0800 Subject: [PATCH 098/794] HID: intel-ish-hid: use async resume function ISH IPC driver uses asynchronous workqueue to do resume now, but there is a potential timing issue: when child devices resume before bus driver, it will cause child devices resume failed and cannot be recovered until reboot. The current implementation in this case do wait for IPC to resume but fail to accommodate for a case when there is no ISH reboot and soft resume is taking time. 
This issue is apparent on Tiger Lake platform with 5.11.13 kernel when doing suspend to idle then resume(s0ix) test. To resolve this issue, we change ISHTP HID client to use asynchronous resume callback too. In the asynchronous resume callback, it waits for the ISHTP resume done event, and then notify ISHTP HID client link ready. Signed-off-by: Ye Xiang Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid-client.c | 15 +++++++++- drivers/hid/intel-ish-hid/ishtp-hid.h | 1 + drivers/hid/intel-ish-hid/ishtp/bus.c | 29 +++++++++++++++----- include/linux/intel-ish-client-if.h | 2 ++ 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index 6b1fa971b33e..91bf4d01e91a 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -784,6 +784,17 @@ static void hid_ishtp_cl_reset_handler(struct work_struct *work) } } +static void hid_ishtp_cl_resume_handler(struct work_struct *work) +{ + struct ishtp_cl_data *client_data = container_of(work, struct ishtp_cl_data, resume_work); + struct ishtp_cl *hid_ishtp_cl = client_data->hid_ishtp_cl; + + if (ishtp_wait_resume(ishtp_get_ishtp_device(hid_ishtp_cl))) { + client_data->suspended = false; + wake_up_interruptible(&client_data->ishtp_resume_wait); + } +} + ishtp_print_log ishtp_hid_print_trace; /** @@ -822,6 +833,8 @@ static int hid_ishtp_cl_probe(struct ishtp_cl_device *cl_device) init_waitqueue_head(&client_data->ishtp_resume_wait); INIT_WORK(&client_data->work, hid_ishtp_cl_reset_handler); + INIT_WORK(&client_data->resume_work, hid_ishtp_cl_resume_handler); + ishtp_hid_print_trace = ishtp_trace_callback(cl_device); @@ -921,7 +934,7 @@ static int hid_ishtp_cl_resume(struct device *device) hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__, hid_ishtp_cl); - client_data->suspended = false; + 
schedule_work(&client_data->resume_work); return 0; } diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h index f88443a7d935..6a5cc11aefd8 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.h +++ b/drivers/hid/intel-ish-hid/ishtp-hid.h @@ -135,6 +135,7 @@ struct ishtp_cl_data { int multi_packet_cnt; struct work_struct work; + struct work_struct resume_work; struct ishtp_cl_device *cl_device; }; diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index f0802b047ed8..aa2c51624012 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -314,13 +314,6 @@ static int ishtp_cl_device_resume(struct device *dev) if (!device) return 0; - /* - * When ISH needs hard reset, it is done asynchrnously, hence bus - * resume will be called before full ISH resume - */ - if (device->ishtp_dev->resume_flag) - return 0; - driver = to_ishtp_cl_driver(dev->driver); if (driver && driver->driver.pm) { if (driver->driver.pm->resume) @@ -849,6 +842,28 @@ struct device *ishtp_device(struct ishtp_cl_device *device) } EXPORT_SYMBOL(ishtp_device); +/** + * ishtp_wait_resume() - Wait for IPC resume + * + * Wait for IPC resume + * + * Return: resume complete or not + */ +bool ishtp_wait_resume(struct ishtp_device *dev) +{ + /* 50ms to get resume response */ + #define WAIT_FOR_RESUME_ACK_MS 50 + + /* Waiting to get resume response */ + if (dev->resume_flag) + wait_event_interruptible_timeout(dev->resume_wait, + !dev->resume_flag, + msecs_to_jiffies(WAIT_FOR_RESUME_ACK_MS)); + + return (!dev->resume_flag); +} +EXPORT_SYMBOL_GPL(ishtp_wait_resume); + /** * ishtp_get_pci_device() - Return PCI device dev pointer * This interface is used to return PCI device pointer diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index 25e2b4e80502..aee8ff4739b1 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -81,6 +81,8 
@@ int ishtp_register_event_cb(struct ishtp_cl_device *device, /* Get the device * from ishtp device instance */ struct device *ishtp_device(struct ishtp_cl_device *cl_device); +/* wait for IPC resume */ +bool ishtp_wait_resume(struct ishtp_device *dev); /* Trace interface for clients */ ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device); /* Get device pointer of PCI device for DMA acces */ From 55cef88bbf12f3bfbe5c2379a8868a034707e755 Mon Sep 17 00:00:00 2001 From: Yoshitaka Ikeda Date: Thu, 15 Jul 2021 16:21:32 +0000 Subject: [PATCH 099/794] spi: spi-cadence-quadspi: Fix division by zero warning Fix below division by zero warning: - Added an if statement because buswidth can be zero, resulting in division by zero. - The modified code was based on another driver (atmel-quadspi). [ 0.795337] Division by zero in kernel. : [ 0.834051] [<807fd40c>] (__div0) from [<804e1acc>] (Ldiv0+0x8/0x10) [ 0.839097] [<805f0710>] (cqspi_exec_mem_op) from [<805edb4c>] (spi_mem_exec_op+0x3b0/0x3f8) Fixes: 7512eaf54190 ("spi: cadence-quadspi: Fix dummy cycle calculation when buswidth > 1") Signed-off-by: Yoshitaka Ikeda Link: https://lore.kernel.org/r/ed989af6-da88-4e0b-9ed8-126db6cad2e4@nskint.co.jp Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 7a00346ff9b9..13d1f0ce618e 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -307,11 +307,13 @@ static unsigned int cqspi_calc_rdreg(struct cqspi_flash_pdata *f_pdata) static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr) { - unsigned int dummy_clk; + unsigned int dummy_clk = 0; - dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); - if (dtr) - dummy_clk /= 2; + if (op->dummy.buswidth && op->dummy.nbytes) { + dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); + if (dtr) + dummy_clk 
/= 2; + } return dummy_clk; } From 3fdcf7cdfc229346d028242e73562704ad644dd0 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Mon, 5 Jul 2021 10:26:59 +1200 Subject: [PATCH 100/794] HID: asus: Remove check for same LED brightness on set Remove the early return on LED brightness set so that any controller application, daemon, or desktop may set the same brightness at any stage. This is required because many ASUS ROG keyboards will default to max brightness on laptop resume if the LEDs were set to off before sleep. Signed-off-by: Luke D Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index fca8fc78a78a..fb807c8e989b 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -485,9 +485,6 @@ static void asus_kbd_backlight_set(struct led_classdev *led_cdev, { struct asus_kbd_leds *led = container_of(led_cdev, struct asus_kbd_leds, cdev); - if (led->brightness == brightness) - return; - led->brightness = brightness; schedule_work(&led->work); } From 6206b7981a36476f4695d661ae139f7db36a802d Mon Sep 17 00:00:00 2001 From: Jia He Date: Thu, 15 Jul 2021 16:08:21 +0800 Subject: [PATCH 101/794] qed: fix possible unpaired spin_{un}lock_bh in _qed_mcp_cmd_and_union() Liajian reported a bug_on hit on a ThunderX2 arm64 server with FastLinQ QL41000 ethernet controller: BUG: scheduling while atomic: kworker/0:4/531/0x00000200 [qed_probe:488()]hw prepare failed kernel BUG at mm/vmalloc.c:2355! 
Internal error: Oops - BUG: 0 [#1] SMP CPU: 0 PID: 531 Comm: kworker/0:4 Tainted: G W 5.4.0-77-generic #86-Ubuntu pstate: 00400009 (nzcv daif +PAN -UAO) Call trace: vunmap+0x4c/0x50 iounmap+0x48/0x58 qed_free_pci+0x60/0x80 [qed] qed_probe+0x35c/0x688 [qed] __qede_probe+0x88/0x5c8 [qede] qede_probe+0x60/0xe0 [qede] local_pci_probe+0x48/0xa0 work_for_cpu_fn+0x24/0x38 process_one_work+0x1d0/0x468 worker_thread+0x238/0x4e0 kthread+0xf0/0x118 ret_from_fork+0x10/0x18 In this case, qed_hw_prepare() returns error due to hw/fw error, but in theory work queue should be in process context instead of interrupt. The root cause might be the unpaired spin_{un}lock_bh() in _qed_mcp_cmd_and_union(), which causes the bottom half to be disabled incorrectly. Reported-by: Lijian Zhang Signed-off-by: Jia He Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 4387292c37e2..79d879a5d663 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -474,14 +474,18 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (!qed_mcp_has_pending_cmd(p_hwfn)) + if (!qed_mcp_has_pending_cmd(p_hwfn)) { + spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); break; + } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) + if (!rc) { + spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); break; - else if (rc != -EAGAIN) + } else if (rc != -EAGAIN) { goto err; + } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -498,6 +502,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } + spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); + /* Send the mailbox command */ qed_mcp_reread_offsets(p_hwfn, p_ptt); seq_num = ++p_hwfn->mcp_info->drv_mb_seq; @@ -524,14 +530,18 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (p_cmd_elem->b_is_completed) + if (p_cmd_elem->b_is_completed) { + spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); break; + } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) + if (!rc) { + spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); break; - else if (rc != -EAGAIN) + } else if (rc != -EAGAIN) { goto err; + } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); } while (++cnt < max_retries); @@ -554,6 +564,7 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } + spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem); spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); From a6ecfb39ba9d7316057cea823b196b734f6b18ca Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 14 Jul 2021 17:13:22 +0800 Subject: [PATCH 102/794] usb: hso: fix error handling code of hso_create_net_device The current error handling code of hso_create_net_device is hso_free_net_device, no matter which errors lead to. For example, WARNING in hso_free_net_device [1]. Fix this by refactoring the error handling code of hso_create_net_device by handling different errors by different code. [1] https://syzkaller.appspot.com/bug?id=66eff8d49af1b28370ad342787413e35bbe76efe Reported-by: syzbot+44d53c7255bb1aea22d2@syzkaller.appspotmail.com Fixes: 5fcfb6d0bfcd ("hso: fix bailout in error case of probe") Signed-off-by: Dongliang Mu Signed-off-by: David S. 
Miller --- drivers/net/usb/hso.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 63006838bdcc..dec96e8ab567 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2495,7 +2495,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, hso_net_init); if (!net) { dev_err(&interface->dev, "Unable to create ethernet device\n"); - goto exit; + goto err_hso_dev; } hso_net = netdev_priv(net); @@ -2508,13 +2508,13 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, USB_DIR_IN); if (!hso_net->in_endp) { dev_err(&interface->dev, "Can't find BULK IN endpoint\n"); - goto exit; + goto err_net; } hso_net->out_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, USB_DIR_OUT); if (!hso_net->out_endp) { dev_err(&interface->dev, "Can't find BULK OUT endpoint\n"); - goto exit; + goto err_net; } SET_NETDEV_DEV(net, &interface->dev); SET_NETDEV_DEVTYPE(net, &hso_type); @@ -2523,18 +2523,18 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); if (!hso_net->mux_bulk_rx_urb_pool[i]) - goto exit; + goto err_mux_bulk_rx; hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_rx_buf_pool[i]) - goto exit; + goto err_mux_bulk_rx; } hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!hso_net->mux_bulk_tx_urb) - goto exit; + goto err_mux_bulk_rx; hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_tx_buf) - goto exit; + goto err_free_tx_urb; add_net_device(hso_dev); @@ -2542,7 +2542,7 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, result = register_netdev(net); if (result) { dev_err(&interface->dev, "Failed to register device\n"); - goto exit; + goto 
err_free_tx_buf; } hso_log_port(hso_dev); @@ -2550,8 +2550,21 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, hso_create_rfkill(hso_dev, interface); return hso_dev; -exit: - hso_free_net_device(hso_dev, true); + +err_free_tx_buf: + remove_net_device(hso_dev); + kfree(hso_net->mux_bulk_tx_buf); +err_free_tx_urb: + usb_free_urb(hso_net->mux_bulk_tx_urb); +err_mux_bulk_rx: + for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { + usb_free_urb(hso_net->mux_bulk_rx_urb_pool[i]); + kfree(hso_net->mux_bulk_rx_buf_pool[i]); + } +err_net: + free_netdev(net); +err_hso_dev: + kfree(hso_dev); return NULL; } From 8aa6348634d1bc81801329e6ea98cd88ec07fb10 Mon Sep 17 00:00:00 2001 From: Dylan MacKenzie Date: Tue, 13 Jul 2021 16:31:07 -0700 Subject: [PATCH 103/794] HID: amd_sfh: Use correct MMIO register for DMA address amd_stop_sensor_v2 accidentally used a different MMIO register than amd_start_sensor_v2 for the DMA address. Fixes: f264481ad614dfd9 ("HID: amd_sfh: Extend driver capabilities for multi-generation support") Signed-off-by: Dylan MacKenzie Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index 96e2577fa37e..8d68796aa905 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -58,7 +58,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx) cmd_base.cmd_v2.sensor_id = sensor_idx; cmd_base.cmd_v2.length = 16; - writeq(0x0, privdata->mmio + AMD_C2P_MSG2); + writeq(0x0, privdata->mmio + AMD_C2P_MSG1); writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0); } From 9a3223b0713369e6258fd8656e0c0a5ed794d186 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 15 Jul 2021 12:59:52 -0600 Subject: [PATCH 104/794] ASoC: dt-bindings: renesas: rsnd: Fix incorrect 'port' regex schema A property 
regex goes under 'patternProperties', not 'properties' schema. Otherwise, the regex is interpreted as a fixed string. Fixes: 17c2d247ddd2 ("ASoC: dt-bindings: renesas: rsnd: tidyup properties") Cc: Mark Brown Cc: Kuninori Morimoto Cc: alsa-devel@alsa-project.org Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210715185952.1470138-1-robh@kernel.org Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/renesas,rsnd.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml b/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml index ee936d1aa724..c2930d65728e 100644 --- a/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml +++ b/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml @@ -114,7 +114,7 @@ properties: ports: $ref: /schemas/graph.yaml#/properties/ports - properties: + patternProperties: port(@[0-9a-f]+)?: $ref: audio-graph-port.yaml# unevaluatedProperties: false From fa5239f2af983ffdf08395a542a7d6356b6222c5 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 15 Jul 2021 15:05:37 -0400 Subject: [PATCH 105/794] drm/amdgpu: workaround failed COW checks for Thunk VMAs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KFD Thunk maps invisible VRAM BOs with PROT_NONE, MAP_PRIVATE. is_cow_mapping returns true for these mappings, which causes mmap to fail in ttm_bo_mmap_obj. As a workaround, clear VM_MAYWRITE for PROT_NONE-COW mappings. This should prevent the mapping from ever becoming writable and makes is_cow_mapping(vm_flags) false.
Fixes: f91142c62161 ("drm/ttm: nuke VM_MIXEDMAP on BO mappings v3") Suggested-by: Daniel Vetter Tested-by: Felix Kuehling Signed-off-by: Felix Kuehling Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210715190537.585456-1-Felix.Kuehling@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b3404c43a911..9f952b7fc197 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -255,6 +255,15 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; + /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings + * for debugger access to invisible VRAM. Should have used MAP_SHARED + * instead. Clearing VM_MAYWRITE prevents the mapping from ever + * becoming writable and makes is_cow_mapping(vm_flags) false. + */ + if (is_cow_mapping(vma->vm_flags) && + !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + vma->vm_flags &= ~VM_MAYWRITE; + return drm_gem_ttm_mmap(obj, vma); } From 40ac971eab89330d6153e7721e88acd2d98833f9 Mon Sep 17 00:00:00 2001 From: Roman Skakun Date: Fri, 16 Jul 2021 11:39:34 +0300 Subject: [PATCH 106/794] dma-mapping: handle vmalloc addresses in dma_common_{mmap,get_sgtable} xen-swiotlb can use vmalloc backed addresses for dma coherent allocations and uses the common helpers. Properly handle them to unbreak Xen on ARM platforms. 
Fixes: 1b65c4e5a9af ("swiotlb-xen: use xen_alloc/free_coherent_pages") Signed-off-by: Roman Skakun Reviewed-by: Andrii Anisov [hch: split the patch, renamed the helpers] Signed-off-by: Christoph Hellwig --- kernel/dma/ops_helpers.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index 910ae69cae77..af4a6ef48ce0 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -5,6 +5,13 @@ */ #include +static struct page *dma_common_vaddr_to_page(void *cpu_addr) +{ + if (is_vmalloc_addr(cpu_addr)) + return vmalloc_to_page(cpu_addr); + return virt_to_page(cpu_addr); +} + /* * Create scatter-list for the already allocated DMA buffer. */ @@ -12,7 +19,7 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - struct page *page = virt_to_page(cpu_addr); + struct page *page = dma_common_vaddr_to_page(cpu_addr); int ret; ret = sg_alloc_table(sgt, 1, GFP_KERNEL); @@ -32,6 +39,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; + struct page *page = dma_common_vaddr_to_page(cpu_addr); int ret = -ENXIO; vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); @@ -43,7 +51,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return -ENXIO; return remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, + page_to_pfn(page) + vma->vm_pgoff, user_count << PAGE_SHIFT, vma->vm_page_prot); #else return -ENXIO; From 2acf15b94d5b8ea8392c4b6753a6ffac3135cd78 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 2 Jul 2021 12:07:43 +0800 Subject: [PATCH 107/794] reiserfs: add check for root_inode in reiserfs_fill_super Our syzcaller report a NULL pointer dereference: BUG: kernel NULL pointer dereference, 
address: 0000000000000000 PGD 116e95067 P4D 116e95067 PUD 1080b5067 PMD 0 Oops: 0010 [#1] SMP KASAN CPU: 7 PID: 592 Comm: a.out Not tainted 5.13.0-next-20210629-dirty #67 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-p4 RIP: 0010:0x0 Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. RSP: 0018:ffff888114e779b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff110229cef39 RCX: ffffffffaa67e1aa RDX: 0000000000000000 RSI: ffff88810a58ee00 RDI: ffff8881233180b0 RBP: ffffffffac38e9c0 R08: ffffffffaa67e17e R09: 0000000000000001 R10: ffffffffb91c5557 R11: fffffbfff7238aaa R12: ffff88810a58ee00 R13: ffff888114e77aa0 R14: 0000000000000000 R15: ffff8881233180b0 FS: 00007f946163c480(0000) GS:ffff88839f1c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000001099c1000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __lookup_slow+0x116/0x2d0 ? page_put_link+0x120/0x120 ? __d_lookup+0xfc/0x320 ? d_lookup+0x49/0x90 lookup_one_len+0x13c/0x170 ? __lookup_slow+0x2d0/0x2d0 ? reiserfs_schedule_old_flush+0x31/0x130 reiserfs_lookup_privroot+0x64/0x150 reiserfs_fill_super+0x158c/0x1b90 ? finish_unfinished+0xb10/0xb10 ? bprintf+0xe0/0xe0 ? __mutex_lock_slowpath+0x30/0x30 ? __kasan_check_write+0x20/0x30 ? up_write+0x51/0xb0 ? set_blocksize+0x9f/0x1f0 mount_bdev+0x27c/0x2d0 ? finish_unfinished+0xb10/0xb10 ? reiserfs_kill_sb+0x120/0x120 get_super_block+0x19/0x30 legacy_get_tree+0x76/0xf0 vfs_get_tree+0x49/0x160 ? capable+0x1d/0x30 path_mount+0xacc/0x1380 ? putname+0x97/0xd0 ? finish_automount+0x450/0x450 ? kmem_cache_free+0xf8/0x5a0 ? putname+0x97/0xd0 do_mount+0xe2/0x110 ? path_mount+0x1380/0x1380 ? 
copy_mount_options+0x69/0x140 __x64_sys_mount+0xf0/0x190 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae This is because 'root_inode' is initialized with the wrong mode, and its i_op is set to 'reiserfs_special_inode_operations'. Thus add check for 'root_inode' to fix the problem. Link: https://lore.kernel.org/r/20210702040743.1918552-1-yukuai3@huawei.com Signed-off-by: Yu Kuai Signed-off-by: Jan Kara --- fs/reiserfs/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3ffafc73acf0..58481f8d63d5 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2082,6 +2082,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) unlock_new_inode(root_inode); } + if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) || + !root_inode->i_size) { + SWARN(silent, s, "", "corrupt root inode, run fsck"); + iput(root_inode); + errval = -EUCLEAN; + goto error; + } + s->s_root = d_make_root(root_inode); if (!s->s_root) goto error; From 728d392f8a799f037812d0f2b254fb3b5e115fcf Mon Sep 17 00:00:00 2001 From: Javier Pello Date: Wed, 14 Jul 2021 18:54:48 +0200 Subject: [PATCH 108/794] fs/ext2: Avoid page_address on pages returned by ext2_get_page Commit 782b76d7abdf02b12c46ed6f1e9bf715569027f7 ("fs/ext2: Replace kmap() with kmap_local_page()") replaced the kmap/kunmap calls in ext2_get_page/ext2_put_page with kmap_local_page/kunmap_local for efficiency reasons. As a necessary side change, the commit also made ext2_get_page (and ext2_find_entry and ext2_dotdot) return the mapping address along with the page itself, as it is required for kunmap_local, and converted uses of page_address on such pages to use the newly returned address instead. However, uses of page_address on such pages were missed in ext2_check_page and ext2_delete_entry, which triggers oopses if kmap_local_page happens to return an address from high memory.
Fix this now by converting the remaining uses of page_address to use the right address, as returned by kmap_local_page. Link: https://lore.kernel.org/r/20210714185448.8707ac239e9f12b3a7f5b9f9@urjc.es Reviewed-by: Ira Weiny Signed-off-by: Javier Pello Fixes: 782b76d7abdf ("fs/ext2: Replace kmap() with kmap_local_page()") Signed-off-by: Jan Kara --- fs/ext2/dir.c | 12 ++++++------ fs/ext2/ext2.h | 3 ++- fs/ext2/namei.c | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 14292dba3a12..2c2f179b6977 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -106,12 +106,11 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) return err; } -static bool ext2_check_page(struct page *page, int quiet) +static bool ext2_check_page(struct page *page, int quiet, char *kaddr) { struct inode *dir = page->mapping->host; struct super_block *sb = dir->i_sb; unsigned chunk_size = ext2_chunk_size(dir); - char *kaddr = page_address(page); u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count); unsigned offs, rec_len; unsigned limit = PAGE_SIZE; @@ -205,7 +204,8 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n, if (!IS_ERR(page)) { *page_addr = kmap_local_page(page); if (unlikely(!PageChecked(page))) { - if (PageError(page) || !ext2_check_page(page, quiet)) + if (PageError(page) || !ext2_check_page(page, quiet, + *page_addr)) goto fail; } } @@ -584,10 +584,10 @@ out_unlock: * ext2_delete_entry deletes a directory entry by merging it with the * previous entry. Page is up-to-date. 
*/ -int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) +int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, + char *kaddr) { struct inode *inode = page->mapping->host; - char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); unsigned to = ((char *)dir - kaddr) + ext2_rec_len_from_disk(dir->rec_len); @@ -607,7 +607,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) de = ext2_next_entry(de); } if (pde) - from = (char*)pde - (char*)page_address(page); + from = (char *)pde - kaddr; pos = page_offset(page) + from; lock_page(page); err = ext2_prepare_chunk(page, pos, to - from); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index b0a694820cb7..e512630cb63e 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -740,7 +740,8 @@ extern int ext2_inode_by_name(struct inode *dir, extern int ext2_make_empty(struct inode *, struct inode *); extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *, struct page **, void **res_page_addr); -extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); +extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page, + char *kaddr); extern int ext2_empty_dir (struct inode *); extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa); extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, void *, diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 1f69b81655b6..5f6b7560eb3f 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -293,7 +293,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) goto out; } - err = ext2_delete_entry (de, page); + err = ext2_delete_entry (de, page, page_addr); ext2_put_page(page, page_addr); if (err) goto out; @@ -397,7 +397,7 @@ static int ext2_rename (struct user_namespace * mnt_userns, old_inode->i_ctime = current_time(old_inode); mark_inode_dirty(old_inode); 
- ext2_delete_entry(old_de, old_page); + ext2_delete_entry(old_de, old_page, old_page_addr); if (dir_de) { if (old_dir != new_dir) From 13d257503c0930010ef9eed78b689cec417ab741 Mon Sep 17 00:00:00 2001 From: Shreyansh Chouhan Date: Fri, 9 Jul 2021 20:59:29 +0530 Subject: [PATCH 109/794] reiserfs: check directory items on read from disk While verifying the leaf item that we read from the disk, reiserfs doesn't check the directory items, this could cause a crash when we read a directory item from the disk that has an invalid deh_location. This patch adds a check to the directory items read from the disk that does a bounds check on deh_location for the directory entries. Any directory entry header with a directory entry offset greater than the item length is considered invalid. Link: https://lore.kernel.org/r/20210709152929.766363-1-chouhan.shreyansh630@gmail.com Reported-by: syzbot+c31a48e6702ccb3d64c9@syzkaller.appspotmail.com Signed-off-by: Shreyansh Chouhan Signed-off-by: Jan Kara --- fs/reiserfs/stree.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 476a7ff49482..ef42729216d1 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -387,6 +387,24 @@ void pathrelse(struct treepath *search_path) search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; } +static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih) +{ + struct reiserfs_de_head *deh; + int i; + + deh = B_I_DEH(bh, ih); + for (i = 0; i < ih_entry_count(ih); i++) { + if (deh_location(&deh[i]) > ih_item_len(ih)) { + reiserfs_warning(NULL, "reiserfs-5094", + "directory entry location seems wrong %h", + &deh[i]); + return 0; + } + } + + return 1; +} + static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) { struct block_head *blkh; @@ -454,11 +472,14 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) "(second one): %h", ih); return 0; } - if 
(is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) { - reiserfs_warning(NULL, "reiserfs-5093", - "item entry count seems wrong %h", - ih); - return 0; + if (is_direntry_le_ih(ih)) { + if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) { + reiserfs_warning(NULL, "reiserfs-5093", + "item entry count seems wrong %h", + ih); + return 0; + } + return has_valid_deh_location(bh, ih); } prev_location = ih_location(ih); } From f99986c0fcad8e1d7d842e9a636f55bcc6748da5 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 16 Jul 2021 11:57:35 +0100 Subject: [PATCH 110/794] ASoC: codecs: wcd938x: setup irq during component bind SoundWire registers are only accessable after sdw components are succesfully binded. Setup irqs at that point instead of doing at probe. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210716105735.6073-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 78b76eceff8f..2fcc97370be2 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3317,13 +3317,6 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component) (WCD938X_DIGITAL_INTR_LEVEL_0 + i), 0); } - ret = wcd938x_irq_init(wcd938x, component->dev); - if (ret) { - dev_err(component->dev, "%s: IRQ init failed: %d\n", - __func__, ret); - return ret; - } - wcd938x->hphr_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip, WCD938X_IRQ_HPHR_PDM_WD_INT); wcd938x->hphl_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip, @@ -3553,7 +3546,6 @@ static int wcd938x_bind(struct device *dev) } wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev); wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x; - wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode); if (!wcd938x->txdev) { @@ 
-3562,7 +3554,6 @@ static int wcd938x_bind(struct device *dev) } wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; - wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq; wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev); if (!wcd938x->tx_sdw_dev) { dev_err(dev, "could not get txslave with matching of dev\n"); @@ -3595,6 +3586,15 @@ static int wcd938x_bind(struct device *dev) return PTR_ERR(wcd938x->regmap); } + ret = wcd938x_irq_init(wcd938x, dev); + if (ret) { + dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret); + return ret; + } + + wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; + wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq; + ret = wcd938x_set_micbias_data(wcd938x); if (ret < 0) { dev_err(dev, "%s: bad micbias pdata\n", __func__); From 59dd33f82dc0975c55d3d46801e7ca45532d7673 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 16 Jul 2021 18:00:12 +0530 Subject: [PATCH 111/794] ASoC: soc-pcm: add a flag to reverse the stop sequence On stream stop, currently CPU DAI stop sequence invoked first followed by DMA. For Few platforms, it is required to stop the DMA first before stopping CPU DAI. Introduced new flag in dai_link structure for reordering stop sequence. Based on flag check, ASoC core will re-order the stop sequence. 
Fixes: 4378f1fbe92405 ("ASoC: soc-pcm: Use different sequence for start/stop trigger") Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20210716123015.15697-1-vijendar.mukunda@amd.com Signed-off-by: Mark Brown --- include/sound/soc.h | 6 ++++++ sound/soc/soc-pcm.c | 22 ++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 675849d07284..8e6dd8a257c5 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -712,6 +712,12 @@ struct snd_soc_dai_link { /* Do not create a PCM for this DAI link (Backend link) */ unsigned int ignore:1; + /* This flag will reorder stop sequence. By enabling this flag + * DMA controller stop sequence will be invoked first followed by + * CPU DAI driver stop sequence + */ + unsigned int stop_dma_first:1; + #ifdef CONFIG_SND_SOC_TOPOLOGY struct snd_soc_dobj dobj; /* For topology */ #endif diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 46513bb97904..d1c570ca21ea 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1015,6 +1015,7 @@ out: static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); int ret = -EINVAL, _ret = 0; int rollback = 0; @@ -1055,14 +1056,23 @@ start_err: case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback); - if (ret < 0) - break; + if (rtd->dai_link->stop_dma_first) { + ret = snd_soc_pcm_component_trigger(substream, cmd, rollback); + if (ret < 0) + break; - ret = snd_soc_pcm_component_trigger(substream, cmd, rollback); - if (ret < 0) - break; + ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback); + if (ret < 0) + break; + } else { + ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback); + if (ret < 0) + break; + ret = snd_soc_pcm_component_trigger(substream, cmd, rollback); + if (ret < 0) + break; + } ret = 
snd_soc_link_trigger(substream, cmd, rollback); break; } From 7883490cba002121a5870e786a1dc0acce5e1caf Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 16 Jul 2021 18:00:13 +0530 Subject: [PATCH 112/794] ASoC: amd: reverse stop sequence for stoneyridge platform For Stoneyridge platform, it is required to invoke DMA driver stop first rather than invoking DWC I2S controller stop. Enable dai_link structure stop_dma_first flag to reverse the stop sequence. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20210716123015.15697-2-vijendar.mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp-da7219-max98357a.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c index 84e3906abd4f..9449fb40a956 100644 --- a/sound/soc/amd/acp-da7219-max98357a.c +++ b/sound/soc/amd/acp-da7219-max98357a.c @@ -576,6 +576,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { | SND_SOC_DAIFMT_CBM_CFM, .init = cz_rt5682_init, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_play_ops, SND_SOC_DAILINK_REG(designware1, rt5682, platform), }, @@ -585,6 +586,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_cap_ops, SND_SOC_DAILINK_REG(designware2, rt5682, platform), }, @@ -594,6 +596,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_max_play_ops, SND_SOC_DAILINK_REG(designware3, mx, platform), }, @@ -604,6 +607,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_dmic0_cap_ops, SND_SOC_DAILINK_REG(designware3, adau, platform), }, @@
-614,6 +618,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_dmic1_cap_ops, SND_SOC_DAILINK_REG(designware2, adau, platform), }, From 6a503e1c455316fd0bfd8188c0a62cce7c5525ca Mon Sep 17 00:00:00 2001 From: Oder Chiou Date: Fri, 16 Jul 2021 16:58:53 +0800 Subject: [PATCH 113/794] ASoC: rt5682: Fix the issue of garbled recording after powerd_dbus_suspend While using the DMIC recording, the garbled data will be captured by the DMIC. It is caused by the critical power of PLL closed in the jack detect function. Signed-off-by: Oder Chiou Link: https://lore.kernel.org/r/20210716085853.20170-1-oder_chiou@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index e4c91571abae..abcd6f483788 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -973,10 +973,14 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); - if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS")) + if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS") && + !snd_soc_dapm_get_pin_status(dapm, "PLL1") && + !snd_soc_dapm_get_pin_status(dapm, "PLL2B")) snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_MB, 0); - if (!snd_soc_dapm_get_pin_status(dapm, "Vref2")) + if (!snd_soc_dapm_get_pin_status(dapm, "Vref2") && + !snd_soc_dapm_get_pin_status(dapm, "PLL1") && + !snd_soc_dapm_get_pin_status(dapm, "PLL2B")) snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2, 0); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, From 59089a189e3adde4cf85f2ce479738d1ae4c514d Mon Sep 17 00:00:00 
2001 From: Daniel Borkmann Date: Tue, 29 Jun 2021 09:39:15 +0000 Subject: [PATCH 114/794] bpf: Remove superfluous aux sanitation on subprog rejection Follow-up to fe9a5ca7e370 ("bpf: Do not mark insn as seen under speculative path verification"). The sanitize_insn_aux_data() helper does not serve a particular purpose in today's code. The original intention for the helper was that if function-by-function verification fails, a given program would be cleared from temporary insn_aux_data[], and then its verification would be re-attempted in the context of the main program a second time. However, a failure in do_check_subprogs() will skip do_check_main() and propagate the error to the user instead, thus such situation can never occur. Given its interaction is not compatible to the Spectre v1 mitigation (due to comparing aux->seen with env->pass_cnt), just remove sanitize_insn_aux_data() to avoid future bugs in this area. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9de3c9c3267c..8a7a28b4cfb9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12752,37 +12752,6 @@ static void free_states(struct bpf_verifier_env *env) } } -/* The verifier is using insn_aux_data[] to store temporary data during - * verification and to store information for passes that run after the - * verification like dead code sanitization. do_check_common() for subprogram N - * may analyze many other subprograms. sanitize_insn_aux_data() clears all - * temporary data after do_check_common() finds that subprogram N cannot be - * verified independently. pass_cnt counts the number of times - * do_check_common() was run and insn->aux->seen tells the pass number - * insn_aux_data was touched. These variables are compared to clear temporary - * data from failed pass. 
For testing and experiments do_check_common() can be - * run multiple times even when prior attempt to verify is unsuccessful. - * - * Note that special handling is needed on !env->bypass_spec_v1 if this is - * ever called outside of error path with subsequent program rejection. - */ -static void sanitize_insn_aux_data(struct bpf_verifier_env *env) -{ - struct bpf_insn *insn = env->prog->insnsi; - struct bpf_insn_aux_data *aux; - int i, class; - - for (i = 0; i < env->prog->len; i++) { - class = BPF_CLASS(insn[i].code); - if (class != BPF_LDX && class != BPF_STX) - continue; - aux = &env->insn_aux_data[i]; - if (aux->seen != env->pass_cnt) - continue; - memset(aux, 0, offsetof(typeof(*aux), orig_idx)); - } -} - static int do_check_common(struct bpf_verifier_env *env, int subprog) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); @@ -12859,9 +12828,6 @@ out: if (!ret && pop_log) bpf_vlog_reset(&env->log, 0); free_states(env); - if (ret) - /* clean aux data in case subprog was rejected */ - sanitize_insn_aux_data(env); return ret; } From e042aa532c84d18ff13291d00620502ce7a38dda Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Jul 2021 09:18:21 +0000 Subject: [PATCH 115/794] bpf: Fix pointer arithmetic mask tightening under state pruning In 7fedb63a8307 ("bpf: Tighten speculative pointer arithmetic mask") we narrowed the offset mask for unprivileged pointer arithmetic in order to mitigate a corner case where in the speculative domain it is possible to advance, for example, the map value pointer by up to value_size-1 out-of- bounds in order to leak kernel memory via side-channel to user space. The verifier's state pruning for scalars leaves one corner case open where in the first verification path R_x holds an unknown scalar with an aux->alu_limit of e.g. 
7, and in a second verification path that same register R_x, here denoted as R_x', holds an unknown scalar which has tighter bounds and would thus satisfy range_within(R_x, R_x') as well as tnum_in(R_x, R_x') for state pruning, yielding an aux->alu_limit of 3: Given the second path fits the register constraints for pruning, the final generated mask from aux->alu_limit will remain at 7. While technically not wrong for the non-speculative domain, it would however be possible to craft similar cases where the mask would be too wide as in 7fedb63a8307. One way to fix it is to detect the presence of unknown scalar map pointer arithmetic and force a deeper search on unknown scalars to ensure that we do not run into a masking mismatch. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e774ecc1cd1f..7ba7e800d472 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -414,6 +414,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; bool allow_ptr_to_map_access; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8a7a28b4cfb9..657062cb4d85 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6561,6 +6561,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + + /* Limit pruning on unknown scalars to enable deep search for + * potential masking differences from other program paths. 
+ */ + if (!off_is_imm) + env->explore_alu_limits = true; } err = update_alu_sanitation_state(aux, alu_state, alu_limit); @@ -9936,8 +9942,8 @@ next: } /* Returns true if (rold safe implies rcur safe) */ -static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, - struct bpf_id_pair *idmap) +static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, + struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) { bool equal; @@ -9963,6 +9969,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; switch (rold->type) { case SCALAR_VALUE: + if (env->explore_alu_limits) + return false; if (rcur->type == SCALAR_VALUE) { if (!rold->precise && !rcur->precise) return true; @@ -10053,9 +10061,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; } -static bool stacksafe(struct bpf_func_state *old, - struct bpf_func_state *cur, - struct bpf_id_pair *idmap) +static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, + struct bpf_func_state *cur, struct bpf_id_pair *idmap) { int i, spi; @@ -10100,9 +10107,8 @@ static bool stacksafe(struct bpf_func_state *old, continue; if (old->stack[spi].slot_type[0] != STACK_SPILL) continue; - if (!regsafe(&old->stack[spi].spilled_ptr, - &cur->stack[spi].spilled_ptr, - idmap)) + if (!regsafe(env, &old->stack[spi].spilled_ptr, + &cur->stack[spi].spilled_ptr, idmap)) /* when explored and current stack slot are both storing * spilled registers, check that stored pointers types * are the same as well. 
@@ -10159,10 +10165,11 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); for (i = 0; i < MAX_BPF_REG; i++) - if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch)) + if (!regsafe(env, &old->regs[i], &cur->regs[i], + env->idmap_scratch)) return false; - if (!stacksafe(old, cur, env->idmap_scratch)) + if (!stacksafe(env, old, cur, env->idmap_scratch)) return false; if (!refsafe(old, cur)) From a6c39de76d709f30982d4b80a9b9537e1d388858 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Jul 2021 13:15:33 +0000 Subject: [PATCH 116/794] bpf, selftests: Add test cases for pointer alu from multiple paths Add several test cases for checking update_alu_sanitation_state() under multiple paths: # ./test_verifier [...] #1061/u map access: known scalar += value_ptr unknown vs const OK #1061/p map access: known scalar += value_ptr unknown vs const OK #1062/u map access: known scalar += value_ptr const vs unknown OK #1062/p map access: known scalar += value_ptr const vs unknown OK #1063/u map access: known scalar += value_ptr const vs const (ne) OK #1063/p map access: known scalar += value_ptr const vs const (ne) OK #1064/u map access: known scalar += value_ptr const vs const (eq) OK #1064/p map access: known scalar += value_ptr const vs const (eq) OK #1065/u map access: known scalar += value_ptr unknown vs unknown (eq) OK #1065/p map access: known scalar += value_ptr unknown vs unknown (eq) OK #1066/u map access: known scalar += value_ptr unknown vs unknown (lt) OK #1066/p map access: known scalar += value_ptr unknown vs unknown (lt) OK #1067/u map access: known scalar += value_ptr unknown vs unknown (gt) OK #1067/p map access: known scalar += value_ptr unknown vs unknown (gt) OK [...] 
Summary: 1762 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- .../selftests/bpf/verifier/value_ptr_arith.c | 229 ++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index a3e593ddfafc..2debba4e8a3a 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1,3 +1,232 @@ +{ + "map access: known scalar += value_ptr unknown vs const", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs unknown", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + 
BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (ne)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (eq)", + .insns = { + 
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (eq)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result 
= ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (lt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (gt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, 
BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, { "map access: known scalar += value_ptr from different maps", .insns = { From 0ccfd1ba84a4503b509250941af149e9ebd605ca Mon Sep 17 00:00:00 2001 From: Yoshitaka Ikeda Date: Fri, 16 Jul 2021 14:33:12 +0000 Subject: [PATCH 117/794] spi: spi-cadence-quadspi: Revert "Fix division by zero warning" Revert to change to a better code. This reverts commit 55cef88bbf12f3bfbe5c2379a8868a034707e755. Signed-off-by: Yoshitaka Ikeda Link: https://lore.kernel.org/r/bd30bdb4-07c4-f713-5648-01c898d51f1b@nskint.co.jp Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 13d1f0ce618e..7a00346ff9b9 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -307,13 +307,11 @@ static unsigned int cqspi_calc_rdreg(struct cqspi_flash_pdata *f_pdata) static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr) { - unsigned int dummy_clk = 0; + unsigned int dummy_clk; - if (op->dummy.buswidth && op->dummy.nbytes) { - dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); - if (dtr) - dummy_clk /= 2; - } + dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); + if (dtr) + dummy_clk /= 2; return dummy_clk; } From 0e85ee897858b1c7a5de53f496d016899d9639c5 Mon Sep 17 00:00:00 2001 From: Yoshitaka Ikeda Date: Fri, 16 Jul 2021 14:35:13 +0000 Subject: [PATCH 118/794] spi: 
spi-cadence-quadspi: Fix division by zero warning Fix below division by zero warning: - The reason for dividing by zero is because the dummy bus width is zero, but if the dummy n bytes is zero, it indicates that there is no data transfer, so we can just return zero without doing any calculations. [ 0.795337] Division by zero in kernel. : [ 0.834051] [<807fd40c>] (__div0) from [<804e1acc>] (Ldiv0+0x8/0x10) [ 0.839097] [<805f0710>] (cqspi_exec_mem_op) from [<805edb4c>] (spi_mem_exec_op+0x3b0/0x3f8) Fixes: 7512eaf54190 ("spi: cadence-quadspi: Fix dummy cycle calculation when buswidth > 1") Signed-off-by: Yoshitaka Ikeda Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/r/92eea403-9b21-2488-9cc1-664bee760c5e@nskint.co.jp Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 7a00346ff9b9..d62d69dd72b9 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -309,6 +309,9 @@ static unsigned int cqspi_calc_dummy(const struct spi_mem_op *op, bool dtr) { unsigned int dummy_clk; + if (!op->dummy.nbytes) + return 0; + dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth); if (dtr) dummy_clk /= 2; From 674a9f1f6815849bfb5bf385e7da8fc198aaaba9 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 8 Jul 2021 11:46:54 +0200 Subject: [PATCH 119/794] efi/tpm: Differentiate missing and invalid final event log table. Missing TPM final event log table is not a firmware bug. Clearly if providing event log in the old format makes the final event log invalid it should not be provided at least in that case. 
Fixes: b4f1874c6216 ("tpm: check event log version before reading final events") Signed-off-by: Michal Suchanek Reviewed-by: Jarkko Sakkinen Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/tpm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index c1955d320fec..8f665678e9e3 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -62,9 +62,11 @@ int __init efi_tpm_eventlog_init(void) tbl_size = sizeof(*log_tbl) + log_tbl->size; memblock_reserve(efi.tpm_log, tbl_size); - if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR || - log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) { - pr_warn(FW_BUG "TPM Final Events table missing or invalid\n"); + if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { + pr_info("TPM Final Events table not present\n"); + goto out; + } else if (log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) { + pr_warn(FW_BUG "TPM Final Events table invalid\n"); goto out; } From 2bab693a608bdf614b9fcd44083c5100f34b9f77 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Jul 2021 19:43:26 +0100 Subject: [PATCH 120/794] firmware/efi: Tell memblock about EFI iomem reservations kexec_load_file() relies on the memblock infrastructure to avoid stamping over regions of memory that are essential to the survival of the system. However, nobody seems to agree how to flag these regions as reserved, and (for example) EFI only publishes its reservations in /proc/iomem for the benefit of the traditional, userspace based kexec tool. On arm64 platforms with GICv3, this can result in the payload being placed at the location of the LPI tables. Shock, horror! Let's augment the EFI reservation code with a memblock_reserve() call, protecting our dear tables from the secondary kernel invasion. 
Reported-by: Moritz Fischer Tested-by: Moritz Fischer Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Cc: Ard Biesheuvel Cc: James Morse Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4b7ee3fa9224..847f33ffc4ae 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -896,6 +896,7 @@ static int __init efi_memreserve_map_root(void) static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) { struct resource *res, *parent; + int ret; res = kzalloc(sizeof(struct resource), GFP_ATOMIC); if (!res) @@ -908,7 +909,17 @@ static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) /* we expect a conflict with a 'System RAM' region */ parent = request_resource_conflict(&iomem_resource, res); - return parent ? request_resource(parent, res) : 0; + ret = parent ? request_resource(parent, res) : 0; + + /* + * Given that efi_mem_reserve_iomem() can be called at any + * time, only call memblock_reserve() if the architecture + * keeps the infrastructure around. + */ + if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !ret) + memblock_reserve(addr, size); + + return ret; } int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) From 947228cb9f1a2c69a5da5279c48f02bb4f49ce32 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 2 Jul 2021 12:10:44 -0700 Subject: [PATCH 121/794] efi/libstub: Fix the efi_load_initrd function description The soft_limit and hard_limit in the function efi_load_initrd describes the preferred and max address of initrd loading location respectively. However, the description wrongly describes it as the size of the allocated memory. Fix the function description. 
Signed-off-by: Atish Patra Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efi-stub-helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index aa8da0a49829..ae87dded989d 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -630,8 +630,8 @@ efi_status_t efi_load_initrd_cmdline(efi_loaded_image_t *image, * @image: EFI loaded image protocol * @load_addr: pointer to loaded initrd * @load_size: size of loaded initrd - * @soft_limit: preferred size of allocated memory for loading the initrd - * @hard_limit: minimum size of allocated memory + * @soft_limit: preferred address for loading the initrd + * @hard_limit: upper limit address for loading the initrd * * Return: status code */ From 9d7a6c95f62bc335b62aaf9d50590122bd03a796 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 5 Jul 2021 10:44:52 +0200 Subject: [PATCH 122/794] perf: Fix required permissions if sigtrap is requested If perf_event_open() is called with another task as target and perf_event_attr::sigtrap is set, and the target task's user does not match the calling user, also require the CAP_KILL capability or PTRACE_MODE_ATTACH permissions. Otherwise, with the CAP_PERFMON capability alone it would be possible for a user to send SIGTRAP signals via perf events to another user's tasks. This could potentially result in those tasks being terminated if they cannot handle SIGTRAP signals. Note: The check complements the existing capability check, but is not supposed to supersede the ptrace_may_access() check. At a high level we now have: capable of CAP_PERFMON and (CAP_KILL if sigtrap) OR ptrace_may_access(...) 
// also checks for same thread-group and uid Fixes: 97ba62b27867 ("perf: Add support for SIGTRAP on perf events") Reported-by: Dmitry Vyukov Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Acked-by: Dmitry Vyukov Cc: # 5.13+ Link: https://lore.kernel.org/r/20210705084453.2151729-1-elver@google.com --- kernel/events/core.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 464917096e73..c13730b7ac01 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12158,10 +12158,33 @@ SYSCALL_DEFINE5(perf_event_open, } if (task) { + unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS; + bool is_capable; + err = down_read_interruptible(&task->signal->exec_update_lock); if (err) goto err_file; + is_capable = perfmon_capable(); + if (attr.sigtrap) { + /* + * perf_event_attr::sigtrap sends signals to the other + * task. Require the current task to also have + * CAP_KILL. + */ + rcu_read_lock(); + is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL); + rcu_read_unlock(); + + /* + * If the required capabilities aren't available, checks + * for ptrace permissions: upgrade to ATTACH, since + * sending signals can effectively change the target + * task. + */ + ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS; + } + /* * Preserve ptrace permission check for backwards compatibility. * @@ -12171,7 +12194,7 @@ SYSCALL_DEFINE5(perf_event_open, * perf_event_exit_task() that could imply). */ err = -EACCES; - if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) + if (!is_capable && !ptrace_may_access(task, ptrace_mode)) goto err_cred; } From b068fc04de10fff8974f6ef32b861ad134d94ba4 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 5 Jul 2021 10:44:53 +0200 Subject: [PATCH 123/794] perf: Refactor permissions check into perf_check_permission() Refactor the permission check in perf_event_open() into a helper perf_check_permission(). 
This makes the permission check logic more readable (because we no longer have a negated disjunction). Add a comment mentioning the ptrace check also checks the uid. No functional change intended. Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Link: https://lore.kernel.org/r/20210705084453.2151729-2-elver@google.com --- kernel/events/core.c | 58 ++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index c13730b7ac01..1cb1f9b8392e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11917,6 +11917,37 @@ again: return gctx; } +static bool +perf_check_permission(struct perf_event_attr *attr, struct task_struct *task) +{ + unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS; + bool is_capable = perfmon_capable(); + + if (attr->sigtrap) { + /* + * perf_event_attr::sigtrap sends signals to the other task. + * Require the current task to also have CAP_KILL. + */ + rcu_read_lock(); + is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL); + rcu_read_unlock(); + + /* + * If the required capabilities aren't available, checks for + * ptrace permissions: upgrade to ATTACH, since sending signals + * can effectively change the target task. + */ + ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS; + } + + /* + * Preserve ptrace permission check for backwards compatibility. The + * ptrace check also includes checks that the current task and other + * task have matching uids, and is therefore not done here explicitly. 
+ */ + return is_capable || ptrace_may_access(task, ptrace_mode); +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -12158,43 +12189,18 @@ SYSCALL_DEFINE5(perf_event_open, } if (task) { - unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS; - bool is_capable; - err = down_read_interruptible(&task->signal->exec_update_lock); if (err) goto err_file; - is_capable = perfmon_capable(); - if (attr.sigtrap) { - /* - * perf_event_attr::sigtrap sends signals to the other - * task. Require the current task to also have - * CAP_KILL. - */ - rcu_read_lock(); - is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL); - rcu_read_unlock(); - - /* - * If the required capabilities aren't available, checks - * for ptrace permissions: upgrade to ATTACH, since - * sending signals can effectively change the target - * task. - */ - ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS; - } - /* - * Preserve ptrace permission check for backwards compatibility. - * * We must hold exec_update_lock across this and any potential * perf_install_in_context() call for this new event to * serialize against exec() altering our credentials (and the * perf_event_exit_task() that could imply). */ err = -EACCES; - if (!is_capable && !ptrace_may_access(task, ptrace_mode)) + if (!perf_check_permission(&attr, task)) goto err_cred; } From 9cb2ff11171264d10be7ea9e31d9ee5d49ba84a5 Mon Sep 17 00:00:00 2001 From: Apurva Nandan Date: Tue, 13 Jul 2021 12:57:41 +0000 Subject: [PATCH 124/794] spi: cadence-quadspi: Disable Auto-HW polling cadence-quadspi has a builtin Auto-HW polling functionality using which it keeps track of completion of write operations. When Auto-HW polling is enabled, it automatically initiates status register read operation, until the flash clears its busy bit. cadence-quadspi controller doesn't allow an address phase when auto-polling the busy bit on the status register.
Unlike SPI NOR flashes, SPI NAND flashes do require the address of status register when polling the busy bit using the read register operation. As Auto-HW polling is enabled by default, cadence-quadspi returns a timeout for every write operation after an indefinite amount of polling on SPI NAND flashes. Disable Auto-HW polling completely as the spi-nor core, spinand core, etc. take care of polling the busy bit on their own. Signed-off-by: Apurva Nandan Link: https://lore.kernel.org/r/20210713125743.1540-2-a-nandan@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index d62d69dd72b9..a2de23516553 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -800,19 +800,20 @@ static int cqspi_write_setup(struct cqspi_flash_pdata *f_pdata, reg = cqspi_calc_rdreg(f_pdata); writel(reg, reg_base + CQSPI_REG_RD_INSTR); - if (f_pdata->dtr) { - /* - * Some flashes like the cypress Semper flash expect a 4-byte - * dummy address with the Read SR command in DTR mode, but this - * controller does not support sending address with the Read SR - * command. So, disable write completion polling on the - * controller's side. spi-nor will take care of polling the - * status register. - */ - reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL); - reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL; - writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL); - } + /* + * SPI NAND flashes require the address of the status register to be + * passed in the Read SR command. Also, some SPI NOR flashes like the + * cypress Semper flash expect a 4-byte dummy address in the Read SR + * command in DTR mode. + * + * But this controller does not support address phase in the Read SR + * command when doing auto-HW polling. So, disable write completion + * polling on the controller's side. 
spinand and spi-nor will take + * care of polling the status register. + */ + reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL); + reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL; + writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL); reg = readl(reg_base + CQSPI_REG_SIZE); reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK; From ea272ce46f3c86d15d9b58bd4d8d44de6cee04b7 Mon Sep 17 00:00:00 2001 From: Veerabadhran Gopalakrishnan Date: Fri, 9 Jul 2021 13:00:11 +0530 Subject: [PATCH 125/794] amdgpu/nv.c - Added video codec support for Yellow Carp Added the supported codecs in the video capabilities query. Signed-off-by: Veerabadhran Gopalakrishnan Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 94a2c0742ee5..04f6cf38c552 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -333,6 +333,19 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = { .codec_array = NULL, }; +/* Yellow Carp*/ +static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, +}; + +static const struct amdgpu_video_codecs yc_video_codecs_decode = { + .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array), + .codec_array = bg_video_codecs_decode_array, +}; + static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -353,12 +366,17 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: case CHIP_VANGOGH: - case 
CHIP_YELLOW_CARP: if (encode) *codecs = &nv_video_codecs_encode; else *codecs = &sc_video_codecs_decode; return 0; + case CHIP_YELLOW_CARP: + if (encode) + *codecs = &nv_video_codecs_encode; + else + *codecs = &yc_video_codecs_decode; + return 0; case CHIP_BEIGE_GOBY: if (encode) *codecs = &bg_video_codecs_encode; From 6505d6fcc616472c1b4d6298beacf52673c7b072 Mon Sep 17 00:00:00 2001 From: Veerabadhran Gopalakrishnan Date: Tue, 13 Jul 2021 23:21:43 +0530 Subject: [PATCH 126/794] amdgpu/nv.c - Optimize code for video codec support structure Optimized the code for codec info structure initialization Signed-off-by: Veerabadhran Gopalakrishnan Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 + drivers/gpu/drm/amd/amdgpu/nv.c | 223 ++++------------------------ drivers/gpu/drm/amd/amdgpu/soc15.c | 176 +++------------------- 3 files changed, 56 insertions(+), 350 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c0316eaba547..8ac6eb9f1fdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -619,6 +619,13 @@ struct amdgpu_video_codec_info { u32 max_level; }; +#define codec_info_build(type, width, height, level) \ + .codec_type = type,\ + .max_width = width,\ + .max_height = height,\ + .max_pixels_per_frame = height * width,\ + .max_level = level, + struct amdgpu_video_codecs { const u32 codec_count; const struct amdgpu_video_codec_info *codec_array; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 04f6cf38c552..cf73a6923203 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -64,32 +64,13 @@ #include "smuio_v11_0.h" #include "smuio_v11_0_6.h" -#define codec_info_build(type, width, height, level) \ - .codec_type = type,\ - .max_width = width,\ - .max_height = height,\ - .max_pixels_per_frame = height * width,\ - .max_level = level, - static const struct 
amd_ip_funcs nv_common_ip_funcs; /* Navi */ static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static const struct amdgpu_video_codecs nv_video_codecs_encode = @@ -101,55 +82,13 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = /* Navi1x */ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - 
.max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs nv_video_codecs_decode = @@ -161,62 +100,14 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode = /* Sienna Cichlid */ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, - { - .codec_type = 
AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs sc_video_codecs_decode = @@ -228,80 +119,20 @@ static const struct amdgpu_video_codecs sc_video_codecs_decode = /* SRIOV Sienna Cichlid, not const since data is controlled by host */ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = 
AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b02436401d46..b7d350be8050 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -88,20 +88,8 @@ /* Vega, Raven, Arcturus */ static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] = { - { - 
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_encode = @@ -113,48 +101,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode = /* Vega */ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + 
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_decode = @@ -166,55 +118,13 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode = /* Raven */ static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + 
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs rv_video_codecs_decode = @@ -226,55 +136,13 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode = /* Renoir, Arcturus */ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + 
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs rn_video_codecs_decode = From 4fff6fbca12524358a32e56f125ae738141f62b4 Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Wed, 14 Jul 2021 15:07:22 +0800 Subject: [PATCH 127/794] drm/amdgpu: update the golden setting for vangogh This patch is to update the golden setting for vangogh. Signed-off-by: Xiaojian Du Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f5e9c022960b..a86a0b347e73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3379,6 +3379,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), From 3e94b5965e624f7e6d8dd18eb8f3bf2bb99ba30d Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 15 Jul 2021 11:08:48 +0800 Subject: [PATCH 128/794] drm/amdgpu: update golden setting for sienna_cichlid Update GFX golden setting for sienna_cichlid. 
Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a86a0b347e73..f4771f39a280 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3300,6 +3300,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) }; From cfe4e8f00f8f19ba305800f64962d1949ab5d4ca Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 15 Jul 2021 14:49:08 +0800 Subject: [PATCH 129/794] drm/amdgpu: update gc golden setting for dimgrey_cavefish Update gc_10_3_4 golden setting. 
Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f4771f39a280..a64b2c706090 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3447,6 +3447,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020) From bd89c991c6c26fb215c63bd21b6d56e7a4ba2ef6 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 15 Jul 2021 14:52:37 +0800 Subject: [PATCH 130/794] drm/amd/pm: update DRIVER_IF_VERSION for beige_goby Update the version to 0xD for beige_goby. 
Signed-off-by: Tao Zhou Reviewed-by: Jack Gui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index 1962a5877191..f61b5c914a3d 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -34,7 +34,7 @@ #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xE #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x03 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF -#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0x9 +#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0xD /* MP Apertures */ #define MP0_Public 0x03800000 From 353ca0fa56307bfc821a6fb444099e71899f199d Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Wed, 14 Jul 2021 09:06:52 +0100 Subject: [PATCH 131/794] drm/amd/display: Fix 10bit 4K display on CIK GPUs Commit 72a7cf0aec0c ("drm/amd/display: Keep linebuffer pixel depth at 30bpp for DCE-11.0.") doesn't seem to have fixed 10bit 4K rendering over DisplayPort for CIK GPUs. On my machine with a HAWAII GPU I get a broken image that looks like it has an effective resolution of 1920x1080 but scaled up in an irregular way. Reverting the commit or applying this patch fixes the problem on v5.14-rc1. Fixes: 72a7cf0aec0c ("drm/amd/display: Keep linebuffer pixel depth at 30bpp for DCE-11.0.") Acked-by: Mario Kleiner Reviewed-by: Harry Wentland Signed-off-by: Liviu Dudau Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a6a67244a322..1596f6b7fed7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1062,7 +1062,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) * so use only 30 bpp on DCE_VERSION_11_0.
Testing with DCE 11.2 and 8.3 * did not show such problems, so this seems to be the exception. */ - if (plane_state->ctx->dce_version != DCE_VERSION_11_0) + if (plane_state->ctx->dce_version > DCE_VERSION_11_0) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP; else pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; From 2cc3aeb5ecccec0d266813172fcd82b4b5fa5803 Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Fri, 16 Jul 2021 10:02:18 +0300 Subject: [PATCH 132/794] skbuff: Fix a potential race while recycling page_pool packets As Alexander points out, when we are trying to recycle a cloned/expanded SKB we might trigger a race. The recycling code relies on the pp_recycle bit to trigger, which we carry over to cloned SKBs. If that cloned SKB gets expanded or if we get references to the frags, call skb_release_data() and overwrite skb->head, we are creating separate instances accessing the same page frags. Since the skb_release_data() will first try to recycle the frags, there's a potential race between the original and cloned SKB, since both will have the pp_recycle bit set. Fix this by explicitly marking those SKBs as not recyclable. The atomic_sub_return effectively limits us to a single release case, and when we are calling skb_release_data we are also releasing the option to perform the recycling, or releasing the pages from the page pool. Fixes: 6a5bcd84e886 ("page_pool: Allow drivers to hint on SKB recycling") Reported-by: Alexander Duyck Suggested-by: Alexander Duyck Reviewed-by: Alexander Duyck Acked-by: Jesper Dangaard Brouer Signed-off-by: Ilias Apalodimas Signed-off-by: David S. Miller --- net/core/skbuff.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f63de967ac25..0fe97d660790 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -663,7 +663,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb->cloned && atomic_sub_return(skb->nohdr ?
(1 << SKB_DATAREF_SHIFT) + 1 : 1, &shinfo->dataref)) - return; + goto exit; skb_zcopy_clear(skb, true); @@ -674,6 +674,17 @@ static void skb_release_data(struct sk_buff *skb) kfree_skb_list(shinfo->frag_list); skb_free_head(skb); +exit: + /* When we clone an SKB we copy the reycling bit. The pp_recycle + * bit is only set on the head though, so in order to avoid races + * while trying to recycle fragments on __skb_frag_unref() we need + * to make one SKB responsible for triggering the recycle path. + * So disable the recycling bit if an SKB is cloned and we have + * additional references to to the fragmented part of the SKB. + * Eventually the last SKB will have the recycling bit set and it's + * dataref set to 0, which will trigger the recycling + */ + skb->pp_recycle = 0; } /* From 11d8d98cbeef1496469b268d79938b05524731e8 Mon Sep 17 00:00:00 2001 From: Eric Woudstra Date: Fri, 16 Jul 2021 17:36:39 +0200 Subject: [PATCH 133/794] mt7530 fix mt7530_fdb_write vid missing ivl bit According to reference guides mt7530 (mt7620) and mt7531: NOTE: When IVL is reset, MAC[47:0] and FID[2:0] will be used to read/write the address table. When IVL is set, MAC[47:0] and CVID[11:0] will be used to read/write the address table. Since the function only fills in CVID and no FID, we need to set the IVL bit. The existing code does not set it. This is a fix for the issue I dropped here earlier: http://lists.infradead.org/pipermail/linux-mediatek/2021-June/025697.html With this patch, it is now possible to delete the 'self' fdb entry manually. However, wifi roaming still has the same issue, the entry does not get deleted automatically. Wifi roaming also needs a fix somewhere else to function correctly in combination with vlan. Signed-off-by: Eric Woudstra Signed-off-by: David S. 
Miller --- drivers/net/dsa/mt7530.c | 1 + drivers/net/dsa/mt7530.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 93136f7e69f5..9e4df35f92cc 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -366,6 +366,7 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, int i; reg[1] |= vid & CVID_MASK; + reg[1] |= ATA2_IVL; reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; /* STATIC_ENT indicate that entry is static wouldn't diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 334d610a503d..b19b389ff10a 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -79,6 +79,7 @@ enum mt753x_bpdu_port_fw { #define STATIC_EMP 0 #define STATIC_ENT 3 #define MT7530_ATA2 0x78 +#define ATA2_IVL BIT(15) /* Register for address table write data */ #define MT7530_ATWD 0x7c From 5f119ba1d5771bbf46d57cff7417dcd84d3084ba Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Wed, 14 Jul 2021 17:13:20 +0800 Subject: [PATCH 134/794] net: decnet: Fix sleeping inside in af_decnet release_sock() is a blocking function; it would change the state after sleeping. Use wait_woken() instead. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yajun Deng Signed-off-by: David S.
Miller --- net/decnet/af_decnet.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 5dbd45dc35ad..dc92a67baea3 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -816,7 +816,7 @@ static int dn_auto_bind(struct socket *sock) static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) { struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; if (scp->state != DN_CR) @@ -826,11 +826,11 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); dn_send_conn_conf(sk, allocation); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); for(;;) { release_sock(sk); if (scp->state == DN_CC) - *timeo = schedule_timeout(*timeo); + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); lock_sock(sk); err = 0; if (scp->state == DN_RUN) @@ -844,9 +844,8 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err == 0) { sk->sk_socket->state = SS_CONNECTED; } else if (scp->state != DN_CC) { @@ -858,7 +857,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) static int dn_wait_run(struct sock *sk, long *timeo) { struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err = 0; if (scp->state == DN_RUN) @@ -867,11 +866,11 @@ static int dn_wait_run(struct sock *sk, long *timeo) if (!*timeo) return -EALREADY; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); for(;;) { release_sock(sk); if (scp->state == DN_CI || scp->state == DN_CC) - *timeo = 
schedule_timeout(*timeo); + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); lock_sock(sk); err = 0; if (scp->state == DN_RUN) @@ -885,9 +884,8 @@ static int dn_wait_run(struct sock *sk, long *timeo) err = -ETIMEDOUT; if (!*timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); out: if (err == 0) { sk->sk_socket->state = SS_CONNECTED; @@ -1032,16 +1030,16 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt) static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sk_buff *skb = NULL; int err = 0; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); for(;;) { release_sock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) { - *timeo = schedule_timeout(*timeo); + *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); skb = skb_dequeue(&sk->sk_receive_queue); } lock_sock(sk); @@ -1056,9 +1054,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return skb == NULL ? ERR_PTR(err) : skb; } From ba02920c51debb9198e72b3a8726a7c5ae4ffb41 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Tue, 13 Jul 2021 17:05:46 +0530 Subject: [PATCH 135/794] arm64: tegra: Enable SMMU support for PCIe on Tegra194 As of commit c7289b1c8a4e ("arm64: tegra: Enable SMMU support on Tegra194"), SMMU support is enabled system-wide on Tegra194. However, there was a bit of overlap between the SMMU enablement and the PCIe support addition, so the PCIe device tree nodes are missing the iommus and interconnects properties. This in turn leads to SMMU faults for these devices, since by default the ARM SMMU will fault. 
Add the iommus and interconnects properties to all the PCIe device tree nodes to restore their functionality. Fixes: c7289b1c8a4e ("arm64: tegra: Enable SMMU support on Tegra194") Signed-off-by: Vidya Sagar Reviewed-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 60 +++++++++++++++++++++--- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 076d5efc4c3d..5ba7a4519b95 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1840,7 +1840,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE1R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE1W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE1>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE1 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14120000 { @@ -1890,7 +1894,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE2AR &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE2AW &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE2>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE2 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14140000 { @@ -1940,7 +1948,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE3R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE3W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE3>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE3 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14160000 { @@ -1990,7 +2002,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + 
iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14180000 { @@ -2040,7 +2056,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@141a0000 { @@ -2094,7 +2114,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14160000 { @@ -2127,6 +2151,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14180000 { @@ -2159,6 +2191,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@141a0000 { @@ -2194,6 +2234,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 
0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; sram@40000000 { From 5b69874f74cc5707edd95fcdaa757c507ac8af0f Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Jul 2021 16:09:41 -0700 Subject: [PATCH 136/794] bonding: fix build issue The commit 9a5605505d9c (" bonding: Add struct bond_ipesc to manage SA") is causing following build error when XFRM is not selected in kernel config. lld: error: undefined symbol: xfrm_dev_state_flush >>> referenced by bond_main.c:3453 (drivers/net/bonding/bond_main.c:3453) >>> net/bonding/bond_main.o:(bond_netdev_event) in archive drivers/built-in.a Fixes: 9a5605505d9c (" bonding: Add struct bond_ipesc to manage SA") Signed-off-by: Mahesh Bandewar CC: Taehee Yoo CC: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d22d78303311..31730efa7538 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3450,7 +3450,9 @@ static int bond_master_netdev_event(unsigned long event, return bond_event_changename(event_bond); case NETDEV_UNREGISTER: bond_remove_proc_entry(event_bond); +#ifdef CONFIG_XFRM_OFFLOAD xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); +#endif /* CONFIG_XFRM_OFFLOAD */ break; case NETDEV_REGISTER: bond_create_proc_entry(event_bond); From cfbe3650dd3ef2ea9a4420ca89d9a4df98af3fb6 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 14 Jul 2021 11:27:03 +0800 Subject: [PATCH 137/794] netfilter: nf_tables: fix audit memory leak in nf_tables_commit In nf_tables_commit, if nf_tables_commit_audit_alloc fails, it does not free the adp variable. Fix this by adding nf_tables_commit_audit_free which frees the linked list with the head node adl. 
backtrace: kmalloc include/linux/slab.h:591 [inline] kzalloc include/linux/slab.h:721 [inline] nf_tables_commit_audit_alloc net/netfilter/nf_tables_api.c:8439 [inline] nf_tables_commit+0x16e/0x1760 net/netfilter/nf_tables_api.c:8508 nfnetlink_rcv_batch+0x512/0xa80 net/netfilter/nfnetlink.c:562 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] nfnetlink_rcv+0x1fa/0x220 net/netfilter/nfnetlink.c:652 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x2c7/0x3e0 net/netlink/af_netlink.c:1340 netlink_sendmsg+0x36b/0x6b0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:702 [inline] sock_sendmsg+0x56/0x80 net/socket.c:722 Reported-by: syzbot Reported-by: kernel test robot Fixes: c520292f29b8 ("audit: log nftables configuration change events once per table") Signed-off-by: Dongliang Mu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index de182d1f7c4e..081437dd75b7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8445,6 +8445,16 @@ static int nf_tables_commit_audit_alloc(struct list_head *adl, return 0; } +static void nf_tables_commit_audit_free(struct list_head *adl) +{ + struct nft_audit_data *adp, *adn; + + list_for_each_entry_safe(adp, adn, adl, list) { + list_del(&adp->list); + kfree(adp); + } +} + static void nf_tables_commit_audit_collect(struct list_head *adl, struct nft_table *table, u32 op) { @@ -8509,6 +8519,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); if (ret) { nf_tables_commit_chain_prepare_cancel(net); + nf_tables_commit_audit_free(&adl); return ret; } if (trans->msg_type == NFT_MSG_NEWRULE || @@ -8518,6 +8529,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { 
nf_tables_commit_chain_prepare_cancel(net); + nf_tables_commit_audit_free(&adl); return ret; } } From bd31ecf44b8e18ccb1e5f6b50f85de6922a60de3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 16 Jul 2021 12:43:09 +1000 Subject: [PATCH 138/794] KVM: PPC: Book3S: Fix CONFIG_TRANSACTIONAL_MEM=n crash When running CPU_FTR_P9_TM_HV_ASSIST, HFSCR[TM] is set for the guest even if the host has CONFIG_TRANSACTIONAL_MEM=n, which causes it to be unprepared to handle guest exits while transactional. Normal guests don't have a problem because the HTM capability will not be advertised, but a rogue or buggy one could crash the host. Fixes: 4bb3c7a0208f ("KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9") Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210716024310.164448-1-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1d1fcc290fca..085fb8ecbf68 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2697,8 +2697,10 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) vcpu->arch.hfscr |= HFSCR_TM; +#endif } if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; From bc4188a2f56e821ea057aca6bf444e138d06c252 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 16 Jul 2021 12:43:10 +1000 Subject: [PATCH 139/794] KVM: PPC: Fix kvm_arch_vcpu_ioctl vcpu_load leak vcpu_put is not called if the user copy fails. This can result in preempt notifier corruption and crashes, among other issues. 
Fixes: b3cebfe8c1ca ("KVM: PPC: Move vcpu_load/vcpu_put down to each ioctl case in kvm_arch_vcpu_ioctl") Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210716024310.164448-2-npiggin@gmail.com --- arch/powerpc/kvm/powerpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index be33b5321a76..b4e6f70b97b9 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -2048,9 +2048,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_enable_cap cap; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&cap, argp, sizeof(cap))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); vcpu_put(vcpu); break; @@ -2074,9 +2074,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&dirty, argp, sizeof(dirty))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); vcpu_put(vcpu); break; From 1c2b9519159b470ef24b2638f4794e86e2952ab7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 16 Jul 2021 15:27:23 +0200 Subject: [PATCH 140/794] ALSA: sb: Fix potential ABBA deadlock in CSP driver SB16 CSP driver may hit potentially a typical ABBA deadlock in two code paths: In snd_sb_csp_stop(): spin_lock_irqsave(&p->chip->mixer_lock, flags); spin_lock(&p->chip->reg_lock); In snd_sb_csp_load(): spin_lock_irqsave(&p->chip->reg_lock, flags); spin_lock(&p->chip->mixer_lock); Also the similar pattern is seen in snd_sb_csp_start(). Although the practical impact is very small (those states aren't triggered in the same running state and this happens only on a real hardware, decades old ISA sound boards -- which must be very difficult to find nowadays), it's a real scenario and has to be fixed. 
This patch addresses those deadlocks by splitting the locks in snd_sb_csp_start() and snd_sb_csp_stop() for avoiding the nested locks. Reported-by: Jia-Ju Bai Cc: Link: https://lore.kernel.org/r/7b0fcdaf-cd4f-4728-2eae-48c151a92e10@gmail.com Link: https://lore.kernel.org/r/20210716132723.13216-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/sb/sb16_csp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/isa/sb/sb16_csp.c b/sound/isa/sb/sb16_csp.c index 5bbe6695689d..7ad8c5f7b664 100644 --- a/sound/isa/sb/sb16_csp.c +++ b/sound/isa/sb/sb16_csp.c @@ -816,6 +816,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7); + spin_unlock_irqrestore(&p->chip->mixer_lock, flags); spin_lock(&p->chip->reg_lock); set_mode_register(p->chip, 0xc0); /* c0 = STOP */ @@ -855,6 +856,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel spin_unlock(&p->chip->reg_lock); /* restore PCM volume */ + spin_lock_irqsave(&p->chip->mixer_lock, flags); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR); spin_unlock_irqrestore(&p->chip->mixer_lock, flags); @@ -880,6 +882,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p) mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7); + spin_unlock_irqrestore(&p->chip->mixer_lock, flags); spin_lock(&p->chip->reg_lock); if (p->running & SNDRV_SB_CSP_ST_QSOUND) { @@ -894,6 +897,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p) spin_unlock(&p->chip->reg_lock); /* restore PCM volume */ + spin_lock_irqsave(&p->chip->mixer_lock, flags); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, 
mixR); spin_unlock_irqrestore(&p->chip->mixer_lock, flags); From 33f735f137c6539e3ceceb515cd1e2a644005b49 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 16 Jul 2021 15:56:00 +0200 Subject: [PATCH 141/794] ALSA: hdmi: Expose all pins on MSI MS-7C94 board The BIOS on MSI Mortar B550m WiFi (MS-7C94) board with AMDGPU seems disabling the other pins than HDMI although it has more outputs including DP. This patch adds the board to the allow list for enabling all pins. Reported-by: Damjan Georgievski Cc: Link: https://lore.kernel.org/r/CAEk1YH4Jd0a8vfZxORVu7qg+Zsc-K+pR187ezNq8QhJBPW4gpw@mail.gmail.com Link: https://lore.kernel.org/r/20210716135600.24176-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 4b2cc8cb55c4..84c088912b3c 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1940,6 +1940,7 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), + SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), {} }; From 7f5231b114da76bfd5d0fc685d5cf408d1bbfca7 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Fri, 16 Jul 2021 21:08:02 +0530 Subject: [PATCH 142/794] platform/x86: amd-pmc: Fix undefined reference to __udivdi3 It was reported that on i386 config ------ on i386: ld: drivers/platform/x86/amd-pmc.o: in function `s0ix_stats_show': amd-pmc.c:(.text+0x100): undefined reference to `__udivdi3' ------- The reason for this is that 64-bit integer division is not supported on 32-bit architecture. Use do_div macro to fix this. 
Fixes: b9a4fa6978be ("platform/x86: amd-pmc: Add support for logging s0ix counters") Reported-by: Randy Dunlap Signed-off-by: Shyam Sundar S K Reviewed-by: Randy Dunlap # and build-tested Link: https://lore.kernel.org/r/20210716153802.2929670-1-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 663a4ca0580d..3481479a2942 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -189,7 +189,8 @@ static int s0ix_stats_show(struct seq_file *s, void *unused) exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET); /* It's in 48MHz. We need to convert it */ - residency = (exit_time - entry_time) / 48; + residency = exit_time - entry_time; + do_div(residency, 48); seq_puts(s, "=== S0ix statistics ===\n"); seq_printf(s, "S0ix Entry Time: %lld\n", entry_time); From e62fb1e3faae60f483a96c359c8d72bb04a7b728 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sat, 17 Jul 2021 16:36:05 +0200 Subject: [PATCH 143/794] platform/x86: think-lmi: Move pending_reboot_attr to the attributes sysfs dir Move the pending_reboot node under attributes dir where it should live, as documented in: Documentation/ABI/testing/sysfs-class-firmware-attributes. Also move the create / remove code to be together with the other code populating / cleaning the attributes sysfs dir. In the removal path this is necessary so that the remove is done before the kset_unregister(tlmi_priv.attribute_kset) call. 
Signed-off-by: Mark Pearson Co-developed-by: Hans de Goede Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210717143607.3580-1-hdegoede@redhat.com --- drivers/platform/x86/think-lmi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 64dcec53a7a0..989a8221dcd8 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -672,6 +672,7 @@ static void tlmi_release_attr(void) kobject_put(&tlmi_priv.setting[i]->kobj); } } + sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr); kset_unregister(tlmi_priv.attribute_kset); /* Authentication structures */ @@ -680,7 +681,6 @@ static void tlmi_release_attr(void) sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); kobject_put(&tlmi_priv.pwd_power->kobj); kset_unregister(tlmi_priv.authentication_kset); - sysfs_remove_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr); } static int tlmi_sysfs_init(void) @@ -733,6 +733,10 @@ static int tlmi_sysfs_init(void) goto fail_create_attr; } + ret = sysfs_create_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr); + if (ret) + goto fail_create_attr; + /* Create authentication entries */ tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL, &tlmi_priv.class_dev->kobj); @@ -760,11 +764,6 @@ static int tlmi_sysfs_init(void) if (ret) goto fail_create_attr; - /* Create global sysfs files */ - ret = sysfs_create_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr); - if (ret) - goto fail_create_attr; - return ret; fail_create_attr: From 30e78435d3bf803cabdc2a1c2eb36e6983aa4596 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 17 Jul 2021 16:36:06 +0200 Subject: [PATCH 144/794] platform/x86: think-lmi: Split kobject_init() and kobject_add() calls tlmi_sysfs_init() calls tlmi_release_attr() on errors which calls kobject_put() for attributes created by tlmi_analyze(), but if we bail 
early because of an error, then this means that some of the kobjects will not have been initialized yet; and we should thus not call kobject_put() on them. Switch from using kobject_init_and_add() inside tlmi_sysfs_init() to initializing all the created kobjects directly in tlmi_analyze() and only adding them from tlmi_sysfs_init(). This way all kobjects will always be initialized when tlmi_release_attr() gets called. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210717143607.3580-2-hdegoede@redhat.com --- drivers/platform/x86/think-lmi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 989a8221dcd8..c22edcf26aaa 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -723,8 +723,8 @@ static int tlmi_sysfs_init(void) /* Build attribute */ tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset; - ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype, - NULL, "%s", tlmi_priv.setting[i]->display_name); + ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL, + "%s", tlmi_priv.setting[i]->display_name); if (ret) goto fail_create_attr; @@ -745,8 +745,7 @@ static int tlmi_sysfs_init(void) goto fail_create_attr; } tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype, - NULL, "%s", "Admin"); + ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin"); if (ret) goto fail_create_attr; @@ -755,8 +754,7 @@ static int tlmi_sysfs_init(void) goto fail_create_attr; tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype, - NULL, "%s", "System"); + ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "System"); if (ret) goto fail_create_attr; @@ -836,6 +834,7 @@ static int tlmi_analyze(void) pr_info("Error 
retrieving possible values for %d : %s\n", i, setting->display_name); } + kobject_init(&setting->kobj, &tlmi_attr_setting_ktype); tlmi_priv.setting[i] = setting; tlmi_priv.settings_count++; kfree(item); @@ -862,6 +861,8 @@ static int tlmi_analyze(void) if (pwdcfg.password_state & TLMI_PAP_PWD) tlmi_priv.pwd_admin->valid = true; + kobject_init(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype); + tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL); if (!tlmi_priv.pwd_power) { ret = -ENOMEM; @@ -877,6 +878,8 @@ static int tlmi_analyze(void) if (pwdcfg.password_state & TLMI_POP_PWD) tlmi_priv.pwd_power->valid = true; + kobject_init(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype); + return 0; fail_clear_attr: From f7e506ec4a9966be8b2a87d3324302f0f5dd5a29 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 17 Jul 2021 16:36:07 +0200 Subject: [PATCH 145/794] platform/x86: think-lmi: Fix possible mem-leaks on tlmi_analyze() error-exit Fix 2 possible memleaks on error-exits from tlmi_analyze(): 1. If the kzalloc of pwd_power fails, then not only free the attributes, but also the allocated pwd_admin struct. 2. Freeing the attributes should also free the possible_values strings.
Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210717143607.3580-3-hdegoede@redhat.com --- drivers/platform/x86/think-lmi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index c22edcf26aaa..6cfed4427fb0 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -866,7 +866,7 @@ static int tlmi_analyze(void) tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL); if (!tlmi_priv.pwd_power) { ret = -ENOMEM; - goto fail_clear_attr; + goto fail_free_pwd_admin; } strscpy(tlmi_priv.pwd_power->kbdlang, "us", TLMI_LANG_MAXLEN); tlmi_priv.pwd_power->encoding = TLMI_ENCODING_ASCII; @@ -882,9 +882,15 @@ static int tlmi_analyze(void) return 0; +fail_free_pwd_admin: + kfree(tlmi_priv.pwd_admin); fail_clear_attr: - for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) - kfree(tlmi_priv.setting[i]); + for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) { + if (tlmi_priv.setting[i]) { + kfree(tlmi_priv.setting[i]->possible_values); + kfree(tlmi_priv.setting[i]); + } + } return ret; } From ec645dc96699ea6c37b6de86c84d7288ea9a4ddf Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Sat, 17 Jul 2021 14:33:28 +0200 Subject: [PATCH 146/794] block: increase BLKCG_MAX_POLS After mq-deadline learned to deal with cgroups, the BLKCG_MAX_POLS value became too small for all the elevators to be registered properly. The following issue is seen: ``` calling bfq_init+0x0/0x8b @ 1 blkcg_policy_register: BLKCG_MAX_POLS too small initcall bfq_init+0x0/0x8b returned -28 after 507 usecs ``` which renders BFQ non-functional. Increase BLKCG_MAX_POLS to allow enough space for everyone. 
Fixes: 08a9ad8bf607 ("block/mq-deadline: Add cgroup support") Link: https://lore.kernel.org/lkml/8988303.mDXGIdCtx8@natalenko.name/ Signed-off-by: Oleksandr Natalenko Link: https://lore.kernel.org/r/20210717123328.945810-1-oleksandr@natalenko.name Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c454fb446fd0..2e12320cb121 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -57,7 +57,7 @@ struct blk_keyslot_manager; * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. */ -#define BLKCG_MAX_POLS 5 +#define BLKCG_MAX_POLS 6 typedef void (rq_end_io_fn)(struct request *, blk_status_t); From ec7099fdea8025988710ee6fecfd4e4210c29ab5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 2 Jul 2021 15:37:12 +0200 Subject: [PATCH 147/794] Revert "gpio: mpc8xxx: change the gpio interrupt flags." This reverts commit 3d5bfbd9716318b1ca5c38488aa69f64d38a9aa5. When booting with threadirqs, it causes a splat WARNING: CPU: 0 PID: 29 at kernel/irq/handle.c:159 __handle_irq_event_percpu+0x1ec/0x27c irq 66 handler irq_default_primary_handler+0x0/0x1c enabled interrupts That splat later went away with commit 81e2073c175b ("genirq: Disable interrupts for force threaded handlers"), which got backported to -stable. However, when running an -rt kernel, the splat still exists. Moreover, quoting Thomas Gleixner [1] But 3d5bfbd97163 ("gpio: mpc8xxx: change the gpio interrupt flags.") has nothing to do with that: "Delete the interrupt IRQF_NO_THREAD flags in order to gpio interrupts can be threaded to allow high-priority processes to preempt." This changelog is blatantly wrong. In mainline forced irq threads have always been invoked with softirqs disabled, which obviously makes them non-preemptible. So the patch didn't even do what its commit log said. 
[1] https://lore.kernel.org/lkml/871r8zey88.ffs@nanos.tec.linutronix.de/ Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Rasmus Villemoes Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mpc8xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 4b9157a69fca..50b321a1ab1b 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -405,7 +405,7 @@ static int mpc8xxx_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn, mpc8xxx_gpio_irq_cascade, - IRQF_SHARED, "gpio-cascade", + IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade", mpc8xxx_gc); if (ret) { dev_err(&pdev->dev, From a17ad0961706244dce48ec941f7e476a38c0e727 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 15 Jul 2021 16:59:00 -0700 Subject: [PATCH 148/794] net: Fix zero-copy head len calculation. In some cases skb head could be locked and entire header data is pulled from skb. When skb_zerocopy() is called in such cases, the following BUG is triggered. This patch fixes it by copying the entire skb in such cases. This could be optimized in case this is a performance bottleneck. ---8<--- kernel BUG at net/core/skbuff.c:2961!
invalid opcode: 0000 [#1] SMP PTI CPU: 2 PID: 0 Comm: swapper/2 Tainted: G OE 5.4.0-77-generic #86-Ubuntu Hardware name: OpenStack Foundation OpenStack Nova, BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:skb_zerocopy+0x37a/0x3a0 RSP: 0018:ffffbcc70013ca38 EFLAGS: 00010246 Call Trace: queue_userspace_packet+0x2af/0x5e0 [openvswitch] ovs_dp_upcall+0x3d/0x60 [openvswitch] ovs_dp_process_packet+0x125/0x150 [openvswitch] ovs_vport_receive+0x77/0xd0 [openvswitch] netdev_port_receive+0x87/0x130 [openvswitch] netdev_frame_hook+0x4b/0x60 [openvswitch] __netif_receive_skb_core+0x2b4/0xc90 __netif_receive_skb_one_core+0x3f/0xa0 __netif_receive_skb+0x18/0x60 process_backlog+0xa9/0x160 net_rx_action+0x142/0x390 __do_softirq+0xe1/0x2d6 irq_exit+0xae/0xb0 do_IRQ+0x5a/0xf0 common_interrupt+0xf/0xf Code that triggered BUG: int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0fe97d660790..fc7942c0dddc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3022,8 +3022,11 @@ skb_zerocopy_headlen(const struct sk_buff *from) if (!from->head_frag || skb_headlen(from) < L1_CACHE_BYTES || - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { hlen = skb_headlen(from); + if (!hlen) + hlen = from->len; + } if (skb_has_frag_list(from)) hlen = from->len; From f5051bcece50140abd1a11a2d36dc3ec5484fc32 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sat, 17 Jul 2021 14:29:33 +0300 Subject: [PATCH 149/794] net: sched: fix memory leak in tcindex_partial_destroy_work Syzbot reported memory leak in tcindex_set_parms(). 
The problem was in non-freed perfect hash in tcindex_partial_destroy_work(). In tcindex_set_parms() new tcindex_data is allocated and some fields from old one are copied to new one, but not the perfect hash. Since tcindex_partial_destroy_work() is the destroy function for old tcindex_data, we need to free perfect hash to avoid memory leak. Reported-and-tested-by: syzbot+f0bbb2287b8993d4fa74@syzkaller.appspotmail.com Fixes: 331b72922c5f ("net: sched: RCU cls_tcindex") Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 5b274534264c..e9a8a2c86bbd 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -278,6 +278,8 @@ static int tcindex_filter_result_init(struct tcindex_filter_result *r, TCA_TCINDEX_POLICE); } +static void tcindex_free_perfect_hash(struct tcindex_data *cp); + static void tcindex_partial_destroy_work(struct work_struct *work) { struct tcindex_data *p = container_of(to_rcu_work(work), @@ -285,7 +287,8 @@ static void tcindex_partial_destroy_work(struct work_struct *work) rwork); rtnl_lock(); - kfree(p->perfect); + if (p->perfect) + tcindex_free_perfect_hash(p); kfree(p); rtnl_unlock(); } From 2f3fdd8d4805015fa964807e1c7f3d88f31bd389 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 17 Jul 2021 17:19:19 -0400 Subject: [PATCH 150/794] sctp: trim optlen when it's a huge value in sctp_setsockopt After commit ca84bd058dae ("sctp: copy the optval from user space in sctp_setsockopt"), it does memory allocation in sctp_setsockopt with the optlen, and it would fail the allocation and return error if the optlen from user space is a huge value. 
This breaks some sockopts, like SCTP_HMAC_IDENT, SCTP_RESET_STREAMS and SCTP_AUTH_KEY, as when processing these sockopts before, optlen would be trimmed to a biggest value it needs when optlen is a huge value, instead of failing the allocation and returning error. This patch is to fix the allocation failure when it's a huge optlen from user space by trimming it to the biggest size sctp sockopt may need when necessary, and this biggest size is from sctp_setsockopt_reset_streams() for SCTP_RESET_STREAMS, which is bigger than those for SCTP_HMAC_IDENT and SCTP_AUTH_KEY. Fixes: ca84bd058dae ("sctp: copy the optval from user space in sctp_setsockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e64e01f61b11..6b937bfd4751 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4577,6 +4577,10 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, } if (optlen > 0) { + /* Trim it to the biggest size sctp sockopt may need if necessary */ + optlen = min_t(unsigned int, optlen, + PAGE_ALIGN(USHRT_MAX + + sizeof(__u16) * sizeof(struct sctp_reset_streams))); kopt = memdup_sockptr(optval, optlen); if (IS_ERR(kopt)) return PTR_ERR(kopt); From 517a16b1a88bdb6b530f48d5d153478b2552d9a8 Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Sun, 18 Jul 2021 22:40:13 +0800 Subject: [PATCH 151/794] netrom: Decrease sock refcount when sock timers expire Commit 63346650c1a9 ("netrom: switch to sock timer API") switched to use sock timer API. It replaces mod_timer() by sk_reset_timer(), and del_timer() by sk_stop_timer(). Function sk_reset_timer() will increase the refcount of sock if it is called on an inactive timer, hence, in case the timer expires, we need to decrease the refcount ourselves in the handler, otherwise, the sock refcount will be unbalanced and the sock will never be freed. 
Signed-off-by: Nguyen Dinh Phi Reported-by: syzbot+10f1194569953b72f1ae@syzkaller.appspotmail.com Fixes: 63346650c1a9 ("netrom: switch to sock timer API") Signed-off-by: David S. Miller --- net/netrom/nr_timer.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 9115f8a7dd45..a8da88db7893 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -121,11 +121,9 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { - sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); - sock_put(sk); - return; + goto out; } break; @@ -146,6 +144,8 @@ static void nr_heartbeat_expiry(struct timer_list *t) nr_start_heartbeat(sk); bh_unlock_sock(sk); +out: + sock_put(sk); } static void nr_t2timer_expiry(struct timer_list *t) @@ -159,6 +159,7 @@ static void nr_t2timer_expiry(struct timer_list *t) nr_enquiry_response(sk); } bh_unlock_sock(sk); + sock_put(sk); } static void nr_t4timer_expiry(struct timer_list *t) @@ -169,6 +170,7 @@ static void nr_t4timer_expiry(struct timer_list *t) bh_lock_sock(sk); nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; bh_unlock_sock(sk); + sock_put(sk); } static void nr_idletimer_expiry(struct timer_list *t) @@ -197,6 +199,7 @@ static void nr_idletimer_expiry(struct timer_list *t) sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); + sock_put(sk); } static void nr_t1timer_expiry(struct timer_list *t) @@ -209,8 +212,7 @@ static void nr_t1timer_expiry(struct timer_list *t) case NR_STATE_1: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); - bh_unlock_sock(sk); - return; + goto out; } else { nr->n2count++; nr_write_internal(sk, NR_CONNREQ); @@ -220,8 +222,7 @@ static void nr_t1timer_expiry(struct timer_list *t) case NR_STATE_2: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); - 
bh_unlock_sock(sk); - return; + goto out; } else { nr->n2count++; nr_write_internal(sk, NR_DISCREQ); @@ -231,8 +232,7 @@ static void nr_t1timer_expiry(struct timer_list *t) case NR_STATE_3: if (nr->n2count == nr->n2) { nr_disconnect(sk, ETIMEDOUT); - bh_unlock_sock(sk); - return; + goto out; } else { nr->n2count++; nr_requeue_frames(sk); @@ -241,5 +241,7 @@ static void nr_t1timer_expiry(struct timer_list *t) } nr_start_t1timer(sk); +out: bh_unlock_sock(sk); + sock_put(sk); } From e746f3451ec7f91dcc9fd67a631239c715850a34 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 30 Jun 2021 19:25:59 -0500 Subject: [PATCH 152/794] scsi: iscsi: Fix iface sysfs attr detection A ISCSI_IFACE_PARAM can have the same value as a ISCSI_NET_PARAM so when iscsi_iface_attr_is_visible tries to figure out the type by just checking the value, we can collide and return the wrong type. When we call into the driver we might not match and return that we don't want attr visible in sysfs. The patch fixes this by setting the type when we figure out what the param is. Link: https://lore.kernel.org/r/20210701002559.89533-1-michael.christie@oracle.com Fixes: 3e0f65b34cc9 ("[SCSI] iscsi_transport: Additional parameters for network settings") Signed-off-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_transport_iscsi.c | 90 +++++++++++------------------ 1 file changed, 34 insertions(+), 56 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index b07105ae7c91..d8b05d8b5470 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -439,39 +439,10 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, struct device *dev = container_of(kobj, struct device, kobj); struct iscsi_iface *iface = iscsi_dev_to_iface(dev); struct iscsi_transport *t = iface->transport; - int param; - int param_type; + int param = -1; if (attr == &dev_attr_iface_enabled.attr) param = ISCSI_NET_PARAM_IFACE_ENABLE; - else if (attr == &dev_attr_iface_vlan_id.attr) - param = ISCSI_NET_PARAM_VLAN_ID; - else if (attr == &dev_attr_iface_vlan_priority.attr) - param = ISCSI_NET_PARAM_VLAN_PRIORITY; - else if (attr == &dev_attr_iface_vlan_enabled.attr) - param = ISCSI_NET_PARAM_VLAN_ENABLED; - else if (attr == &dev_attr_iface_mtu.attr) - param = ISCSI_NET_PARAM_MTU; - else if (attr == &dev_attr_iface_port.attr) - param = ISCSI_NET_PARAM_PORT; - else if (attr == &dev_attr_iface_ipaddress_state.attr) - param = ISCSI_NET_PARAM_IPADDR_STATE; - else if (attr == &dev_attr_iface_delayed_ack_en.attr) - param = ISCSI_NET_PARAM_DELAYED_ACK_EN; - else if (attr == &dev_attr_iface_tcp_nagle_disable.attr) - param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE; - else if (attr == &dev_attr_iface_tcp_wsf_disable.attr) - param = ISCSI_NET_PARAM_TCP_WSF_DISABLE; - else if (attr == &dev_attr_iface_tcp_wsf.attr) - param = ISCSI_NET_PARAM_TCP_WSF; - else if (attr == &dev_attr_iface_tcp_timer_scale.attr) - param = ISCSI_NET_PARAM_TCP_TIMER_SCALE; - else if (attr == &dev_attr_iface_tcp_timestamp_en.attr) - param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN; - else if (attr == &dev_attr_iface_cache_id.attr) - param = ISCSI_NET_PARAM_CACHE_ID; - else if (attr == &dev_attr_iface_redirect_en.attr) - param = 
ISCSI_NET_PARAM_REDIRECT_EN; else if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr) param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO; else if (attr == &dev_attr_iface_header_digest.attr) @@ -508,6 +479,38 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, param = ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN; else if (attr == &dev_attr_iface_initiator_name.attr) param = ISCSI_IFACE_PARAM_INITIATOR_NAME; + + if (param != -1) + return t->attr_is_visible(ISCSI_IFACE_PARAM, param); + + if (attr == &dev_attr_iface_vlan_id.attr) + param = ISCSI_NET_PARAM_VLAN_ID; + else if (attr == &dev_attr_iface_vlan_priority.attr) + param = ISCSI_NET_PARAM_VLAN_PRIORITY; + else if (attr == &dev_attr_iface_vlan_enabled.attr) + param = ISCSI_NET_PARAM_VLAN_ENABLED; + else if (attr == &dev_attr_iface_mtu.attr) + param = ISCSI_NET_PARAM_MTU; + else if (attr == &dev_attr_iface_port.attr) + param = ISCSI_NET_PARAM_PORT; + else if (attr == &dev_attr_iface_ipaddress_state.attr) + param = ISCSI_NET_PARAM_IPADDR_STATE; + else if (attr == &dev_attr_iface_delayed_ack_en.attr) + param = ISCSI_NET_PARAM_DELAYED_ACK_EN; + else if (attr == &dev_attr_iface_tcp_nagle_disable.attr) + param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE; + else if (attr == &dev_attr_iface_tcp_wsf_disable.attr) + param = ISCSI_NET_PARAM_TCP_WSF_DISABLE; + else if (attr == &dev_attr_iface_tcp_wsf.attr) + param = ISCSI_NET_PARAM_TCP_WSF; + else if (attr == &dev_attr_iface_tcp_timer_scale.attr) + param = ISCSI_NET_PARAM_TCP_TIMER_SCALE; + else if (attr == &dev_attr_iface_tcp_timestamp_en.attr) + param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN; + else if (attr == &dev_attr_iface_cache_id.attr) + param = ISCSI_NET_PARAM_CACHE_ID; + else if (attr == &dev_attr_iface_redirect_en.attr) + param = ISCSI_NET_PARAM_REDIRECT_EN; else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV4) { if (attr == &dev_attr_ipv4_iface_ipaddress.attr) param = ISCSI_NET_PARAM_IPV4_ADDR; @@ -598,32 +601,7 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject 
*kobj, return 0; } - switch (param) { - case ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO: - case ISCSI_IFACE_PARAM_HDRDGST_EN: - case ISCSI_IFACE_PARAM_DATADGST_EN: - case ISCSI_IFACE_PARAM_IMM_DATA_EN: - case ISCSI_IFACE_PARAM_INITIAL_R2T_EN: - case ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN: - case ISCSI_IFACE_PARAM_PDU_INORDER_EN: - case ISCSI_IFACE_PARAM_ERL: - case ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH: - case ISCSI_IFACE_PARAM_FIRST_BURST: - case ISCSI_IFACE_PARAM_MAX_R2T: - case ISCSI_IFACE_PARAM_MAX_BURST: - case ISCSI_IFACE_PARAM_CHAP_AUTH_EN: - case ISCSI_IFACE_PARAM_BIDI_CHAP_EN: - case ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL: - case ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN: - case ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN: - case ISCSI_IFACE_PARAM_INITIATOR_NAME: - param_type = ISCSI_IFACE_PARAM; - break; - default: - param_type = ISCSI_NET_PARAM; - } - - return t->attr_is_visible(param_type, param); + return t->attr_is_visible(ISCSI_NET_PARAM, param); } static struct attribute *iscsi_iface_attrs[] = { From 6d8e7e7c932162bccd06872362751b0e1d76f5af Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 2 Jul 2021 12:16:55 +0300 Subject: [PATCH 153/794] scsi: target: Fix protect handling in WRITE SAME(32) WRITE SAME(32) command handling reads WRPROTECT at the wrong offset in 1st byte instead of 10th byte. Link: https://lore.kernel.org/r/20210702091655.22818-1-d.bogdanov@yadro.com Fixes: afd73f1b60fc ("target: Perform PROTECT sanity checks for WRITE_SAME") Signed-off-by: Dmitry Bogdanov Signed-off-by: Martin K. 
Petersen --- drivers/target/target_core_sbc.c | 35 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index b32f4ee88e79..ca1b2312d6e7 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -25,7 +25,7 @@ #include "target_core_alua.h" static sense_reason_t -sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char *, u32, bool); +sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char, u32, bool); static sense_reason_t sbc_execute_unmap(struct se_cmd *cmd); static sense_reason_t @@ -279,14 +279,14 @@ static inline unsigned long long transport_lba_64_ext(unsigned char *cdb) } static sense_reason_t -sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *ops) +sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *ops) { struct se_device *dev = cmd->se_dev; sector_t end_lba = dev->transport->get_blocks(dev) + 1; unsigned int sectors = sbc_get_write_same_sectors(cmd); sense_reason_t ret; - if ((flags[0] & 0x04) || (flags[0] & 0x02)) { + if ((flags & 0x04) || (flags & 0x02)) { pr_err("WRITE_SAME PBDATA and LBDATA" " bits not supported for Block Discard" " Emulation\n"); @@ -308,7 +308,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o } /* We always have ANC_SUP == 0 so setting ANCHOR is always an error */ - if (flags[0] & 0x10) { + if (flags & 0x10) { pr_warn("WRITE SAME with ANCHOR not supported\n"); return TCM_INVALID_CDB_FIELD; } @@ -316,7 +316,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o * Special case for WRITE_SAME w/ UNMAP=1 that ends up getting * translated into block discard requests within backend code. 
*/ - if (flags[0] & 0x08) { + if (flags & 0x08) { if (!ops->execute_unmap) return TCM_UNSUPPORTED_SCSI_OPCODE; @@ -331,7 +331,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o if (!ops->execute_write_same) return TCM_UNSUPPORTED_SCSI_OPCODE; - ret = sbc_check_prot(dev, cmd, &cmd->t_task_cdb[0], sectors, true); + ret = sbc_check_prot(dev, cmd, flags >> 5, sectors, true); if (ret) return ret; @@ -717,10 +717,9 @@ sbc_set_prot_op_checks(u8 protect, bool fabric_prot, enum target_prot_type prot_ } static sense_reason_t -sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb, +sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char protect, u32 sectors, bool is_write) { - u8 protect = cdb[1] >> 5; int sp_ops = cmd->se_sess->sup_prot_ops; int pi_prot_type = dev->dev_attrib.pi_prot_type; bool fabric_prot = false; @@ -768,7 +767,7 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb, fallthrough; default: pr_err("Unable to determine pi_prot_type for CDB: 0x%02x " - "PROTECT: 0x%02x\n", cdb[0], protect); + "PROTECT: 0x%02x\n", cmd->t_task_cdb[0], protect); return TCM_INVALID_CDB_FIELD; } @@ -843,7 +842,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, false); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false); if (ret) return ret; @@ -857,7 +856,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, false); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false); if (ret) return ret; @@ -871,7 +870,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, false); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 
5, sectors, false); if (ret) return ret; @@ -892,7 +891,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, true); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true); if (ret) return ret; @@ -906,7 +905,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, true); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true); if (ret) return ret; @@ -921,7 +920,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, true); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true); if (ret) return ret; @@ -980,7 +979,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) size = sbc_get_size(cmd, 1); cmd->t_task_lba = get_unaligned_be64(&cdb[12]); - ret = sbc_setup_write_same(cmd, &cdb[10], ops); + ret = sbc_setup_write_same(cmd, cdb[10], ops); if (ret) return ret; break; @@ -1079,7 +1078,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) size = sbc_get_size(cmd, 1); cmd->t_task_lba = get_unaligned_be64(&cdb[2]); - ret = sbc_setup_write_same(cmd, &cdb[1], ops); + ret = sbc_setup_write_same(cmd, cdb[1], ops); if (ret) return ret; break; @@ -1097,7 +1096,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) * Follow sbcr26 with WRITE_SAME (10) and check for the existence * of byte 1 bit 3 UNMAP instead of original reserved field */ - ret = sbc_setup_write_same(cmd, &cdb[1], ops); + ret = sbc_setup_write_same(cmd, cdb[1], ops); if (ret) return ret; break; From a3a9ee4b5254f212c2adaa8cd8ca03bfa112f49d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 9 Jun 2021 19:25:56 +0200 Subject: [PATCH 154/794] drm/nouveau: init the base GEM fields for internal BOs 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TTM's buffer objects have been based on GEM objects for quite a while and rely on initializing those fields before initializing the TTM BO. Nouveau now doesn't init the GEM object for internally allocated BOs, so make sure that we at least initialize some necessary fields. Signed-off-by: Christian König Tested-by: Mikko Perttunen Reviewed-by: Matthew Auld Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/msgid/20210609172902.1937-1-christian.koenig@amd.com --- drivers/gpu/drm/nouveau/nouveau_bo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 4f3a5357dd56..6d07e653f82d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -149,6 +149,8 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) */ if (bo->base.dev) drm_gem_object_release(&bo->base); + else + dma_resv_fini(&bo->base._resv); kfree(nvbo); } @@ -330,6 +332,10 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, if (IS_ERR(nvbo)) return PTR_ERR(nvbo); + nvbo->bo.base.size = size; + dma_resv_init(&nvbo->bo.base._resv); + drm_vma_node_reset(&nvbo->bo.base.vma_node); + ret = nouveau_bo_init(nvbo, size, align, domain, sg, robj); if (ret) return ret; From e4efa82660e6d80338c554e45e903714e1b2c27b Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 19 Jul 2021 11:02:31 +0800 Subject: [PATCH 155/794] ALSA: hda/realtek: Fix pop noise and 2 Front Mic issues on a machine This is a Lenovo ThinkStation machine which uses the codec alc623. There are 2 issues on this machine: the 1st one is the pop noise in the lineout; the 2nd one is that there are 2 Front Mics and pulseaudio can't handle them. After applying the fixup of ALC623_FIXUP_LENOVO_THINKSTATION_P340 to this machine, the 2 issues are fixed.
Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20210719030231.6870-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1389cfd5e0db..caaf0e8aac11 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8626,6 +8626,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP), From 7c9ff3deeee61b253715dcf968a6307af148c9b2 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 16 Jul 2021 11:21:13 -0700 Subject: [PATCH 156/794] Drivers: hv: vmbus: Fix duplicate CPU assignments within a device The vmbus module uses a rotational algorithm to assign target CPUs to a device's channels. Depending on the timing of different device's channel offers, different channels of a device may be assigned to the same CPU. For example on a VM with 2 CPUs, if NIC A and B's channels are offered in the following order, NIC A will have both channels on CPU0, and NIC B will have both channels on CPU1 -- see below. This kind of assignment causes RSS load that is spreading across different channels to end up on the same CPU. 
Timing of channel offers: NIC A channel 0 NIC B channel 0 NIC A channel 1 NIC B channel 1 VMBUS ID 14: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} - Synthetic network adapter Device_ID = {cab064cd-1f31-47d5-a8b4-9d57e320cccd} Sysfs path: /sys/bus/vmbus/devices/cab064cd-1f31-47d5-a8b4-9d57e320cccd Rel_ID=14, target_cpu=0 Rel_ID=17, target_cpu=0 VMBUS ID 16: Class_ID = {f8615163-df3e-46c5-913f-f2d2f965ed0e} - Synthetic network adapter Device_ID = {244225ca-743e-4020-a17d-d7baa13d6cea} Sysfs path: /sys/bus/vmbus/devices/244225ca-743e-4020-a17d-d7baa13d6cea Rel_ID=16, target_cpu=1 Rel_ID=18, target_cpu=1 Update the vmbus CPU assignment algorithm to avoid duplicate CPU assignments within a device. The new algorithm iterates num_online_cpus + 1 times. The existing rotational algorithm to find "next NUMA & CPU" is still here. But if the resulting CPU is already used by the same device, it will try the next CPU. In the last iteration, it assigns the channel to the next available CPU like the existing algorithm. This is not normally expected, because during device probe, we limit the number of channels of a device to be <= number of online CPUs. 
Signed-off-by: Haiyang Zhang Reviewed-by: Michael Kelley Tested-by: Michael Kelley Link: https://lore.kernel.org/r/1626459673-17420-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 100 +++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 34 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index caf6d0c4bc1b..142308526ec6 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -605,6 +605,17 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) */ mutex_lock(&vmbus_connection.channel_mutex); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (guid_equal(&channel->offermsg.offer.if_type, + &newchannel->offermsg.offer.if_type) && + guid_equal(&channel->offermsg.offer.if_instance, + &newchannel->offermsg.offer.if_instance)) { + fnew = false; + newchannel->primary_channel = channel; + break; + } + } + init_vp_index(newchannel); /* Remember the channels that should be cleaned up upon suspend. */ @@ -617,16 +628,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) */ atomic_dec(&vmbus_connection.offer_in_progress); - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (guid_equal(&channel->offermsg.offer.if_type, - &newchannel->offermsg.offer.if_type) && - guid_equal(&channel->offermsg.offer.if_instance, - &newchannel->offermsg.offer.if_instance)) { - fnew = false; - break; - } - } - if (fnew) { list_add_tail(&newchannel->listentry, &vmbus_connection.chn_list); @@ -647,7 +648,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) /* * Process the sub-channel. */ - newchannel->primary_channel = channel; list_add_tail(&newchannel->sc_list, &channel->sc_list); } @@ -683,6 +683,30 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) queue_work(wq, &newchannel->add_channel_work); } +/* + * Check if CPUs used by other channels of the same device. 
+ * It should only be called by init_vp_index(). + */ +static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn) +{ + struct vmbus_channel *primary = chn->primary_channel; + struct vmbus_channel *sc; + + lockdep_assert_held(&vmbus_connection.channel_mutex); + + if (!primary) + return false; + + if (primary->target_cpu == cpu) + return true; + + list_for_each_entry(sc, &primary->sc_list, sc_list) + if (sc != chn && sc->target_cpu == cpu) + return true; + + return false; +} + /* * We use this state to statically distribute the channel interrupt load. */ @@ -702,6 +726,7 @@ static int next_numa_node_id; static void init_vp_index(struct vmbus_channel *channel) { bool perf_chn = hv_is_perf_channel(channel); + u32 i, ncpu = num_online_cpus(); cpumask_var_t available_mask; struct cpumask *alloced_mask; u32 target_cpu; @@ -724,31 +749,38 @@ static void init_vp_index(struct vmbus_channel *channel) return; } - while (true) { - numa_node = next_numa_node_id++; - if (numa_node == nr_node_ids) { - next_numa_node_id = 0; - continue; + for (i = 1; i <= ncpu + 1; i++) { + while (true) { + numa_node = next_numa_node_id++; + if (numa_node == nr_node_ids) { + next_numa_node_id = 0; + continue; + } + if (cpumask_empty(cpumask_of_node(numa_node))) + continue; + break; } - if (cpumask_empty(cpumask_of_node(numa_node))) - continue; - break; + alloced_mask = &hv_context.hv_numa_map[numa_node]; + + if (cpumask_weight(alloced_mask) == + cpumask_weight(cpumask_of_node(numa_node))) { + /* + * We have cycled through all the CPUs in the node; + * reset the alloced map. 
+ */ + cpumask_clear(alloced_mask); + } + + cpumask_xor(available_mask, alloced_mask, + cpumask_of_node(numa_node)); + + target_cpu = cpumask_first(available_mask); + cpumask_set_cpu(target_cpu, alloced_mask); + + if (channel->offermsg.offer.sub_channel_index >= ncpu || + i > ncpu || !hv_cpuself_used(target_cpu, channel)) + break; } - alloced_mask = &hv_context.hv_numa_map[numa_node]; - - if (cpumask_weight(alloced_mask) == - cpumask_weight(cpumask_of_node(numa_node))) { - /* - * We have cycled through all the CPUs in the node; - * reset the alloced map. - */ - cpumask_clear(alloced_mask); - } - - cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node)); - - target_cpu = cpumask_first(available_mask); - cpumask_set_cpu(target_cpu, alloced_mask); channel->target_cpu = target_cpu; From 21ed49265986931b8921a2404394426870245bd2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 12 Jul 2021 09:40:22 +0200 Subject: [PATCH 157/794] m68k: MAC should select HAVE_PATA_PLATFORM The defconfigs switched Mac from the deprecated CONFIG_BLK_DEV_PLATFORM to CONFIG_PATA_PLATFORM. However, the latter depends on CONFIG_HAVE_PATA_PLATFORM, which thus must be selected first. 
Fixes: b90257bfddbd01f3 ("m68k: use libata instead of the legacy ide driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210712074022.2116655-1-geert@linux-m68k.org --- arch/m68k/Kconfig.machine | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index d964c1f27399..6a07a6817885 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -33,6 +33,7 @@ config MAC depends on MMU select MMU_MOTOROLA if MMU select HAVE_ARCH_NVRAM_OPS + select HAVE_PATA_PLATFORM select LEGACY_TIMER_TICK help This option enables support for the Apple Macintosh series of From 78d2a05ef22e7b5863b01e073dd6a06b3979bb00 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sat, 17 Jul 2021 15:28:18 +0300 Subject: [PATCH 158/794] ASoC: ti: j721e-evm: Fix unbalanced domain activity tracking during startup In case of an error within j721e_audio_startup() the domain->active must be decremented to avoid an unbalanced counter.
Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20210717122820.1467-2-peter.ujfalusi@gmail.com Signed-off-by: Mark Brown --- sound/soc/ti/j721e-evm.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sound/soc/ti/j721e-evm.c b/sound/soc/ti/j721e-evm.c index a7c0484d44ec..017c4ad11ca6 100644 --- a/sound/soc/ti/j721e-evm.c +++ b/sound/soc/ti/j721e-evm.c @@ -278,23 +278,29 @@ static int j721e_audio_startup(struct snd_pcm_substream *substream) j721e_rule_rate, &priv->rate_range, SNDRV_PCM_HW_PARAM_RATE, -1); - mutex_unlock(&priv->mutex); if (ret) - return ret; + goto out; /* Reset TDM slots to 32 */ ret = snd_soc_dai_set_tdm_slot(cpu_dai, 0x3, 0x3, 2, 32); if (ret && ret != -ENOTSUPP) - return ret; + goto out; for_each_rtd_codec_dais(rtd, i, codec_dai) { ret = snd_soc_dai_set_tdm_slot(codec_dai, 0x3, 0x3, 2, 32); if (ret && ret != -ENOTSUPP) - return ret; + goto out; } - return 0; + if (ret == -ENOTSUPP) + ret = 0; +out: + if (ret) + domain->active--; + mutex_unlock(&priv->mutex); + + return ret; } static int j721e_audio_hw_params(struct snd_pcm_substream *substream, From 82d28b67f780910f816fe1cfb0f676fc38c4cbb3 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sat, 17 Jul 2021 15:28:19 +0300 Subject: [PATCH 159/794] ASoC: ti: j721e-evm: Check for not initialized parent_clk_id During probe the parent_clk_id is set to -1 which should not be used to array index within hsdiv_rates[]. 
Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20210717122820.1467-3-peter.ujfalusi@gmail.com Signed-off-by: Mark Brown --- sound/soc/ti/j721e-evm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/ti/j721e-evm.c b/sound/soc/ti/j721e-evm.c index 017c4ad11ca6..265bbc5a2f96 100644 --- a/sound/soc/ti/j721e-evm.c +++ b/sound/soc/ti/j721e-evm.c @@ -197,7 +197,7 @@ static int j721e_configure_refclk(struct j721e_priv *priv, return ret; } - if (priv->hsdiv_rates[domain->parent_clk_id] != scki) { + if (domain->parent_clk_id == -1 || priv->hsdiv_rates[domain->parent_clk_id] != scki) { dev_dbg(priv->dev, "%s configuration for %u Hz: %s, %dxFS (SCKI: %u Hz)\n", audio_domain == J721E_AUDIO_DOMAIN_CPB ? "CPB" : "IVI", From 56912da7a68c8356df6a6740476237441b0b792a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 16 Jul 2021 20:21:33 +0200 Subject: [PATCH 160/794] spi: cadence: Correct initialisation of runtime PM again The original implementation of RPM handling in probe() was mostly correct, except it failed to call pm_runtime_get_*() to activate the hardware. The subsequent fix, 734882a8bf98 ("spi: cadence: Correct initialisation of runtime PM"), breaks the implementation further, to the point where the system using this hard IP on ZynqMP hangs on boot, because it accesses hardware which is gated off. Undo 734882a8bf98 ("spi: cadence: Correct initialisation of runtime PM") and instead add missing pm_runtime_get_noresume() and move the RPM disabling all the way to the end of probe(). That makes ZynqMP not hang on boot yet again. 
Fixes: 734882a8bf98 ("spi: cadence: Correct initialisation of runtime PM") Signed-off-by: Marek Vasut Cc: Charles Keepax Cc: Mark Brown Link: https://lore.kernel.org/r/20210716182133.218640-1-marex@denx.de Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index a3afd1b9ac56..ceb16e70d235 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -517,6 +517,12 @@ static int cdns_spi_probe(struct platform_device *pdev) goto clk_dis_apb; } + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT); + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + ret = of_property_read_u32(pdev->dev.of_node, "num-cs", &num_cs); if (ret < 0) master->num_chipselect = CDNS_SPI_DEFAULT_NUM_CS; @@ -531,11 +537,6 @@ static int cdns_spi_probe(struct platform_device *pdev) /* SPI controller initializations */ cdns_spi_init_hw(xspi); - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); - pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT); - irq = platform_get_irq(pdev, 0); if (irq <= 0) { ret = -ENXIO; @@ -566,6 +567,9 @@ static int cdns_spi_probe(struct platform_device *pdev) master->bits_per_word_mask = SPI_BPW_MASK(8); + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_put_autosuspend(&pdev->dev); + ret = spi_register_master(master); if (ret) { dev_err(&pdev->dev, "spi_register_master failed\n"); From c9d9fdbc108af8915d3f497bbdf3898bf8f321b8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Jul 2021 14:34:15 -0500 Subject: [PATCH 161/794] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser" This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser"). 
The justification for this commit in the git history was a vague comment about getting it out from under the struct_mutex. While this may improve perf for some workloads on Gen7 platforms where we rely on the command parser for features such as indirect rendering, no numbers were provided to prove such an improvement. It claims to have closed two gitlab/bugzilla issues but with no explanation whatsoever as to why or what bug it's fixing. Meanwhile, by moving command parsing off to an async callback, it leaves us with a problem of what to do on error. When things were synchronous, EXECBUFFER2 would fail with an error code if parsing failed. When moving it to async, we needed another way to handle that error and the solution employed was to set an error on the dma_fence and then trust that said error gets propagated to the client eventually. Moving back to synchronous will help us untangle the fence error propagation mess. This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser pinning to execbuffer") which is a refactor of some of our allocation paths for asynchronous parsing. Now that everything is synchronous, we don't need it.
v2 (Daniel Vetter): - Add stabel Cc and Fixes tag Signed-off-by: Jason Ekstrand Cc: # v5.6+ Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Cc: Maarten Lankhorst Reviewed-by: Jon Bloomfield Acked-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-2-jason@jlekstrand.net (cherry picked from commit 93b713304188844b8514074dc13ffd56d12235d3) Signed-off-by: Rodrigo Vivi --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 227 +----------------- .../i915/gem/selftests/i915_gem_execbuffer.c | 4 + drivers/gpu/drm/i915/i915_cmd_parser.c | 126 +++++----- drivers/gpu/drm/i915/i915_drv.h | 7 +- 4 files changed, 88 insertions(+), 276 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index a8abc9af5ff4..4a6419d7be93 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -25,10 +25,8 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" -#include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "i915_memcpy.h" struct eb_vma { struct i915_vma *vma; @@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, int err; struct intel_engine_cs *engine = eb->engine; + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + if (!reloc_can_use_engine(engine)) { engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; if (!engine) @@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb, return vma; } -struct eb_parse_work { - struct dma_fence_work base; - struct intel_engine_cs *engine; - struct i915_vma *batch; - struct i915_vma *shadow; - struct i915_vma *trampoline; - unsigned long batch_offset; - unsigned long batch_length; - unsigned long *jump_whitelist; - const void 
*batch_map; - void *shadow_map; -}; - -static int __eb_parse(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - int ret; - bool cookie; - - cookie = dma_fence_begin_signalling(); - ret = intel_engine_cmd_parser(pw->engine, - pw->batch, - pw->batch_offset, - pw->batch_length, - pw->shadow, - pw->jump_whitelist, - pw->shadow_map, - pw->batch_map); - dma_fence_end_signalling(cookie); - - return ret; -} - -static void __eb_parse_release(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - - if (!IS_ERR_OR_NULL(pw->jump_whitelist)) - kfree(pw->jump_whitelist); - - if (pw->batch_map) - i915_gem_object_unpin_map(pw->batch->obj); - else - i915_gem_object_unpin_pages(pw->batch->obj); - - i915_gem_object_unpin_map(pw->shadow->obj); - - if (pw->trampoline) - i915_active_release(&pw->trampoline->active); - i915_active_release(&pw->shadow->active); - i915_active_release(&pw->batch->active); -} - -static const struct dma_fence_work_ops eb_parse_ops = { - .name = "eb_parse", - .work = __eb_parse, - .release = __eb_parse_release, -}; - -static inline int -__parser_mark_active(struct i915_vma *vma, - struct intel_timeline *tl, - struct dma_fence *fence) -{ - struct intel_gt_buffer_pool_node *node = vma->private; - - return i915_active_ref(&node->active, tl->fence_context, fence); -} - -static int -parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl) -{ - int err; - - mutex_lock(&tl->mutex); - - err = __parser_mark_active(pw->shadow, tl, &pw->base.dma); - if (err) - goto unlock; - - if (pw->trampoline) { - err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma); - if (err) - goto unlock; - } - -unlock: - mutex_unlock(&tl->mutex); - return err; -} - -static int eb_parse_pipeline(struct i915_execbuffer *eb, - struct i915_vma *shadow, - struct i915_vma *trampoline) -{ - struct eb_parse_work *pw; - struct drm_i915_gem_object *batch = eb->batch->vma->obj; - bool 
needs_clflush; - int err; - - GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); - GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length)); - - pw = kzalloc(sizeof(*pw), GFP_KERNEL); - if (!pw) - return -ENOMEM; - - err = i915_active_acquire(&eb->batch->vma->active); - if (err) - goto err_free; - - err = i915_active_acquire(&shadow->active); - if (err) - goto err_batch; - - if (trampoline) { - err = i915_active_acquire(&trampoline->active); - if (err) - goto err_shadow; - } - - pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB); - if (IS_ERR(pw->shadow_map)) { - err = PTR_ERR(pw->shadow_map); - goto err_trampoline; - } - - needs_clflush = - !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - pw->batch_map = ERR_PTR(-ENODEV); - if (needs_clflush && i915_has_memcpy_from_wc()) - pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC); - - if (IS_ERR(pw->batch_map)) { - err = i915_gem_object_pin_pages(batch); - if (err) - goto err_unmap_shadow; - pw->batch_map = NULL; - } - - pw->jump_whitelist = - intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len, - trampoline); - if (IS_ERR(pw->jump_whitelist)) { - err = PTR_ERR(pw->jump_whitelist); - goto err_unmap_batch; - } - - dma_fence_work_init(&pw->base, &eb_parse_ops); - - pw->engine = eb->engine; - pw->batch = eb->batch->vma; - pw->batch_offset = eb->batch_start_offset; - pw->batch_length = eb->batch_len; - pw->shadow = shadow; - pw->trampoline = trampoline; - - /* Mark active refs early for this worker, in case we get interrupted */ - err = parser_mark_active(pw, eb->context->timeline); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(pw->batch->resv, 1); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(shadow->resv, 1); - if (err) - goto err_commit; - - /* Wait for all writes (and relocs) into the batch to complete */ - err = i915_sw_fence_await_reservation(&pw->base.chain, - pw->batch->resv, NULL, false, - 0, I915_FENCE_GFP); - 
if (err < 0) - goto err_commit; - - /* Keep the batch alive and unwritten as we parse */ - dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - - /* Force execution to wait for completion of the parser */ - dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - - dma_fence_work_commit_imm(&pw->base); - return 0; - -err_commit: - i915_sw_fence_set_error_once(&pw->base.chain, err); - dma_fence_work_commit_imm(&pw->base); - return err; - -err_unmap_batch: - if (pw->batch_map) - i915_gem_object_unpin_map(batch); - else - i915_gem_object_unpin_pages(batch); -err_unmap_shadow: - i915_gem_object_unpin_map(shadow->obj); -err_trampoline: - if (trampoline) - i915_active_release(&trampoline->active); -err_shadow: - i915_active_release(&shadow->active); -err_batch: - i915_active_release(&eb->batch->vma->active); -err_free: - kfree(pw); - return err; -} - static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) { /* @@ -2672,7 +2463,15 @@ static int eb_parse(struct i915_execbuffer *eb) goto err_trampoline; } - err = eb_parse_pipeline(eb, shadow, trampoline); + err = dma_resv_reserve_shared(shadow->resv, 1); + if (err) + goto err_trampoline; + + err = intel_engine_cmd_parser(eb->engine, + eb->batch->vma, + eb->batch_start_offset, + eb->batch_len, + shadow, trampoline); if (err) goto err_unpin_batch; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index 4df505e4c53a..16162fc2782d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg) intel_gt_pm_get(&eb.i915->gt); for_each_uabi_engine(eb.engine, eb.i915) { + if (intel_engine_requires_cmd_parser(eb.engine) || + intel_engine_using_cmd_parser(eb.engine)) + continue; + reloc_cache_init(&eb.reloc_cache, eb.i915); memset(map, POISON_INUSE, 4096); diff --git 
a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 3992c25a191d..00ec618d0159 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1145,19 +1145,41 @@ find_reg(const struct intel_engine_cs *engine, u32 addr) static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, unsigned long offset, unsigned long length, - void *dst, const void *src) + bool *needs_clflush_after) { - bool needs_clflush = - !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); + unsigned int src_needs_clflush; + unsigned int dst_needs_clflush; + void *dst, *src; + int ret; - if (src) { - GEM_BUG_ON(!needs_clflush); - i915_unaligned_memcpy_from_wc(dst, src + offset, length); - } else { - struct scatterlist *sg; + ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush); + if (ret) + return ERR_PTR(ret); + + dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB); + i915_gem_object_finish_access(dst_obj); + if (IS_ERR(dst)) + return dst; + + ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush); + if (ret) { + i915_gem_object_unpin_map(dst_obj); + return ERR_PTR(ret); + } + + src = ERR_PTR(-ENODEV); + if (src_needs_clflush && i915_has_memcpy_from_wc()) { + src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); + if (!IS_ERR(src)) { + i915_unaligned_memcpy_from_wc(dst, + src + offset, + length); + i915_gem_object_unpin_map(src_obj); + } + } + if (IS_ERR(src)) { + unsigned long x, n, remain; void *ptr; - unsigned int x, sg_ofs; - unsigned long remain; /* * We can avoid clflushing partial cachelines before the write @@ -1168,40 +1190,34 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * validate up to the end of the batch. 
*/ remain = length; - if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + if (dst_needs_clflush & CLFLUSH_BEFORE) remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); - while (remain) { - unsigned long sg_max = sg->length >> PAGE_SHIFT; + src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + if (src_needs_clflush) + drm_clflush_virt_range(src + x, len); + memcpy(ptr, src + x, len); + kunmap_atomic(src); - for (; remain && sg_ofs < sg_max; sg_ofs++) { - unsigned long len = min(remain, PAGE_SIZE - x); - void *map; - - map = kmap_atomic(nth_page(sg_page(sg), sg_ofs)); - if (needs_clflush) - drm_clflush_virt_range(map + x, len); - memcpy(ptr, map + x, len); - kunmap_atomic(map); - - ptr += len; - remain -= len; - x = 0; - } - - sg_ofs = 0; - sg = sg_next(sg); + ptr += len; + remain -= len; + x = 0; } } + i915_gem_object_finish_access(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); /* dst_obj is returned with vmap pinned */ + *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER; + return dst; } @@ -1360,6 +1376,9 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, if (target_cmd_index == offset) return 0; + if (IS_ERR(jump_whitelist)) + return PTR_ERR(jump_whitelist); + if (!test_bit(target_cmd_index, jump_whitelist)) { DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", jump_target); @@ -1369,28 +1388,10 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, return 0; } -/** - * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser() - * @batch_length: length of the commands in batch_obj - * @trampoline: Whether jump trampolines are used. 
- * - * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser(). - * This has to be preallocated, because the command parser runs in signaling context, - * and may not allocate any memory. - * - * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use - * IS_ERR() to check for errors. Must bre freed() with kfree(). - * - * NULL is a valid value, meaning no allocation was required. - */ -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline) +static unsigned long *alloc_whitelist(u32 batch_length) { unsigned long *jmp; - if (trampoline) - return NULL; - /* * We expect batch_length to be less than 256KiB for known users, * i.e. we need at most an 8KiB bitmap allocation which should be @@ -1425,21 +1426,21 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ + int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map) + bool trampoline) { u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; + bool needs_clflush_after = false; + unsigned long *jump_whitelist; u64 batch_addr, shadow_addr; int ret = 0; - bool trampoline = !jump_whitelist; GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd))); GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd))); @@ -1447,8 +1448,18 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, batch->size)); GEM_BUG_ON(!batch_length); - cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length, - shadow_map, batch_map); + cmd = copy_batch(shadow->obj, batch->obj, + batch_offset, batch_length, + 
&needs_clflush_after); + if (IS_ERR(cmd)) { + DRM_DEBUG("CMD: Failed to copy batch\n"); + return PTR_ERR(cmd); + } + + jump_whitelist = NULL; + if (!trampoline) + /* Defer failure until attempted use */ + jump_whitelist = alloc_whitelist(batch_length); shadow_addr = gen8_canonical_addr(shadow->node.start); batch_addr = gen8_canonical_addr(batch->node.start + batch_offset); @@ -1549,6 +1560,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, i915_gem_object_flush_map(shadow->obj); + if (!IS_ERR_OR_NULL(jump_whitelist)) + kfree(jump_whitelist); + i915_gem_object_unpin_map(shadow->obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38ff2fb89744..b30397b04529 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1906,17 +1906,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline); - int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map); + bool trampoline); #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 /* intel_device_info.c */ From 3761baae908a7b5012be08d70fa553cc2eb82305 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 14 Jul 2021 14:34:16 -0500 Subject: [PATCH 162/794] Revert "drm/i915: Propagate errors on awaiting already signaled fences" This reverts commit 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7. Ever since that commit, we've been having issues where a hang in one client can propagate to another. 
In particular, a hang in an app can propagate to the X server which causes the whole desktop to lock up. Error propagation along fences sounds like a good idea, but as your bug shows, it has surprising consequences, since propagating errors across security boundaries is not a good thing. What we do have is tracking of the hangs on the ctx, with the information reported to userspace using RESET_STATS. That's how arb_robustness works. Also, if my understanding is still correct, the EIO from execbuf is when your context is banned (because not recoverable or too many hangs). And in all these cases it's up to userspace to figure out what all is impacted and should be reported to the application; that's not on the kernel to guess and automatically propagate. What's more, we're also building more features on top of ctx error reporting with RESET_STATS ioctl: Encrypted buffers use the same, and the userspace fence wait also relies on that mechanism. So it is the path going forward for reporting gpu hangs and resets to userspace. So all together that's why I think we should just bury this idea again as not quite the direction we want to go to, hence why I think the revert is the right option here. For backporters: Please note that you _must_ have a backport of https://lore.kernel.org/dri-devel/20210602164149.391653-2-jason@jlekstrand.net/ because otherwise backporting just this patch opens up a security bug. v2: Augment commit message. Also restore Jason's sob that I accidentally lost.
v3: Add a note for backporters Signed-off-by: Jason Ekstrand Reported-by: Marcin Slusarz Cc: # v5.6+ Cc: Jason Ekstrand Cc: Marcin Slusarz Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3080 Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Acked-by: Daniel Vetter Reviewed-by: Jon Bloomfield Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-3-jason@jlekstrand.net (cherry picked from commit 93a2711cddd5760e2f0f901817d71c93183c3b87) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_request.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1014c71cf7f5..37aef1308573 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1426,10 +1426,8 @@ i915_request_await_execution(struct i915_request *rq, do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } if (fence->context == rq->fence.context) continue; @@ -1527,10 +1525,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } /* * Requests on the same timeline are explicitly ordered, along From d2cbbf1fe503c07e466c62f83aa1926d74d15821 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 15 Jul 2021 11:26:01 +0200 Subject: [PATCH 163/794] ACPI: Kconfig: Fix table override from built-in initrd During a rework of initramfs code the INITRAMFS_COMPRESSION config option was removed in commit 65e00e04e5ae. 
A leftover dependency on it broke the config option ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD that is used to enable the overriding of ACPI tables from built-in initrd. Fixing the dependency. Fixes: 65e00e04e5ae ("initramfs: refactor the initramfs build rules") Signed-off-by: Robert Richter Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 9d872ea477a6..8f9940f40baa 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -370,7 +370,7 @@ config ACPI_TABLE_UPGRADE config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD bool "Override ACPI tables from built-in initrd" depends on ACPI_TABLE_UPGRADE - depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION="" + depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION_NONE help This option provides functionality to override arbitrary ACPI tables from built-in uncompressed initrd. From 71f6428332844f38c7cb10461d9f29e9c9b983a0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 12 Jul 2021 21:21:21 +0300 Subject: [PATCH 164/794] ACPI: utils: Fix reference counting in for_each_acpi_dev_match() Currently it's possible to iterate over the dangling pointer in case the device suddenly disappears. This may happen because callers put it at the end of a loop. Instead, let's move that call inside acpi_dev_get_next_match_dev(). Fixes: 803abec64ef9 ("media: ipu3-cio2: Add cio2-bridge to ipu3-cio2 driver") Fixes: bf263f64e804 ("media: ACPI / bus: Add acpi_dev_get_next_match_dev() and helper macro") Fixes: edbd1bc4951e ("efi/dev-path-parser: Switch to use for_each_acpi_dev_match()") Signed-off-by: Andy Shevchenko Reviewed-by: Daniel Scally Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/utils.c | 7 +++---- drivers/firmware/efi/dev-path-parser.c | 1 - drivers/media/pci/intel/ipu3/cio2-bridge.c | 6 ++---- include/acpi/acpi_bus.h | 5 ----- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index e7ddd281afff..d5cedffeeff9 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -860,11 +860,9 @@ EXPORT_SYMBOL(acpi_dev_present); * Return the next match of ACPI device if another matching device was present * at the moment of invocation, or NULL otherwise. * - * FIXME: The function does not tolerate the sudden disappearance of @adev, e.g. - * in the case of a hotplug event. That said, the caller should ensure that - * this will never happen. - * * The caller is responsible for invoking acpi_dev_put() on the returned device. + * On the other hand the function invokes acpi_dev_put() on the given @adev + * assuming that its reference counter had been increased beforehand. * * See additional information in acpi_dev_present() as well. */ @@ -880,6 +878,7 @@ acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const cha match.hrv = hrv; dev = bus_find_device(&acpi_bus_type, start, &match, acpi_dev_match_cb); + acpi_dev_put(adev); return dev ? 
to_acpi_device(dev) : NULL; } EXPORT_SYMBOL(acpi_dev_get_next_match_dev); diff --git a/drivers/firmware/efi/dev-path-parser.c b/drivers/firmware/efi/dev-path-parser.c index 10d4457417a4..eb9c65f97841 100644 --- a/drivers/firmware/efi/dev-path-parser.c +++ b/drivers/firmware/efi/dev-path-parser.c @@ -34,7 +34,6 @@ static long __init parse_acpi_path(const struct efi_dev_path *node, break; if (!adev->pnp.unique_id && node->acpi.uid == 0) break; - acpi_dev_put(adev); } if (!adev) return -ENODEV; diff --git a/drivers/media/pci/intel/ipu3/cio2-bridge.c b/drivers/media/pci/intel/ipu3/cio2-bridge.c index 4657e99df033..59a36f922675 100644 --- a/drivers/media/pci/intel/ipu3/cio2-bridge.c +++ b/drivers/media/pci/intel/ipu3/cio2-bridge.c @@ -173,10 +173,8 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg, int ret; for_each_acpi_dev_match(adev, cfg->hid, NULL, -1) { - if (!adev->status.enabled) { - acpi_dev_put(adev); + if (!adev->status.enabled) continue; - } if (bridge->n_sensors >= CIO2_NUM_PORTS) { acpi_dev_put(adev); @@ -185,7 +183,6 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg, } sensor = &bridge->sensors[bridge->n_sensors]; - sensor->adev = adev; strscpy(sensor->name, cfg->hid, sizeof(sensor->name)); ret = cio2_bridge_read_acpi_buffer(adev, "SSDB", @@ -215,6 +212,7 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg, goto err_free_swnodes; } + sensor->adev = acpi_dev_get(adev); adev->fwnode.secondary = fwnode; dev_info(&cio2->dev, "Found supported sensor %s\n", diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1ae993fee4a5..b9d434a93632 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -707,11 +707,6 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); * @hrv: Hardware Revision of the device, pass -1 to not check _HRV * * The caller is responsible for invoking acpi_dev_put() on the returned device. 
- * - * FIXME: Due to above requirement there is a window that may invalidate @adev - * and next iteration will use a dangling pointer, e.g. in the case of a - * hotplug event. That said, the caller should ensure that this will never - * happen. */ #define for_each_acpi_dev_match(adev, hid, uid, hrv) \ for (adev = acpi_dev_get_first_match_dev(hid, uid, hrv); \ From c81cfb6256d90ea5ba4a6fb280ea3b171be4e05c Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Sun, 18 Jul 2021 15:36:25 -0400 Subject: [PATCH 165/794] bnxt_en: don't disable an already disabled PCI device If device is already disabled in reset path and PCI io error is detected before the device could be enabled, driver could call pci_disable_device() for already disabled device. Fix this problem by calling pci_disable_device() only if the device is already enabled. Fixes: 6316ea6db93d ("bnxt_en: Enable AER support.") Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f56245eeef7b..fdfb75a1608d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13436,7 +13436,8 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, if (netif_running(netdev)) bnxt_close(netdev); - pci_disable_device(pdev); + if (pci_is_enabled(pdev)) + pci_disable_device(pdev); bnxt_free_ctx_mem(bp); kfree(bp->ctx); bp->ctx = NULL; From c08c59653415201ac46ab791c936ae804c45a11b Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 18 Jul 2021 15:36:26 -0400 Subject: [PATCH 166/794] bnxt_en: reject ETS settings that will starve a TC ETS proportions are presented to HWRM_QUEUE_COS2BW_CFG as minimum bandwidth constraints. Thus, zero is a legal value for a given TC. 
However, if all the other TCs sum up to 100%, then at least one hardware queue will starve, resulting in guaranteed TX timeouts. Reject such nonsensical configurations. Reviewed-by: Pavan Chebbi Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 8e90224c43a2..8a68df4d9e59 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -433,6 +433,7 @@ static int bnxt_hwrm_queue_dscp2pri_cfg(struct bnxt *bp, struct dcb_app *app, static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) { int total_ets_bw = 0; + bool zero = false; u8 max_tc = 0; int i; @@ -453,13 +454,20 @@ static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) break; case IEEE_8021QAZ_TSA_ETS: total_ets_bw += ets->tc_tx_bw[i]; + zero = zero || !ets->tc_tx_bw[i]; break; default: return -ENOTSUPP; } } - if (total_ets_bw > 100) + if (total_ets_bw > 100) { + netdev_warn(bp->dev, "rejecting ETS config exceeding available bandwidth\n"); return -EINVAL; + } + if (zero && total_ets_bw == 100) { + netdev_warn(bp->dev, "rejecting ETS config starving a TC\n"); + return -EINVAL; + } if (max_tc >= bp->max_tc) *tc = bp->max_tc; From 2c9f046bc377efd1f5e26e74817d5f96e9506c86 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:27 -0400 Subject: [PATCH 167/794] bnxt_en: Refresh RoCE capabilities in bnxt_ulp_probe() The capabilities can change after firmware upgrade/downgrade, so we should get the up-to-date RoCE capabilities everytime bnxt_ulp_probe() is called. Fixes: 2151fe0830fd ("bnxt_en: Handle RESET_NOTIFY async event from firmware.") Reviewed-by: Somnath Kotur Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a918e374f3c5..187ff643ad2a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -479,16 +479,17 @@ struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev) if (!edev) return ERR_PTR(-ENOMEM); edev->en_ops = &bnxt_en_ops_tbl; - if (bp->flags & BNXT_FLAG_ROCEV1_CAP) - edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; - if (bp->flags & BNXT_FLAG_ROCEV2_CAP) - edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; edev->net = dev; edev->pdev = bp->pdev; edev->l2_db_size = bp->db_size; edev->l2_db_size_nc = bp->db_size; bp->edev = edev; } + edev->flags &= ~BNXT_EN_FLAG_ROCE_CAP; + if (bp->flags & BNXT_FLAG_ROCEV1_CAP) + edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP; + if (bp->flags & BNXT_FLAG_ROCEV2_CAP) + edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP; return bp->edev; } EXPORT_SYMBOL(bnxt_ulp_probe); From 6cd657cb3ee6f4de57e635b126ffbe0e51d00f1a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:28 -0400 Subject: [PATCH 168/794] bnxt_en: Add missing check for BNXT_STATE_ABORT_ERR in bnxt_fw_rset_task() In the BNXT_FW_RESET_STATE_POLL_VF state in bnxt_fw_reset_task() after all VFs have unregistered, we need to check for BNXT_STATE_ABORT_ERR after we acquire the rtnl_lock. If the flag is set, we need to abort. Fixes: 230d1f0de754 ("bnxt_en: Handle firmware reset.") Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fdfb75a1608d..39908a3d9460 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11992,6 +11992,10 @@ static void bnxt_fw_reset_task(struct work_struct *work) } bp->fw_reset_timestamp = jiffies; rtnl_lock(); + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { + rtnl_unlock(); + goto fw_reset_abort; + } bnxt_fw_reset_close(bp); if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { bp->fw_reset_state = BNXT_FW_RESET_STATE_POLL_FW_DOWN; From 3958b1da725a477b4a222183d16a14d85445d4b6 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Sun, 18 Jul 2021 15:36:29 -0400 Subject: [PATCH 169/794] bnxt_en: fix error path of FW reset When bnxt_open() fails in the firmware reset path, the driver needs to gracefully abort, but it is executing code that should be invoked only in the success path. Define a function to abort FW reset and consolidate all error paths to call this new function. Fixes: dab62e7c2de7 ("bnxt_en: Implement faster recovery for firmware fatal error.") Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 +++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 39908a3d9460..f2f1136fd492 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11959,10 +11959,21 @@ static bool bnxt_fw_reset_timeout(struct bnxt *bp) (bp->fw_reset_max_dsecs * HZ / 10)); } +static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) +{ + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) { + bnxt_ulp_start(bp, rc); + bnxt_dl_health_status_update(bp, false); + } + bp->fw_reset_state = 0; + dev_close(bp->dev); +} + static void bnxt_fw_reset_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work); - int rc; + int rc = 0; if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { netdev_err(bp->dev, "bnxt_fw_reset_task() called when not in fw reset mode!\n"); @@ -11993,8 +12004,9 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_timestamp = jiffies; rtnl_lock(); if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { + bnxt_fw_reset_abort(bp, rc); rtnl_unlock(); - goto fw_reset_abort; + return; } bnxt_fw_reset_close(bp); if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) { @@ -12043,6 +12055,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) if (val == 0xffff) { if (bnxt_fw_reset_timeout(bp)) { netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n"); + rc = -ETIMEDOUT; goto fw_reset_abort; } bnxt_queue_fw_reset_work(bp, HZ / 1000); @@ -12052,6 +12065,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); if (pci_enable_device(bp->pdev)) { netdev_err(bp->dev, "Cannot re-enable PCI device\n"); + rc = -ENODEV; goto fw_reset_abort; } pci_set_master(bp->pdev); @@ -12078,9 +12092,10 
@@ static void bnxt_fw_reset_task(struct work_struct *work) } rc = bnxt_open(bp->dev); if (rc) { - netdev_err(bp->dev, "bnxt_open_nic() failed\n"); - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - dev_close(bp->dev); + netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); + bnxt_fw_reset_abort(bp, rc); + rtnl_unlock(); + return; } bp->fw_reset_state = 0; @@ -12107,12 +12122,8 @@ fw_reset_abort_status: netdev_err(bp->dev, "fw_health_status 0x%x\n", sts); } fw_reset_abort: - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) - bnxt_dl_health_status_update(bp, false); - bp->fw_reset_state = 0; rtnl_lock(); - dev_close(bp->dev); + bnxt_fw_reset_abort(bp, rc); rtnl_unlock(); } From 96bdd4b9ea7ef9a12db8fdd0ce90e37dffbd3703 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:30 -0400 Subject: [PATCH 170/794] bnxt_en: Validate vlan protocol ID on RX packets Only pass supported VLAN protocol IDs for stripped VLAN tags to the stack. The stack will hit WARN() if the protocol ID is unsupported. Existing firmware sets up the chip to strip 0x8100, 0x88a8, 0x9100. Only the 1st two protocols are supported by the kernel. Fixes: a196e96bb68f ("bnxt_en: clean up VLAN feature bit handling") Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f2f1136fd492..169f093e01de 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1671,11 +1671,16 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) && (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { - u16 vlan_proto = tpa_info->metadata >> - RX_CMP_FLAGS2_METADATA_TPID_SFT; + __be16 vlan_proto = htons(tpa_info->metadata >> + RX_CMP_FLAGS2_METADATA_TPID_SFT); u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_TCI_MASK; - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); + if (eth_type_vlan(vlan_proto)) { + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); + } else { + dev_kfree_skb(skb); + return NULL; + } } skb_checksum_none_assert(skb); @@ -1897,9 +1902,15 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, (skb->dev->features & BNXT_HW_FEATURE_VLAN_ALL_RX)) { u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_TCI_MASK; - u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; + __be16 vlan_proto = htons(meta_data >> + RX_CMP_FLAGS2_METADATA_TPID_SFT); - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); + if (eth_type_vlan(vlan_proto)) { + __vlan_hwaccel_put_tag(skb, vlan_proto, vtag); + } else { + dev_kfree_skb(skb); + goto next_rx; + } } skb_checksum_none_assert(skb); From 11a39259ff79b74bc99f8b7c44075a2d6d5e7ab1 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Sun, 18 Jul 2021 15:36:31 -0400 Subject: [PATCH 171/794] bnxt_en: Check abort error state in bnxt_half_open_nic() bnxt_half_open_nic() is called during ethtool self test and is protected by rtnl_lock. Firmware reset can be happening at the same time.
Only critical portions of the entire firmware reset sequence are protected by the rtnl_lock. It is possible that bnxt_half_open_nic() can be called when the firmware reset sequence is aborting. In that case, bnxt_half_open_nic() needs to check if the ABORT_ERR flag is set and abort if it is. The ethtool self test will fail but the NIC will be brought to a consistent IF_DOWN state. Without this patch, if bnxt_half_open_nic() were to continue in this error state, it may crash like this: bnxt_en 0000:82:00.1 enp130s0f1np1: FW reset in progress during close, FW reset will be aborted Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... Process ethtool (pid: 333327, stack limit = 0x0000000046476577) Call trace: bnxt_alloc_mem+0x444/0xef0 [bnxt_en] bnxt_half_open_nic+0x24/0xb8 [bnxt_en] bnxt_self_test+0x2dc/0x390 [bnxt_en] ethtool_self_test+0xe0/0x1f8 dev_ethtool+0x1744/0x22d0 dev_ioctl+0x190/0x3e0 sock_ioctl+0x238/0x480 do_vfs_ioctl+0xc4/0x758 ksys_ioctl+0x84/0xb8 __arm64_sys_ioctl+0x28/0x38 el0_svc_handler+0xb0/0x180 el0_svc+0x8/0xc Fixes: a1301f08c5ac ("bnxt_en: Check abort error state in bnxt_open_nic().") Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 169f093e01de..31eb3c00851a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10208,6 +10208,12 @@ int bnxt_half_open_nic(struct bnxt *bp) { int rc = 0; + if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { + netdev_err(bp->dev, "A previous firmware reset has not completed, aborting half open\n"); + rc = -ENODEV; + goto half_open_err; + } + rc = bnxt_alloc_mem(bp, false); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); From d7859afb6880249039b178fdfb1bef94fd954cf2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:32 -0400 Subject: [PATCH 172/794] bnxt_en: Move bnxt_ptp_init() to bnxt_open() The device needs to be in ifup state for PTP to function, so move bnxt_ptp_init() to bnxt_open(). This means that the PHC will be registered during bnxt_open(). This also makes firmware reset work correctly. PTP configurations may change after firmware upgrade or downgrade. bnxt_open() will be called after firmware reset, so it will work properly. bnxt_ptp_start() is now incorporated into bnxt_ptp_init(). We now also need to call bnxt_ptp_clear() in bnxt_close(). Fixes: 93cb62d98e9c ("bnxt_en: Enable hardware PTP support") Cc: Richard Cochran Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 16 +++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 24 ++++++------------- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 1 - 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 31eb3c00851a..b8b73c210995 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10134,7 +10134,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) } } - bnxt_ptp_start(bp); rc = bnxt_init_nic(bp, irq_re_init); if (rc) { netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); @@ -10273,9 +10272,16 @@ static int bnxt_open(struct net_device *dev) rc = bnxt_hwrm_if_change(bp, true); if (rc) return rc; + + if (bnxt_ptp_init(bp)) { + netdev_warn(dev, "PTP initialization failed.\n"); + kfree(bp->ptp_cfg); + bp->ptp_cfg = NULL; + } rc = __bnxt_open_nic(bp, true, true); if (rc) { bnxt_hwrm_if_change(bp, false); + bnxt_ptp_clear(bp); } else { if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { @@ -10366,6 +10372,7 @@ static int bnxt_close(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); + bnxt_ptp_clear(bp); bnxt_hwmon_close(bp); bnxt_close_nic(bp, true, true); bnxt_hwrm_shutdown_link(bp); @@ -11352,6 +11359,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp) bnxt_clear_int_mode(bp); pci_disable_device(bp->pdev); } + bnxt_ptp_clear(bp); __bnxt_close_nic(bp, true, false); bnxt_vf_reps_free(bp); bnxt_clear_int_mode(bp); @@ -12694,7 +12702,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) if (BNXT_PF(bp)) devlink_port_type_clear(&bp->dl_port); - bnxt_ptp_clear(bp); pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); @@ -13278,11 +13285,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct 
pci_device_id *ent) rc); } - if (bnxt_ptp_init(bp)) { - netdev_warn(dev, "PTP initialization failed.\n"); - kfree(bp->ptp_cfg); - bp->ptp_cfg = NULL; - } bnxt_inv_fw_health_reg(bp); bnxt_dl_register(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index f698b6bd4ff8..9089e7f3fbd4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -385,22 +385,6 @@ int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts) return 0; } -void bnxt_ptp_start(struct bnxt *bp) -{ - struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; - - if (!ptp) - return; - - if (bp->flags & BNXT_FLAG_CHIP_P5) { - spin_lock_bh(&ptp->ptp_lock); - ptp->current_time = bnxt_refclk_read(bp, NULL); - WRITE_ONCE(ptp->old_time, ptp->current_time); - spin_unlock_bh(&ptp->ptp_lock); - ptp_schedule_worker(ptp->ptp_clock, 0); - } -} - static const struct ptp_clock_info bnxt_ptp_caps = { .owner = THIS_MODULE, .name = "bnxt clock", @@ -450,7 +434,13 @@ int bnxt_ptp_init(struct bnxt *bp) bnxt_unmap_ptp_regs(bp); return err; } - + if (bp->flags & BNXT_FLAG_CHIP_P5) { + spin_lock_bh(&ptp->ptp_lock); + ptp->current_time = bnxt_refclk_read(bp, NULL); + WRITE_ONCE(ptp->old_time, ptp->current_time); + spin_unlock_bh(&ptp->ptp_lock); + ptp_schedule_worker(ptp->ptp_clock, 0); + } return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 6b6245750e20..4135ea3ec788 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -75,7 +75,6 @@ int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); -void bnxt_ptp_start(struct bnxt *bp); int bnxt_ptp_init(struct bnxt *bp); void bnxt_ptp_clear(struct bnxt 
*bp); #endif From de5bf19414fec860168f05d00d574562bd9d86d1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 18 Jul 2021 15:36:33 -0400 Subject: [PATCH 173/794] bnxt_en: Fix PTP capability discovery The current PTP initialization logic does not account for firmware reset that may cause PTP capability to change. The valid pointer bp->ptp_cfg is used to indicate that the device is capable of PTP and that it has been initialized. So we must clean up bp->ptp_cfg and free it if the firmware after reset does not support PTP. Fixes: 93cb62d98e9c ("bnxt_en: Enable hardware PTP support") Cc: Richard Cochran Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b8b73c210995..4db162cee911 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7574,8 +7574,12 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->flags &= ~BNXT_FLAG_WOL_CAP; if (flags & FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED) bp->flags |= BNXT_FLAG_WOL_CAP; - if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) + if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) { __bnxt_hwrm_ptp_qcfg(bp); + } else { + kfree(bp->ptp_cfg); + bp->ptp_cfg = NULL; + } } else { #ifdef CONFIG_BNXT_SRIOV struct bnxt_vf_info *vf = &bp->vf; From b16f3299ae1aa3c327e1fb742d0379ae4d6e86f2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 18 Jul 2021 13:38:34 -0700 Subject: [PATCH 174/794] net: hisilicon: rename CACHE_LINE_MASK to avoid redefinition Building on ARCH=arc causes a "redefined" warning, so rename this driver's CACHE_LINE_MASK to avoid the warning. 
../drivers/net/ethernet/hisilicon/hip04_eth.c:134: warning: "CACHE_LINE_MASK" redefined 134 | #define CACHE_LINE_MASK 0x3F In file included from ../include/linux/cache.h:6, from ../include/linux/printk.h:9, from ../include/linux/kernel.h:19, from ../include/linux/list.h:9, from ../include/linux/module.h:12, from ../drivers/net/ethernet/hisilicon/hip04_eth.c:7: ../arch/arc/include/asm/cache.h:17: note: this is the location of the previous definition 17 | #define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1)) Fixes: d413779cdd93 ("net: hisilicon: Add an tx_desc to adapt HI13X1_GMAC") Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: Jiangfeng Xiao Cc: "David S. Miller" Cc: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 12f6c2442a7a..e53512f6878a 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -131,7 +131,7 @@ /* buf unit size is cache_line_size, which is 64, so the shift is 6 */ #define PPE_BUF_SIZE_SHIFT 6 #define PPE_TX_BUF_HOLD BIT(31) -#define CACHE_LINE_MASK 0x3F +#define SOC_CACHE_LINE_MASK 0x3F #else #define PPE_CFG_QOS_VMID_GRP_SHIFT 8 #define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 @@ -531,8 +531,8 @@ hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) #if defined(CONFIG_HI13X1_GMAC) desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV | TX_RELEASE_TO_PPE | priv->port << TX_POOL_SHIFT); - desc->data_offset = (__force u32)cpu_to_be32(phys & CACHE_LINE_MASK); - desc->send_addr = (__force u32)cpu_to_be32(phys & ~CACHE_LINE_MASK); + desc->data_offset = (__force u32)cpu_to_be32(phys & SOC_CACHE_LINE_MASK); + desc->send_addr = (__force u32)cpu_to_be32(phys & ~SOC_CACHE_LINE_MASK); #else desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); desc->send_addr = 
(__force u32)cpu_to_be32(phys); From bdad810eb97875813a067504424a483aaa309bad Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 19 Jul 2021 15:18:19 +0800 Subject: [PATCH 175/794] dt-bindings: net: snps,dwmac: add missing DWMAC IP version Add missing DWMAC IP version in snps,dwmac.yaml which found by below command, as NXP i.MX8 families support SNPS DWMAC 5.10a IP. $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- dt_binding_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml Documentation/devicetree/bindings/net/nxp,dwmac-imx.example.dt.yaml: ethernet@30bf0000: compatible: None of ['nxp,imx8mp-dwmac-eqos', 'snps,dwmac-5.10a'] are valid under the given schema Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index d7652596a09b..42689b7d03a2 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -28,6 +28,7 @@ select: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.10a - snps,dwxgmac - snps,dwxgmac-2.10 @@ -82,6 +83,7 @@ properties: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.10a - snps,dwxgmac - snps,dwxgmac-2.10 @@ -375,6 +377,7 @@ allOf: - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a + - snps,dwmac-5.10a - snps,dwxgmac - snps,dwxgmac-2.10 - st,spear600-gmac From e314a07ef263916f761b736ded7a30894709dfd7 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 19 Jul 2021 15:18:20 +0800 Subject: [PATCH 176/794] dt-bindings: net: imx-dwmac: convert imx-dwmac bindings to yaml In order to automate the verification of DT nodes convert imx-dwmac to nxp,dwmac-imx.yaml, and pass below checking.
$ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- dt_binding_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- dtbs_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- .../devicetree/bindings/net/imx-dwmac.txt | 56 ----------- .../bindings/net/nxp,dwmac-imx.yaml | 93 +++++++++++++++++++ 2 files changed, 93 insertions(+), 56 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/imx-dwmac.txt create mode 100644 Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml diff --git a/Documentation/devicetree/bindings/net/imx-dwmac.txt b/Documentation/devicetree/bindings/net/imx-dwmac.txt deleted file mode 100644 index 921d522fe8d7..000000000000 --- a/Documentation/devicetree/bindings/net/imx-dwmac.txt +++ /dev/null @@ -1,56 +0,0 @@ -IMX8 glue layer controller, NXP imx8 families support Synopsys MAC 5.10a IP. - -This file documents platform glue layer for IMX. -Please see stmmac.txt for the other unchanged properties. - -The device node has following properties. - -Required properties: -- compatible: Should be "nxp,imx8mp-dwmac-eqos" to select glue layer - and "snps,dwmac-5.10a" to select IP version. -- clocks: Must contain a phandle for each entry in clock-names. -- clock-names: Should be "stmmaceth" for the host clock. - Should be "pclk" for the MAC apb clock. - Should be "ptp_ref" for the MAC timer clock. - Should be "tx" for the MAC RGMII TX clock: - Should be "mem" for EQOS MEM clock. - - "mem" clock is required for imx8dxl platform. - - "mem" clock is not required for imx8mp platform. -- interrupt-names: Should contain a list of interrupt names corresponding to - the interrupts in the interrupts property, if available. - Should be "macirq" for the main MAC IRQ - Should be "eth_wake_irq" for the IT which wake up system -- intf_mode: Should be phandle/offset pair. 
The phandle to the syscon node which - encompases the GPR register, and the offset of the GPR register. - - required for imx8mp platform. - - is optional for imx8dxl platform. - -Optional properties: -- intf_mode: is optional for imx8dxl platform. -- snps,rmii_refclk_ext: to select RMII reference clock from external. - -Example: - eqos: ethernet@30bf0000 { - compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; - reg = <0x30bf0000 0x10000>; - interrupts = , - ; - interrupt-names = "eth_wake_irq", "macirq"; - clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, - <&clk IMX8MP_CLK_QOS_ENET_ROOT>, - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, - <&clk IMX8MP_CLK_ENET_QOS>; - clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; - assigned-clocks = <&clk IMX8MP_CLK_ENET_AXI>, - <&clk IMX8MP_CLK_ENET_QOS_TIMER>, - <&clk IMX8MP_CLK_ENET_QOS>; - assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_266M>, - <&clk IMX8MP_SYS_PLL2_100M>, - <&clk IMX8MP_SYS_PLL2_125M>; - assigned-clock-rates = <0>, <100000000>, <125000000>; - nvmem-cells = <&eth_mac0>; - nvmem-cell-names = "mac-address"; - nvmem_macaddr_swap; - intf_mode = <&gpr 0x4>; - status = "disabled"; - }; diff --git a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml new file mode 100644 index 000000000000..5629b2e4ccf8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/nxp,dwmac-imx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP i.MX8 DWMAC glue layer Device Tree Bindings + +maintainers: + - Joakim Zhang + +# We need a select here so we don't match all nodes with 'snps,dwmac' +select: + properties: + compatible: + contains: + enum: + - nxp,imx8mp-dwmac-eqos + - nxp,imx8dxl-dwmac-eqos + required: + - compatible + +allOf: + - $ref: "snps,dwmac.yaml#" + +properties: + compatible: + oneOf: + - items: +
- enum: + - nxp,imx8mp-dwmac-eqos + - nxp,imx8dxl-dwmac-eqos + - const: snps,dwmac-5.10a + + clocks: + minItems: 3 + maxItems: 5 + items: + - description: MAC host clock + - description: MAC apb clock + - description: MAC timer clock + - description: MAC RGMII TX clock + - description: EQOS MEM clock + + clock-names: + minItems: 3 + maxItems: 5 + contains: + enum: + - stmmaceth + - pclk + - ptp_ref + - tx + - mem + + intf_mode: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + Should be phandle/offset pair. The phandle to the syscon node which + encompases the GPR register, and the offset of the GPR register. + + snps,rmii_refclk_ext: + $ref: /schemas/types.yaml#/definitions/flag + description: + To select RMII reference clock from external. + +required: + - compatible + - clocks + - clock-names + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + + eqos: ethernet@30bf0000 { + compatible = "nxp,imx8mp-dwmac-eqos","snps,dwmac-5.10a"; + reg = <0x30bf0000 0x10000>; + interrupts = , + ; + interrupt-names = "macirq", "eth_wake_irq"; + clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, + <&clk IMX8MP_CLK_QOS_ENET_ROOT>, + <&clk IMX8MP_CLK_ENET_QOS_TIMER>, + <&clk IMX8MP_CLK_ENET_QOS>; + clock-names = "stmmaceth", "pclk", "ptp_ref", "tx"; + phy-mode = "rgmii"; + status = "disabled"; + }; From 77e5253deadf7fae59207330e3a639e592ee7892 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 19 Jul 2021 15:18:21 +0800 Subject: [PATCH 177/794] arm64: dts: imx8mp: change interrupt order per dt-binding This patch changs interrupt order which found by dtbs_check. 
$ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- dtbs_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml arch/arm64/boot/dts/freescale/imx8mp-evk.dt.yaml: ethernet@30bf0000: interrupt-names:0: 'macirq' was expected arch/arm64/boot/dts/freescale/imx8mp-evk.dt.yaml: ethernet@30bf0000: interrupt-names:1: 'eth_wake_irq' was expected According to Documentation/devicetree/bindings/net/snps,dwmac.yaml, we should list interrupt in its order. Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 9f7c7f587d38..ca38d0d6c3c4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -821,9 +821,9 @@ eqos: ethernet@30bf0000 { compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a"; reg = <0x30bf0000 0x10000>; - interrupts = , - ; - interrupt-names = "eth_wake_irq", "macirq"; + interrupts = , + ; + interrupt-names = "macirq", "eth_wake_irq"; clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>, <&clk IMX8MP_CLK_QOS_ENET_ROOT>, <&clk IMX8MP_CLK_ENET_QOS_TIMER>, From 6f20c8adb1813467ea52c1296d52c4e95978cb2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Jul 2021 02:12:18 -0700 Subject: [PATCH 178/794] net/tcp_fastopen: fix data races around tfo_active_disable_stamp tfo_active_disable_stamp is read and written locklessly. We need to annotate these accesses appropriately. Then, we need to perform the atomic_inc(tfo_active_disable_times) after the timestamp has been updated, and thus add barriers to make sure tcp_fastopen_active_should_disable() won't read a stale timestamp. Fixes: cf1ef3f0719b ("net/tcp_fastopen: Disable active side TFO in certain scenarios") Signed-off-by: Eric Dumazet Cc: Wei Wang Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Wei Wang Signed-off-by: David S.
Miller --- net/ipv4/tcp_fastopen.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 47c32604d38f..b32af76e2132 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -507,8 +507,15 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); + /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ + WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies); + + /* Paired with smp_rmb() in tcp_fastopen_active_should_disable(). + * We want net->ipv4.tfo_active_disable_stamp to be updated first. + */ + smp_mb__before_atomic(); atomic_inc(&net->ipv4.tfo_active_disable_times); - net->ipv4.tfo_active_disable_stamp = jiffies; + NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); } @@ -526,10 +533,16 @@ bool tcp_fastopen_active_should_disable(struct sock *sk) if (!tfo_da_times) return false; + /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */ + smp_rmb(); + /* Limit timeout to max: 2^6 * initial timeout */ multiplier = 1 << min(tfo_da_times - 1, 6); - timeout = multiplier * tfo_bh_timeout * HZ; - if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) + + /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */ + timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) + + multiplier * tfo_bh_timeout * HZ; + if (time_before(jiffies, timeout)) return true; /* Mark check bit so we can check for successful active TFO From fa2bf6baf2b1d8350e5193ce4014bdddc51a25d0 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Jul 2021 14:29:32 +0530 Subject: [PATCH 179/794] octeontx2-af: Enable transmit side LBK link For enabling VF-VF switching the packets egressing out of CGX mapped VFs needed to be sent to LBK so that same packets are received back to the system. 
But the LBK link also needs to be enabled in addition to a VF's mapped CGX_LMAC link otherwise hardware raises send error interrupt indicating selected LBK link is not enabled in NIX_AF_TL3_TL2X_LINKX_CFG register. Hence this patch enables all LBK links in TL3_TL2_LINKX_CFG registers. Also to enable packet flow between PFs/VFs of NIX0 to PFs/VFs of NIX1(in 98xx silicon) the NPC TX DMAC rules have to be installed such that rules must be hit for any TX interface i.e., NIX0-TX or NIX1-TX provided DMAC match criteria is met. Hence this patch changes the behavior such that MCAM is programmed to match with any NIX0/1-TX interface for TX rules. Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/rvu.c | 6 ++++ .../net/ethernet/marvell/octeontx2/af/rvu.h | 2 ++ .../ethernet/marvell/octeontx2/af/rvu_nix.c | 32 +++++++++++++++++++ .../ethernet/marvell/octeontx2/af/rvu_npc.c | 15 +++++++-- .../marvell/octeontx2/af/rvu_npc_fs.c | 9 +++++- 5 files changed, 61 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 10cddf1ac7b9..086eb6d283ee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2859,6 +2859,12 @@ static int rvu_enable_sriov(struct rvu *rvu) if (!vfs) return 0; + /* LBK channel number 63 is used for switching packets between + * CGX mapped VFs. Hence limit LBK pairs till 62 only. + */ + if (vfs > 62) + vfs = 62; + /* Save VFs number for reference in VF interrupts handlers. * Since interrupts might start arriving during SRIOV enablement * ordinary API cannot be used to get number of enabled VFs.
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 10e58a5d5861..e53f530e5e31 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -415,6 +415,8 @@ struct npc_kpu_profile_adapter { size_t kpus; }; +#define RVU_SWITCH_LBK_CHAN 63 + struct rvu { void __iomem *afreg_base; void __iomem *pfreg_base; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index aeae37704428..a2d69eaac4f8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1952,6 +1952,35 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw, pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE); } +static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, + u16 pcifunc, struct nix_txsch *txsch) +{ + struct rvu_hwinfo *hw = rvu->hw; + int lbk_link_start, lbk_links; + u8 pf = rvu_get_pf(pcifunc); + int schq; + + if (!is_pf_cgxmapped(rvu, pf)) + return; + + lbk_link_start = hw->cgx_links; + + for (schq = 0; schq < txsch->schq.max; schq++) { + if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc) + continue; + /* Enable all LBK links with channel 63 by default so that + * packets can be sent to LBK with a NPC TX MCAM rule + */ + lbk_links = hw->lbk_links; + while (lbk_links--) + rvu_write64(rvu, blkaddr, + NIX_AF_TL3_TL2X_LINKX_CFG(schq, + lbk_link_start + + lbk_links), + BIT_ULL(12) | RVU_SWITCH_LBK_CHAN); + } +} + int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, struct nix_txschq_config *req, struct msg_rsp *rsp) @@ -2040,6 +2069,9 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, rvu_write64(rvu, blkaddr, reg, regval); } + rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc, + &nix_hw->txsch[NIX_TXSCH_LVL_TL2]); + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c 
b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 3612e0a2cab3..16c557cbe6a0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -468,6 +468,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, { int bank = npc_get_bank(mcam, index); int kw = 0, actbank, actindex; + u8 tx_intf_mask = ~intf & 0x3; + u8 tx_intf = intf; u64 cam0, cam1; actbank = bank; /* Save bank id, to set action later on */ @@ -488,12 +490,21 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, */ for (; bank < (actbank + mcam->banks_per_entry); bank++, kw = kw + 2) { /* Interface should be set in all banks */ + if (is_npc_intf_tx(intf)) { + /* Last bit must be set and rest don't care + * for TX interfaces + */ + tx_intf_mask = 0x1; + tx_intf = intf & tx_intf_mask; + tx_intf_mask = ~tx_intf & tx_intf_mask; + } + rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 1), - intf); + tx_intf); rvu_write64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_CAMX_INTF(index, bank, 0), - ~intf & 0x3); + tx_intf_mask); /* Set the match key */ npc_get_keyword(entry, kw, &cam0, &cam1); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 68633145a8b8..92d64bdff0ea 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -949,9 +949,16 @@ static void npc_update_tx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, struct npc_install_flow_req *req, u16 target) { struct nix_tx_action action; + u64 mask = ~0ULL; + + /* If AF is installing then do not care about + * PF_FUNC in Send Descriptor + */ + if (is_pffunc_af(req->hdr.pcifunc)) + mask = 0; npc_update_entry(rvu, NPC_PF_FUNC, entry, (__force u16)htons(target), - 0, ~0ULL, 0, NIX_INTF_TX); + 0, mask, 0, NIX_INTF_TX); *(u64 *)&action = 0x00; action.op = req->op; From 
cb7a6b3bac1d0d773f2b6cc35f6bab61eb5de5ef Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Jul 2021 14:29:33 +0530 Subject: [PATCH 180/794] octeontx2-af: Prepare for allocating MCAM rules for AF AF till now only manages the allocation and freeing of MCAM rules for other PF/VFs in system. To implement L2 switching between all CGX mapped PF and VFs, AF requires MCAM entries for DMAC rules for each PF and VF. This patch modifies AF driver such that AF can also allocate MCAM rules and install rules for other PFs and VFs. All the checks like channel verification for RX rules and PF_FUNC verification for TX rules are relaxed in case AF is allocating or installing rules. Also all the entry and counter to owner mappings are set to NPC_MCAM_INVALID_MAP when they are free indicating those are not allocated to AF nor PF/VFs. This patch also ensures that AF allocated and installed entries are displayed in debugfs. Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. 
Miller --- .../marvell/octeontx2/af/rvu_debugfs.c | 5 +-- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 32 +++++++++++++------ .../marvell/octeontx2/af/rvu_npc_fs.c | 11 ++++--- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 370d4ca1e5ed..9b2dfbf90e51 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -2113,9 +2113,6 @@ static void rvu_print_npc_mcam_info(struct seq_file *s, int entry_acnt, entry_ecnt; int cntr_acnt, cntr_ecnt; - /* Skip PF0 */ - if (!pcifunc) - return; rvu_npc_get_mcam_entry_alloc_info(rvu, pcifunc, blkaddr, &entry_acnt, &entry_ecnt); rvu_npc_get_mcam_counter_alloc_info(rvu, pcifunc, blkaddr, @@ -2298,7 +2295,7 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s, static void rvu_dbg_npc_mcam_show_action(struct seq_file *s, struct rvu_npc_mcam_rule *rule) { - if (rule->intf == NIX_INTF_TX) { + if (is_npc_intf_tx(rule->intf)) { switch (rule->tx_action.op) { case NIX_TX_ACTIONOP_DROP: seq_puts(s, "\taction: Drop\n"); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 16c557cbe6a0..1097291aaa45 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -442,7 +442,8 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, owner = mcam->entry2pfvf_map[index]; target_func = (entry->action >> 4) & 0xffff; /* do nothing when target is LBK/PF or owner is not PF */ - if (is_afvf(target_func) || (owner & RVU_PFVF_FUNC_MASK) || + if (is_pffunc_af(owner) || is_afvf(target_func) || + (owner & RVU_PFVF_FUNC_MASK) || !(target_func & RVU_PFVF_FUNC_MASK)) return; @@ -661,6 +662,7 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, eth_broadcast_addr((u8 *)&req.mask.dmac); 
req.features = BIT_ULL(NPC_DMAC); req.channel = chan; + req.chan_mask = 0xFFFU; req.intf = pfvf->nix_rx_intf; req.op = action.op; req.hdr.pcifunc = 0; /* AF is requester */ @@ -810,6 +812,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, eth_broadcast_addr((u8 *)&req.mask.dmac); req.features = BIT_ULL(NPC_DMAC); req.channel = chan; + req.chan_mask = 0xFFFU; req.intf = pfvf->nix_rx_intf; req.entry = index; req.hdr.pcifunc = 0; /* AF is requester */ @@ -1756,6 +1759,8 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) int nixlf_count = rvu_get_nixlf_count(rvu); struct npc_mcam *mcam = &rvu->hw->mcam; int rsvd, err; + u16 index; + int cntr; u64 cfg; /* Actual number of MCAM entries vary by entry size */ @@ -1856,6 +1861,14 @@ static int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) if (!mcam->entry2target_pffunc) goto free_mem; + for (index = 0; index < mcam->bmap_entries; index++) { + mcam->entry2pfvf_map[index] = NPC_MCAM_INVALID_MAP; + mcam->entry2cntr_map[index] = NPC_MCAM_INVALID_MAP; + } + + for (cntr = 0; cntr < mcam->counters.max; cntr++) + mcam->cntr2pfvf_map[cntr] = NPC_MCAM_INVALID_MAP; + mutex_init(&mcam->lock); return 0; @@ -2573,7 +2586,7 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu, } /* Alloc request from PFFUNC with no NIXLF attached should be denied */ - if (!is_nixlf_attached(rvu, pcifunc)) + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) return NPC_MCAM_ALLOC_DENIED; return npc_mcam_alloc_entries(mcam, pcifunc, req, rsp); @@ -2593,7 +2606,7 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu, return NPC_MCAM_INVALID_REQ; /* Free request from PFFUNC with no NIXLF attached, ignore */ - if (!is_nixlf_attached(rvu, pcifunc)) + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) return NPC_MCAM_INVALID_REQ; mutex_lock(&mcam->lock); @@ -2605,7 +2618,7 @@ int rvu_mbox_handler_npc_mcam_free_entry(struct rvu *rvu, if (rc) goto exit; - 
mcam->entry2pfvf_map[req->entry] = 0; + mcam->entry2pfvf_map[req->entry] = NPC_MCAM_INVALID_MAP; mcam->entry2target_pffunc[req->entry] = 0x0; npc_mcam_clear_bit(mcam, req->entry); npc_enable_mcam_entry(rvu, mcam, blkaddr, req->entry, false); @@ -2690,13 +2703,14 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu, else nix_intf = pfvf->nix_rx_intf; - if (npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { + if (!is_pffunc_af(pcifunc) && + npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) { rc = NPC_MCAM_INVALID_REQ; goto exit; } - if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, - pcifunc)) { + if (!is_pffunc_af(pcifunc) && + npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) { rc = NPC_MCAM_INVALID_REQ; goto exit; } @@ -2847,7 +2861,7 @@ int rvu_mbox_handler_npc_mcam_alloc_counter(struct rvu *rvu, return NPC_MCAM_INVALID_REQ; /* If the request is from a PFFUNC with no NIXLF attached, ignore */ - if (!is_nixlf_attached(rvu, pcifunc)) + if (!is_pffunc_af(pcifunc) && !is_nixlf_attached(rvu, pcifunc)) return NPC_MCAM_INVALID_REQ; /* Since list of allocated counter IDs needs to be sent to requester, @@ -3092,7 +3106,7 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu, if (rc) { /* Free allocated MCAM entry */ mutex_lock(&mcam->lock); - mcam->entry2pfvf_map[entry] = 0; + mcam->entry2pfvf_map[entry] = NPC_MCAM_INVALID_MAP; npc_mcam_clear_bit(mcam, entry); mutex_unlock(&mcam->lock); return rc; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 92d64bdff0ea..c1f35a0971ad 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -913,11 +913,9 @@ static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, struct npc_install_flow_req *req, u16 target) { struct nix_rx_action action; - u64 chan_mask; - chan_mask = req->chan_mask ? 
req->chan_mask : ~0ULL; - npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, chan_mask, 0, - NIX_INTF_RX); + npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, req->chan_mask, + 0, NIX_INTF_RX); *(u64 *)&action = 0x00; action.pf_func = target; @@ -1171,7 +1169,9 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, if (err) return err; - if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) + /* Skip channel validation if AF is installing */ + if (!is_pffunc_af(req->hdr.pcifunc) && + npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) return -EINVAL; pfvf = rvu_get_pfvf(rvu, target); @@ -1187,6 +1187,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, eth_broadcast_addr((u8 *)&req->mask.dmac); } + /* Proceed if NIXLF is attached or not for TX rules */ err = nix_get_nixlf(rvu, target, &nixlf, NULL); if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac) return -EINVAL; From 23109f8dd06d0bd04c9360cf7c501c97b0ab1545 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Jul 2021 14:29:34 +0530 Subject: [PATCH 181/794] octeontx2-af: Introduce internal packet switching As of now any communication between CGXs PFs and their VFs within the system is possible only by external switches sending packets back to the system. This patch adds internal switching support. Broadcast packet replication is not covered here. RVU admin function (AF) maintains MAC addresses of all interfaces in the system. When switching is enabled, MCAM entries are allocated to install rules such that packets with DMAC matching any of the internal interface MAC addresses is punted back into the system via the loopback channel. On the receive side the default unicast rules are modified to not check for ingress channel. So any packet with matching DMAC irrespective of which interface it is coming from will be forwarded to the respective PF/VF interface. 
The transmit side rules and default unicast rules are updated if user changes MAC address of an interface. Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/af/Makefile | 2 +- .../net/ethernet/marvell/octeontx2/af/rvu.c | 4 +- .../net/ethernet/marvell/octeontx2/af/rvu.h | 19 ++ .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 3 + .../marvell/octeontx2/af/rvu_devlink.c | 48 +++- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 4 + .../marvell/octeontx2/af/rvu_npc_fs.c | 9 +- .../marvell/octeontx2/af/rvu_switch.c | 258 ++++++++++++++++++ 8 files changed, 336 insertions(+), 11 deletions(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index 1a3455620b38..cc8ac36cf687 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -10,4 +10,4 @@ obj-$(CONFIG_OCTEONTX2_AF) += rvu_af.o rvu_mbox-y := mbox.o rvu_trace.o rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ - rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o + rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 086eb6d283ee..017163fb3cd5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1314,7 +1314,7 @@ int rvu_mbox_handler_detach_resources(struct rvu *rvu, return rvu_detach_rsrcs(rvu, detach, detach->hdr.pcifunc); } -static int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) +int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); int blkaddr = BLKADDR_NIX0, vf; @@ -3007,6 +3007,8 @@ static int rvu_probe(struct pci_dev *pdev, const struct 
pci_device_id *id) /* Initialize debugfs */ rvu_dbg_init(rvu); + mutex_init(&rvu->rswitch.switch_lock); + return 0; err_dl: rvu_unregister_dl(rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index e53f530e5e31..91503fb2762c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -417,6 +417,14 @@ struct npc_kpu_profile_adapter { #define RVU_SWITCH_LBK_CHAN 63 +struct rvu_switch { + struct mutex switch_lock; /* Serialize flow installation */ + u32 used_entries; + u16 *entry2pcifunc; + u16 mode; + u16 start_entry; +}; + struct rvu { void __iomem *afreg_base; void __iomem *pfreg_base; @@ -447,6 +455,7 @@ struct rvu { /* CGX */ #define PF_CGXMAP_BASE 1 /* PF 0 is reserved for RVU PF */ + u16 cgx_mapped_vfs; /* maximum CGX mapped VFs */ u8 cgx_mapped_pfs; u8 cgx_cnt_max; /* CGX port count max */ u8 *pf2cgxlmac_map; /* pf to cgx_lmac map */ @@ -479,6 +488,9 @@ struct rvu { struct rvu_debugfs rvu_dbg; #endif struct rvu_devlink *rvu_dl; + + /* RVU switch implementation over NPC with DMAC rules */ + struct rvu_switch rswitch; }; static inline void rvu_write64(struct rvu *rvu, u64 block, u64 offset, u64 val) @@ -693,6 +705,7 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw, struct nix_cn10k_aq_enq_req *aq_req, struct nix_cn10k_aq_enq_rsp *aq_rsp, u16 pcifunc, u8 ctype, u32 qidx); +int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); /* NPC APIs */ int rvu_npc_init(struct rvu *rvu); @@ -770,4 +783,10 @@ void rvu_dbg_exit(struct rvu *rvu); static inline void rvu_dbg_init(struct rvu *rvu) {} static inline void rvu_dbg_exit(struct rvu *rvu) {} #endif + +/* RVU Switch */ +void rvu_switch_enable(struct rvu *rvu); +void rvu_switch_disable(struct rvu *rvu); +void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc); + #endif /* RVU_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c 
b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 6cc8fbb7190c..fe99ac4a4dd8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -126,6 +126,7 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) unsigned long lmac_bmap; int size, free_pkind; int cgx, lmac, iter; + int numvfs, hwvfs; if (!cgx_cnt_max) return 0; @@ -166,6 +167,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) pkind->pfchan_map[free_pkind] = ((pf) & 0x3F) << 16; rvu_map_cgx_nix_block(rvu, pf, cgx, lmac); rvu->cgx_mapped_pfs++; + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvfs); + rvu->cgx_mapped_vfs += numvfs; pf++; } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 10a98bcb7c54..2688186066d9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1364,6 +1364,44 @@ static void rvu_health_reporters_destroy(struct rvu *rvu) rvu_nix_health_reporters_destroy(rvu_dl); } +static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct rvu_switch *rswitch; + + rswitch = &rvu->rswitch; + *mode = rswitch->mode; + + return 0; +} + +static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct rvu_switch *rswitch; + + rswitch = &rvu->rswitch; + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + if (rswitch->mode == mode) + return 0; + rswitch->mode = mode; + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + rvu_switch_enable(rvu); + else + rvu_switch_disable(rvu); + break; + default: + return -EINVAL; + } + + return 0; +} + static int rvu_devlink_info_get(struct devlink *devlink, 
struct devlink_info_req *req, struct netlink_ext_ack *extack) { @@ -1372,6 +1410,8 @@ static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req static const struct devlink_ops rvu_devlink_ops = { .info_get = rvu_devlink_info_get, + .eswitch_mode_get = rvu_devlink_eswitch_mode_get, + .eswitch_mode_set = rvu_devlink_eswitch_mode_set, }; int rvu_register_dl(struct rvu *rvu) @@ -1380,14 +1420,9 @@ int rvu_register_dl(struct rvu *rvu) struct devlink *dl; int err; - rvu_dl = kzalloc(sizeof(*rvu_dl), GFP_KERNEL); - if (!rvu_dl) - return -ENOMEM; - dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink)); if (!dl) { dev_warn(rvu->dev, "devlink_alloc failed\n"); - kfree(rvu_dl); return -ENOMEM; } @@ -1395,10 +1430,10 @@ int rvu_register_dl(struct rvu *rvu) if (err) { dev_err(rvu->dev, "devlink register failed with error %d\n", err); devlink_free(dl); - kfree(rvu_dl); return err; } + rvu_dl = devlink_priv(dl); rvu_dl->dl = dl; rvu_dl->rvu = rvu; rvu->rvu_dl = rvu_dl; @@ -1417,5 +1452,4 @@ void rvu_unregister_dl(struct rvu *rvu) rvu_health_reporters_destroy(rvu); devlink_unregister(dl); devlink_free(dl); - kfree(rvu_dl); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index a2d69eaac4f8..0933699a0d2d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3212,6 +3212,8 @@ int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu, if (test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) && from_vf) ether_addr_copy(pfvf->default_mac, req->mac_addr); + rvu_switch_update_rules(rvu, pcifunc); + return 0; } @@ -3881,6 +3883,8 @@ int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req, pfvf = rvu_get_pfvf(rvu, pcifunc); set_bit(NIXLF_INITIALIZED, &pfvf->flags); + rvu_switch_update_rules(rvu, pcifunc); + return rvu_cgx_start_stop_io(rvu, pcifunc, true); } diff --git 
a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index c1f35a0971ad..5c01cf4a9c5b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -910,10 +910,15 @@ static void rvu_mcam_add_counter_to_rule(struct rvu *rvu, u16 pcifunc, static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, struct mcam_entry *entry, - struct npc_install_flow_req *req, u16 target) + struct npc_install_flow_req *req, + u16 target, bool pf_set_vfs_mac) { + struct rvu_switch *rswitch = &rvu->rswitch; struct nix_rx_action action; + if (rswitch->mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && pf_set_vfs_mac) + req->chan_mask = 0x0; /* Do not care channel */ + npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, req->chan_mask, 0, NIX_INTF_RX); @@ -1007,7 +1012,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, req->intf); if (is_npc_intf_rx(req->intf)) - npc_update_rx_entry(rvu, pfvf, entry, req, target); + npc_update_rx_entry(rvu, pfvf, entry, req, target, pf_set_vfs_mac); else npc_update_tx_entry(rvu, pfvf, entry, req, target); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c new file mode 100644 index 000000000000..2e5379710aa5 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2021 Marvell. + */ + +#include +#include "rvu.h" + +static int rvu_switch_install_rx_rule(struct rvu *rvu, u16 pcifunc, + u16 chan_mask) +{ + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; + struct rvu_pfvf *pfvf; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + /* If the pcifunc is not initialized then nothing to do. 
+ * This same function will be called again via rvu_switch_update_rules + * after pcifunc is initialized. + */ + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) + return 0; + + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.features = BIT_ULL(NPC_DMAC); + req.channel = pfvf->rx_chan_base; + req.chan_mask = chan_mask; + req.intf = pfvf->nix_rx_intf; + req.op = NIX_RX_ACTION_DEFAULT; + req.default_rule = 1; + + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); +} + +static int rvu_switch_install_tx_rule(struct rvu *rvu, u16 pcifunc, u16 entry) +{ + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; + struct rvu_pfvf *pfvf; + u8 lbkid; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + /* If the pcifunc is not initialized then nothing to do. + * This same function will be called again via rvu_switch_update_rules + * after pcifunc is initialized. + */ + if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) + return 0; + + lbkid = pfvf->nix_blkaddr == BLKADDR_NIX0 ? 
0 : 1; + ether_addr_copy(req.packet.dmac, pfvf->mac_addr); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.entry = entry; + req.features = BIT_ULL(NPC_DMAC); + req.intf = pfvf->nix_tx_intf; + req.op = NIX_TX_ACTIONOP_UCAST_CHAN; + req.index = (lbkid << 8) | RVU_SWITCH_LBK_CHAN; + req.set_cntr = 1; + + return rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); +} + +static int rvu_switch_install_rules(struct rvu *rvu) +{ + struct rvu_switch *rswitch = &rvu->rswitch; + u16 start = rswitch->start_entry; + struct rvu_hwinfo *hw = rvu->hw; + int pf, vf, numvfs, hwvf; + u16 pcifunc, entry = 0; + int err; + + for (pf = 1; pf < hw->total_pfs; pf++) { + if (!is_pf_cgxmapped(rvu, pf)) + continue; + + pcifunc = pf << 10; + /* rvu_get_nix_blkaddr sets up the corresponding NIX block + * address and NIX RX and TX interfaces for a pcifunc. + * Generally it is called during attach call of a pcifunc but it + * is called here since we are pre-installing rules before + * nixlfs are attached + */ + rvu_get_nix_blkaddr(rvu, pcifunc); + + /* MCAM RX rule for a PF/VF already exists as default unicast + * rules installed by AF. Hence change the channel in those + * rules to ignore channel so that packets with the required + * DMAC received from LBK(by other PF/VFs in system) or from + * external world (from wire) are accepted. 
+ */ + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); + if (err) { + dev_err(rvu->dev, "RX rule for PF%d failed(%d)\n", + pf, err); + return err; + } + + err = rvu_switch_install_tx_rule(rvu, pcifunc, start + entry); + if (err) { + dev_err(rvu->dev, "TX rule for PF%d failed(%d)\n", + pf, err); + return err; + } + + rswitch->entry2pcifunc[entry++] = pcifunc; + + rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); + for (vf = 0; vf < numvfs; vf++, hwvf++) { + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); + rvu_get_nix_blkaddr(rvu, pcifunc); + + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); + if (err) { + dev_err(rvu->dev, + "RX rule for PF%dVF%d failed(%d)\n", + pf, vf, err); + return err; + } + + err = rvu_switch_install_tx_rule(rvu, pcifunc, + start + entry); + if (err) { + dev_err(rvu->dev, + "TX rule for PF%dVF%d failed(%d)\n", + pf, vf, err); + return err; + } + + rswitch->entry2pcifunc[entry++] = pcifunc; + } + } + + return 0; +} + +void rvu_switch_enable(struct rvu *rvu) +{ + struct npc_mcam_alloc_entry_req alloc_req = { 0 }; + struct npc_mcam_alloc_entry_rsp alloc_rsp = { 0 }; + struct npc_delete_flow_req uninstall_req = { 0 }; + struct npc_mcam_free_entry_req free_req = { 0 }; + struct rvu_switch *rswitch = &rvu->rswitch; + struct msg_rsp rsp; + int ret; + + alloc_req.contig = true; + alloc_req.count = rvu->cgx_mapped_pfs + rvu->cgx_mapped_vfs; + ret = rvu_mbox_handler_npc_mcam_alloc_entry(rvu, &alloc_req, + &alloc_rsp); + if (ret) { + dev_err(rvu->dev, + "Unable to allocate MCAM entries\n"); + goto exit; + } + + if (alloc_rsp.count != alloc_req.count) { + dev_err(rvu->dev, + "Unable to allocate %d MCAM entries, got %d\n", + alloc_req.count, alloc_rsp.count); + goto free_entries; + } + + rswitch->entry2pcifunc = kcalloc(alloc_req.count, sizeof(u16), + GFP_KERNEL); + if (!rswitch->entry2pcifunc) + goto free_entries; + + rswitch->used_entries = alloc_rsp.count; + rswitch->start_entry = alloc_rsp.entry; + + ret = rvu_switch_install_rules(rvu); + if (ret) + goto 
uninstall_rules; + + return; + +uninstall_rules: + uninstall_req.start = rswitch->start_entry; + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); + kfree(rswitch->entry2pcifunc); +free_entries: + free_req.all = 1; + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); +exit: + return; +} + +void rvu_switch_disable(struct rvu *rvu) +{ + struct npc_delete_flow_req uninstall_req = { 0 }; + struct npc_mcam_free_entry_req free_req = { 0 }; + struct rvu_switch *rswitch = &rvu->rswitch; + struct rvu_hwinfo *hw = rvu->hw; + int pf, vf, numvfs, hwvf; + struct msg_rsp rsp; + u16 pcifunc; + int err; + + if (!rswitch->used_entries) + return; + + for (pf = 1; pf < hw->total_pfs; pf++) { + if (!is_pf_cgxmapped(rvu, pf)) + continue; + + pcifunc = pf << 10; + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); + if (err) + dev_err(rvu->dev, + "Reverting RX rule for PF%d failed(%d)\n", + pf, err); + + for (vf = 0; vf < numvfs; vf++, hwvf++) { + pcifunc = pf << 10 | ((vf + 1) & 0x3FF); + err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); + if (err) + dev_err(rvu->dev, + "Reverting RX rule for PF%dVF%d failed(%d)\n", + pf, vf, err); + } + } + + uninstall_req.start = rswitch->start_entry; + uninstall_req.end = rswitch->start_entry + rswitch->used_entries - 1; + free_req.all = 1; + rvu_mbox_handler_npc_delete_flow(rvu, &uninstall_req, &rsp); + rvu_mbox_handler_npc_mcam_free_entry(rvu, &free_req, &rsp); + rswitch->used_entries = 0; + kfree(rswitch->entry2pcifunc); +} + +void rvu_switch_update_rules(struct rvu *rvu, u16 pcifunc) +{ + struct rvu_switch *rswitch = &rvu->rswitch; + u32 max = rswitch->used_entries; + u16 entry; + + if (!rswitch->used_entries) + return; + + for (entry = 0; entry < max; entry++) { + if (rswitch->entry2pcifunc[entry] == pcifunc) + break; + } + + if (entry >= max) + return; + + rvu_switch_install_tx_rule(rvu, pcifunc, rswitch->start_entry + entry); + 
rvu_switch_install_rx_rule(rvu, pcifunc, 0x0); +} From 615c77eb5e870d1ffa95f4001cba3612bd2f2332 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 15 Jul 2021 00:10:26 -0500 Subject: [PATCH 182/794] powerpc/pasemi: Fix fall-through warning for Clang Fix the following fallthrough warning: arch/powerpc/platforms/pasemi/idle.c:45:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/60efbf18.d9n6eXv275OJcc7T%25lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- arch/powerpc/platforms/pasemi/idle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 9b88e3cded7d..534b0317fc15 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -42,6 +42,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take From d6371c76e20d7d3f61b05fd67b596af4d14a8886 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 19 Jul 2021 09:51:34 +0100 Subject: [PATCH 183/794] bpf: Fix OOB read when printing XDP link fdinfo We got the following UBSAN report on one of our testing machines: ================================================================================ UBSAN: array-index-out-of-bounds in kernel/bpf/syscall.c:2389:24 index 6 is out of range for type 'char *[6]' CPU: 43 PID: 930921 Comm: systemd-coredum Tainted: G O 5.10.48-cloudflare-kasan-2021.7.0 #1 Hardware name: Call Trace: dump_stack+0x7d/0xa3 ubsan_epilogue+0x5/0x40 __ubsan_handle_out_of_bounds.cold+0x43/0x48 ? seq_printf+0x17d/0x250 bpf_link_show_fdinfo+0x329/0x380 ? bpf_map_value_size+0xe0/0xe0 ? put_files_struct+0x20/0x2d0 ? __kasan_kmalloc.constprop.0+0xc2/0xd0 seq_show+0x3f7/0x540 seq_read_iter+0x3f8/0x1040 seq_read+0x329/0x500 ? 
seq_read_iter+0x1040/0x1040 ? __fsnotify_parent+0x80/0x820 ? __fsnotify_update_child_dentry_flags+0x380/0x380 vfs_read+0x123/0x460 ksys_read+0xed/0x1c0 ? __x64_sys_pwrite64+0x1f0/0x1f0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ================================================================================ ================================================================================ UBSAN: object-size-mismatch in kernel/bpf/syscall.c:2384:2 From the report, we can infer that some array access in bpf_link_show_fdinfo at index 6 is out of bounds. The obvious candidate is bpf_link_type_strs[BPF_LINK_TYPE_XDP] with BPF_LINK_TYPE_XDP == 6. It turns out that BPF_LINK_TYPE_XDP is missing from bpf_types.h and therefore doesn't have an entry in bpf_link_type_strs: pos: 0 flags: 02000000 mnt_id: 13 link_type: (null) link_id: 4 prog_tag: bcf7977d3b93787c prog_id: 4 ifindex: 1 Fixes: aa8d3a716b59 ("bpf, xdp: Add bpf_link-based XDP attachment API") Signed-off-by: Lorenz Bauer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210719085134.43325-2-lmb@cloudflare.com --- include/linux/bpf_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index a9db1eae6796..ae3ac3a2018c 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -134,4 +134,5 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup) BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) #ifdef CONFIG_NET BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) +BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) #endif From 8cae8cd89f05f6de223d63e6d15e31c8ba9cf53b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 13 Jul 2021 17:49:23 +0200 Subject: [PATCH 184/794] seq_file: disallow extremely large seq buffer allocations There is no reasonable need for a buffer larger than this, and it avoids int overflow pitfalls. 
Fixes: 058504edd026 ("fs/seq_file: fallback to vmalloc allocation") Suggested-by: Al Viro Reported-by: Qualys Security Advisory Signed-off-by: Eric Sandeen Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/seq_file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/seq_file.c b/fs/seq_file.c index b117b212ef28..4a2cda04d3e2 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -32,6 +32,9 @@ static void seq_set_overflow(struct seq_file *m) static void *seq_buf_alloc(unsigned long size) { + if (unlikely(size > MAX_RW_COUNT)) + return NULL; + return kvmalloc(size, GFP_KERNEL_ACCOUNT); } From 8d4abca95ecc82fc8c41912fa0085281f19cc29f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 19 Apr 2021 18:43:32 -0500 Subject: [PATCH 185/794] media: ngene: Fix out-of-bounds bug in ngene_command_config_free_buf() Fix an 11-year old bug in ngene_command_config_free_buf() while addressing the following warnings caught with -Warray-bounds: arch/alpha/include/asm/string.h:22:16: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds] arch/x86/include/asm/string_32.h:182:25: warning: '__builtin_memcpy' offset [12, 16] from the object at 'com' is out of the bounds of referenced subobject 'config' with type 'unsigned char' at offset 10 [-Warray-bounds] The problem is that the original code is trying to copy 6 bytes of data into a one-byte size member _config_ of the wrong structure FW_CONFIGURE_BUFFERS, in a single call to memcpy(). This causes a legitimate compiler warning because memcpy() overruns the length of &com.cmd.ConfigureBuffers.config. It seems that the right structure is FW_CONFIGURE_FREE_BUFFERS, instead, because it contains 6 more members apart from the header _hdr_.
Also, the name of the function ngene_command_config_free_buf() suggests that the actual intention is to ConfigureFreeBuffers, instead of ConfigureBuffers (which takes place in the function ngene_command_config_buf(), above). Fix this by enclosing those 6 members of struct FW_CONFIGURE_FREE_BUFFERS into new struct config, and use &com.cmd.ConfigureFreeBuffers.config as the destination address, instead of &com.cmd.ConfigureBuffers.config, when calling memcpy(). This also helps with the ongoing efforts to globally enable -Warray-bounds and get us closer to being able to tighten the FORTIFY_SOURCE routines on memcpy(). Link: https://github.com/KSPP/linux/issues/109 Fixes: dae52d009fc9 ("V4L/DVB: ngene: Initial check-in") Cc: stable@vger.kernel.org Reported-by: kernel test robot Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/linux-hardening/20210420001631.GA45456@embeddedor/ --- drivers/media/pci/ngene/ngene-core.c | 2 +- drivers/media/pci/ngene/ngene.h | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/media/pci/ngene/ngene-core.c b/drivers/media/pci/ngene/ngene-core.c index 07f342db6701..7481f553f959 100644 --- a/drivers/media/pci/ngene/ngene-core.c +++ b/drivers/media/pci/ngene/ngene-core.c @@ -385,7 +385,7 @@ static int ngene_command_config_free_buf(struct ngene *dev, u8 *config) com.cmd.hdr.Opcode = CMD_CONFIGURE_FREE_BUFFER; com.cmd.hdr.Length = 6; - memcpy(&com.cmd.ConfigureBuffers.config, config, 6); + memcpy(&com.cmd.ConfigureFreeBuffers.config, config, 6); com.in_len = 6; com.out_len = 0; diff --git a/drivers/media/pci/ngene/ngene.h b/drivers/media/pci/ngene/ngene.h index 84f04e0e0cb9..3d296f1998a1 100644 --- a/drivers/media/pci/ngene/ngene.h +++ b/drivers/media/pci/ngene/ngene.h @@ -407,12 +407,14 @@ enum _BUFFER_CONFIGS { struct FW_CONFIGURE_FREE_BUFFERS { struct FW_HEADER hdr; - u8 UVI1_BufferLength; - u8 UVI2_BufferLength; - u8 TVO_BufferLength; - u8 AUD1_BufferLength; - u8 
AUD2_BufferLength; - u8 TVA_BufferLength; + struct { + u8 UVI1_BufferLength; + u8 UVI2_BufferLength; + u8 TVO_BufferLength; + u8 AUD1_BufferLength; + u8 AUD2_BufferLength; + u8 TVA_BufferLength; + } __packed config; } __attribute__ ((__packed__)); struct FW_CONFIGURE_UART { From fae21608c31ca987d9dfc0422ac9b5bd21c213a6 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 5 Jul 2021 20:29:50 +0530 Subject: [PATCH 186/794] scsi: mpt3sas: Transition IOC to Ready state during shutdown The IOC firmware assumes that the host driver is still alive after shutdown and continues to post events to host memory (due to faulty expander phy links, etc). This leads to 0x2666 (a bus fault occurred during a host-IOC memory access). Perform an IOC soft reset as part of shutdown to disable event posting. Link: https://lore.kernel.org/r/20210705145951.32258-1-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 32 ++++++++++++++-------------- drivers/scsi/mpt3sas/mpt3sas_base.h | 4 ++++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 7 +++++- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index c39955239d1c..19b1c0cf5f2a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2983,13 +2983,13 @@ _base_check_enable_msix(struct MPT3SAS_ADAPTER *ioc) } /** - * _base_free_irq - free irq + * mpt3sas_base_free_irq - free irq * @ioc: per adapter object * * Freeing respective reply_queue from the list. 
*/ -static void -_base_free_irq(struct MPT3SAS_ADAPTER *ioc) +void +mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc) { struct adapter_reply_queue *reply_q, *next; @@ -3191,12 +3191,12 @@ _base_check_and_enable_high_iops_queues(struct MPT3SAS_ADAPTER *ioc, } /** - * _base_disable_msix - disables msix + * mpt3sas_base_disable_msix - disables msix * @ioc: per adapter object * */ -static void -_base_disable_msix(struct MPT3SAS_ADAPTER *ioc) +void +mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc) { if (!ioc->msix_enable) return; @@ -3304,8 +3304,8 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) for (i = 0; i < ioc->reply_queue_count; i++) { r = _base_request_irq(ioc, i); if (r) { - _base_free_irq(ioc); - _base_disable_msix(ioc); + mpt3sas_base_free_irq(ioc); + mpt3sas_base_disable_msix(ioc); goto try_ioapic; } } @@ -3342,8 +3342,8 @@ mpt3sas_base_unmap_resources(struct MPT3SAS_ADAPTER *ioc) dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); - _base_free_irq(ioc); - _base_disable_msix(ioc); + mpt3sas_base_free_irq(ioc); + mpt3sas_base_disable_msix(ioc); kfree(ioc->replyPostRegisterIndex); ioc->replyPostRegisterIndex = NULL; @@ -7613,14 +7613,14 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc) } /** - * _base_make_ioc_ready - put controller in READY state + * mpt3sas_base_make_ioc_ready - put controller in READY state * @ioc: per adapter object * @type: FORCE_BIG_HAMMER or SOFT_RESET * * Return: 0 for success, non-zero for failure. 
*/ -static int -_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type) +int +mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type) { u32 ioc_state; int rc; @@ -7897,7 +7897,7 @@ mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc) if (ioc->chip_phys && ioc->chip) { mpt3sas_base_mask_interrupts(ioc); ioc->shost_recovery = 1; - _base_make_ioc_ready(ioc, SOFT_RESET); + mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); ioc->shost_recovery = 0; } @@ -8017,7 +8017,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->build_sg_mpi = &_base_build_sg; ioc->build_zero_len_sge_mpi = &_base_build_zero_len_sge; - r = _base_make_ioc_ready(ioc, SOFT_RESET); + r = mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); if (r) goto out_free_resources; @@ -8471,7 +8471,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc, _base_pre_reset_handler(ioc); mpt3sas_wait_for_commands_to_complete(ioc); mpt3sas_base_mask_interrupts(ioc); - r = _base_make_ioc_ready(ioc, type); + r = mpt3sas_base_make_ioc_ready(ioc, type); if (r) goto out; _base_clear_outstanding_commands(ioc); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index d4834c8ee9c0..0c6c3df0038d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1730,6 +1730,10 @@ do { ioc_err(ioc, "In func: %s\n", __func__); \ status, mpi_request, sz); } while (0) int mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int wait_count); +int +mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type); +void mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc); +void mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc); /* scsih shared API */ struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 866d118f7931..8e64a6f14542 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ 
b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -11295,7 +11295,12 @@ scsih_shutdown(struct pci_dev *pdev) _scsih_ir_shutdown(ioc); _scsih_nvme_shutdown(ioc); - mpt3sas_base_detach(ioc); + mpt3sas_base_mask_interrupts(ioc); + ioc->shost_recovery = 1; + mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); + ioc->shost_recovery = 0; + mpt3sas_base_free_irq(ioc); + mpt3sas_base_disable_msix(ioc); } From 114613f62f42e7cbc1242c4e82076a0153043761 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 19 Jul 2021 18:17:46 -0500 Subject: [PATCH 187/794] ALSA: hda: intel-dsp-cfg: add missing ElkhartLake PCI ID We missed the fact that ElkhartLake platforms have two different PCI IDs. We only added one so the SOF driver is never selected by the autodetection logic for the missing configuration. BugLink: https://github.com/thesofproject/linux/issues/2990 Fixes: cc8f81c7e625 ('ALSA: hda: fix intel DSP config') Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210719231746.557325-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-dsp-config.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index d8be146793ee..c9d0ba353463 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -319,6 +319,10 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, .device = 0x4b55, }, + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, + .device = 0x4b58, + }, #endif /* Alder Lake */ From e9db418d4b828dd049caaf5ed65dc86f93bb1a0c Mon Sep 17 00:00:00 2001 From: Ian Ray Date: Mon, 19 Jul 2021 18:43:49 +0200 Subject: [PATCH 188/794] USB: serial: cp210x: fix comments for GE CS1000 Fix comments for GE CS1000 CP210x USB ID assignments. 
Fixes: 42213a0190b5 ("USB: serial: cp210x: add some more GE USB IDs") Signed-off-by: Ian Ray Signed-off-by: Sebastian Reichel Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 09b845d0da41..af286240807e 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -202,8 +202,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */ { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */ - { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 Display serial interface */ - { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 M.2 Key E serial interface */ + { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 M.2 Key E serial interface */ + { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 Display serial interface */ { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ From 47e1e233e9d822dfda068383fb9a616451bda703 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 20 Jul 2021 09:28:09 +0200 Subject: [PATCH 189/794] efi/mokvar: Reserve the table only if it is in boot services data One of the SUSE QA tests triggered: localhost kernel: efi: Failed to lookup EFI memory descriptor for 0x000000003dcf8000 which comes from x86's version of efi_arch_mem_reserve() trying to reserve a memory region. Usually, that function expects EFI_BOOT_SERVICES_DATA memory descriptors but the above case is for the MOKvar table which is allocated in the EFI shim as runtime services. That lead to a fix changing the allocation of that table to boot services. 
However, that fix broke booting SEV guests with that shim leading to this kernel fix 8d651ee9c71b ("x86/ioremap: Map EFI-reserved memory as encrypted for SEV") which extended the ioremap hint to map reserved EFI boot services as decrypted too. However, all that wasn't needed, IMO, because that error message in efi_arch_mem_reserve() was innocuous in this case - if the MOKvar table is not in boot services, then it doesn't need to be reserved in the first place because it is, well, in runtime services which *should* be reserved anyway. So do that reservation for the MOKvar table only if it is allocated in boot services data. I couldn't find any requirement about where that table should be allocated in, unlike the ESRT which allocation is mandated to be done in boot services data by the UEFI spec. Signed-off-by: Borislav Petkov Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/mokvar-table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index d8bc01340686..38722d2009e2 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -180,7 +180,10 @@ void __init efi_mokvar_table_init(void) pr_err("EFI MOKvar config table is not valid\n"); return; } - efi_mem_reserve(efi.mokvar_table, map_size_needed); + + if (md.type == EFI_BOOT_SERVICES_DATA) + efi_mem_reserve(efi.mokvar_table, map_size_needed); + efi_mokvar_table_size = map_size_needed; } From c4824ae7db418aee6f50f308a20b832e58e997fd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 Jul 2021 11:26:40 +0200 Subject: [PATCH 190/794] ALSA: pcm: Fix mmap capability check The hw_support_mmap() doesn't cover all memory allocation types and might use a wrong device pointer for checking the capability. Check the all memory allocation types more completely. 
Cc: Link: https://lore.kernel.org/r/20210720092640.12338-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index c88c4316c417..6919d2943b9d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -246,12 +246,18 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream) if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) return false; - if (substream->ops->mmap || - (substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV && - substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV_UC)) + if (substream->ops->mmap) return true; - return dma_can_mmap(substream->dma_buffer.dev.dev); + switch (substream->dma_buffer.dev.type) { + case SNDRV_DMA_TYPE_UNKNOWN: + return false; + case SNDRV_DMA_TYPE_CONTINUOUS: + case SNDRV_DMA_TYPE_VMALLOC: + return true; + default: + return dma_can_mmap(substream->dma_buffer.dev.dev); + } } static int constrain_mask_params(struct snd_pcm_substream *substream, From d371588910715ebf7fa8e3a5d21ea5169c852927 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 Jul 2021 11:27:32 +0200 Subject: [PATCH 191/794] ALSA: pcm: Fix mmap without buffer preallocation The recent rewrite of the memory allocation helpers also changed the page extraction to a common helper, snd_sgbuf_get_page(). But this assumes implicitly that the buffer was allocated via the standard helper (usually via preallocation), and didn't consider the case of the manual buffer handling. This patch fixes it and also covers the manual buffer management. 
Fixes: 37af81c5998f ("ALSA: core: Abstract memory alloc helpers") Link: https://lore.kernel.org/r/20210720092732.12412-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 6919d2943b9d..6a2971a7e6a1 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3675,6 +3675,8 @@ static vm_fault_t snd_pcm_mmap_data_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; if (substream->ops->page) page = substream->ops->page(substream, offset); + else if (!snd_pcm_get_dma_buf(substream)) + page = virt_to_page(runtime->dma_area + offset); else page = snd_sgbuf_get_page(snd_pcm_get_dma_buf(substream), offset); if (!page) From 2d85a1b31dde84038ea07ad825c3d8d3e71f4344 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 19 Jul 2021 10:55:14 +0300 Subject: [PATCH 192/794] ipv6: ip6_finish_output2: set sk into newly allocated nskb skb_set_owner_w() should set sk not to old skb but to new nskb. 
Fixes: 5796015fa968 ("ipv6: allocate enough headroom in ip6_finish_output2()") Signed-off-by: Vasily Averin Link: https://lore.kernel.org/r/70c0744f-89ae-1869-7e3e-4fa292158f4b@virtuozzo.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01bea76e3891..e1b9f7ac8bad 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -74,7 +74,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (likely(nskb)) { if (skb->sk) - skb_set_owner_w(skb, skb->sk); + skb_set_owner_w(nskb, skb->sk); consume_skb(skb); } else { kfree_skb(skb); From 749468760b952e555529ca8a71256b991455101e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Jul 2021 02:20:28 -0700 Subject: [PATCH 193/794] net/tcp_fastopen: remove obsolete extern After cited commit, sysctl_tcp_fastopen_blackhole_timeout is no longer a global variable. Fixes: 3733be14a32b ("ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob") Signed-off-by: Eric Dumazet Cc: Haishuang Yan Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Wei Wang Link: https://lore.kernel.org/r/20210719092028.3016745-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 17df9b047ee4..784d5c3ef1c5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1709,7 +1709,6 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; -extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); From 6c2d125823ae89d10293437c6fb8a2f3406d721a Mon Sep 17 00:00:00 2001 From: Landen Chao Date: Tue, 20 Jul 2021 11:50:07 +0800 Subject: [PATCH 194/794] net: Update MAINTAINERS for MediaTek switch driver Update maintainers for 
MediaTek switch driver with Deng Qingfang who has contributed many high-quality patches (interrupt, VLAN, GPIO, and etc.) and will help maintenance. Signed-off-by: Landen Chao Signed-off-by: DENG Qingfang Reviewed-by: Florian Fainelli Acked-by: Vladimir Oltean Link: https://lore.kernel.org/r/49e1aa8aac58dcbf1b5e036d09b3fa3bbb1d94d0.1626751861.git.landen.chao@mediatek.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5779f6cacff7..e5ec539b15ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11757,6 +11757,7 @@ F: drivers/char/hw_random/mtk-rng.c MEDIATEK SWITCH DRIVER M: Sean Wang M: Landen Chao +M: DENG Qingfang L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mt7530.* From cbb56b03ec3f317e3728d0f68d25d4b9e590cdc9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 19 Jul 2021 12:39:16 +0300 Subject: [PATCH 195/794] net: bridge: do not replay fdb entries pointing towards the bridge twice This simple script: ip link add br0 type bridge ip link set swp2 master br0 ip link set br0 address 00:01:02:03:04:05 ip link del br0 produces this result on a DSA switch: [ 421.306399] br0: port 1(swp2) entered blocking state [ 421.311445] br0: port 1(swp2) entered disabled state [ 421.472553] device swp2 entered promiscuous mode [ 421.488986] device swp2 left promiscuous mode [ 421.493508] br0: port 1(swp2) entered disabled state [ 421.886107] sja1105 spi0.1: port 1 failed to delete 00:01:02:03:04:05 vid 1 from fdb: -ENOENT [ 421.894374] sja1105 spi0.1: port 1 failed to delete 00:01:02:03:04:05 vid 0 from fdb: -ENOENT [ 421.943982] br0: port 1(swp2) entered blocking state [ 421.949030] br0: port 1(swp2) entered disabled state [ 422.112504] device swp2 entered promiscuous mode A very simplified view of what happens is: (1) the bridge port is created, and the bridge device inherits its MAC address (2) when joining, the bridge port (DSA) requests a replay of the addition of all FDB 
entries towards this bridge port and towards the bridge device itself. In fact, DSA calls br_fdb_replay() twice: br_fdb_replay(br, brport_dev); br_fdb_replay(br, br); DSA uses reference counting for the FDB entries. So the MAC address of the bridge is simply kept with refcount 2. When the bridge port leaves under normal circumstances, everything cancels out since the replay of the FDB entry deletion is also done twice per VLAN. (3) when the bridge MAC address changes, switchdev is notified of the deletion of the old address and of the insertion of the new one. But the old address does not really go away, since it had refcount 2, and the new address is added "only" with refcount 1. (4) when the bridge port leaves now, it will replay a deletion of the FDB entries pointing towards the bridge twice. Then DSA will complain that it can't delete something that no longer exists. It is clear that the problem is that the FDB entries towards the bridge are replayed too many times, so let's fix that problem. Fixes: 63c51453c82c ("net: dsa: replay the local bridge FDB entries pointing to the bridge dev too") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210719093916.4099032-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 2b862cffc03a..a16191dcaed1 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -780,7 +780,7 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, struct net_device *dst_dev; dst_dev = dst ? 
dst->dev : br->dev; - if (dst_dev != br_dev && dst_dev != dev) + if (dst_dev && dst_dev != dev) continue; err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx); From 1b713d14dc3c077ec45e65dab4ea01a8bc41b8c1 Mon Sep 17 00:00:00 2001 From: Chengwen Feng Date: Mon, 19 Jul 2021 17:13:05 +0800 Subject: [PATCH 196/794] net: hns3: fix possible mismatches resp of mailbox Currently, the mailbox synchronous communication between VF and PF use the following fields to maintain communication: 1. Origin_mbx_msg which was combined by message code and subcode, used to match request and response. 2. Received_resp which means whether received response. There may possible mismatches of the following situation: 1. VF sends message A with code=1 subcode=1. 2. PF was blocked about 500ms when processing the message A. 3. VF will detect message A timeout because it can't get the response within 500ms. 4. VF sends message B with code=1 subcode=1 which equal message A. 5. PF processes the first message A and send the response message to VF. 6. VF will identify the response matched the message B because the code/subcode is the same. This will lead to mismatch of request and response. To fix the above bug, we use the following scheme: 1. The message sent from VF was labelled with match_id which was a unique 16-bit non-zero value. 2. The response sent from PF will label with match_id which got from the request. 3. The VF uses the match_id to match request and response message. As for PF driver, it only needs to copy the match_id from request to response. 
Fixes: dde1a86e93ca ("net: hns3: Add mailbox support to PF driver") Signed-off-by: Chengwen Feng Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 6 ++++-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 0a6cda309b24..56b573e47072 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -143,7 +143,8 @@ struct hclge_mbx_vf_to_pf_cmd { u8 mbx_need_resp; u8 rsv1[1]; u8 msg_len; - u8 rsv2[3]; + u8 rsv2; + u16 match_id; struct hclge_vf_to_pf_msg msg; }; @@ -153,7 +154,8 @@ struct hclge_mbx_pf_to_vf_cmd { u8 dest_vfid; u8 rsv[3]; u8 msg_len; - u8 rsv1[3]; + u8 rsv1; + u16 match_id; struct hclge_pf_to_vf_msg msg; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index e10a2c36b706..c0a478ae9583 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -47,6 +47,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport, resp_pf_to_vf->dest_vfid = vf_to_pf_req->mbx_src_vfid; resp_pf_to_vf->msg_len = vf_to_pf_req->msg_len; + resp_pf_to_vf->match_id = vf_to_pf_req->match_id; resp_pf_to_vf->msg.code = HCLGE_MBX_PF_VF_RESP; resp_pf_to_vf->msg.vf_mbx_msg_code = vf_to_pf_req->msg.code; From 4671042f1ef0d37137884811afcc4ae67685ce07 Mon Sep 17 00:00:00 2001 From: Peng Li Date: Mon, 19 Jul 2021 17:13:06 +0800 Subject: [PATCH 197/794] net: hns3: add match_id to check mailbox response from PF to VF When VF need response from PF, VF will wait (1us - 1s) to receive the response, or it will wait timeout and the VF action fails. 
If VF do not receive response in 1st action because timeout, the 2nd action may receive response for the 1st action, and get incorrect response data. VF must receive the right response from PF, or it will cause unexpected error. This patch adds match_id to check mailbox response from PF to VF, to make sure VF get the right response: 1. The message sent from VF was labelled with match_id which was a unique 16-bit non-zero value. 2. The response sent from PF will label with match_id which got from the request. 3. The VF uses the match_id to match request and response message. This scheme depends on PF driver supports match_id; if PF driver doesn't support it then VF will use the original scheme. Signed-off-by: Peng Li Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hclge_mbx.h | 1 + .../hisilicon/hns3/hns3vf/hclgevf_mbx.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 56b573e47072..aa86a81c8f4a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -98,6 +98,7 @@ struct hclgevf_mbx_resp_status { u32 origin_mbx_msg; bool received_resp; int resp_status; + u16 match_id; u8 additional_info[HCLGE_MBX_MAX_RESP_DATA_SIZE]; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 9b17735b9f4c..772b2f8acd2e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -13,6 +13,7 @@ static int hclgevf_resp_to_errno(u16 resp_code) return resp_code ?
-resp_code : 0; } +#define HCLGEVF_MBX_MATCH_ID_START 1 static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) { /* this function should be called with mbx_resp.mbx_mutex held @@ -21,6 +22,10 @@ static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) hdev->mbx_resp.received_resp = false; hdev->mbx_resp.origin_mbx_msg = 0; hdev->mbx_resp.resp_status = 0; + hdev->mbx_resp.match_id++; + /* Update match_id and ensure the value of match_id is not zero */ + if (hdev->mbx_resp.match_id == 0) + hdev->mbx_resp.match_id = HCLGEVF_MBX_MATCH_ID_START; memset(hdev->mbx_resp.additional_info, 0, HCLGE_MBX_MAX_RESP_DATA_SIZE); } @@ -115,6 +120,7 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, if (need_resp) { mutex_lock(&hdev->mbx_resp.mbx_mutex); hclgevf_reset_mbx_resp_status(hdev); + req->match_id = hdev->mbx_resp.match_id; status = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (status) { dev_err(&hdev->pdev->dev, @@ -211,6 +217,19 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) resp->additional_info[i] = *temp; temp++; } + + /* If match_id is not zero, it means PF support + * match_id. If the match_id is right, VF get the + * right response, otherwise ignore the response. + * Driver will clear hdev->mbx_resp when send + * next message which need response. + */ + if (req->match_id) { + if (req->match_id == resp->match_id) + resp->received_resp = true; + } else { + resp->received_resp = true; + } break; case HCLGE_MBX_LINK_STAT_CHANGE: case HCLGE_MBX_ASSERTING_RESET: From 184cd221a86321e53df9389c4b35a247b60c1e77 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 19 Jul 2021 17:13:07 +0800 Subject: [PATCH 198/794] net: hns3: disable port VLAN filter when support function level VLAN filter control For hardware limitation, port VLAN filter is port level, and effective for all the functions of the port. So if not support port VLAN bypass, it's necessary to disable the port VLAN filter, in order to support function level VLAN filter control. 
Fixes: 2ba306627f59 ("net: hns3: add support for modify VLAN filter state") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index dd3354a57c62..ebeaf12e409b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9552,13 +9552,17 @@ static int hclge_set_vport_vlan_filter(struct hclge_vport *vport, bool enable) if (ret) return ret; - if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) + if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) { ret = hclge_set_port_vlan_filter_bypass(hdev, vport->vport_id, !enable); - else if (!vport->vport_id) + } else if (!vport->vport_id) { + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps)) + enable = false; + ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, HCLGE_FILTER_FE_INGRESS, enable, 0); + } return ret; } From bbfd4506f962e7e6fff8f37f017154a3c3791264 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 19 Jul 2021 17:13:08 +0800 Subject: [PATCH 199/794] net: hns3: fix rx VLAN offload state inconsistent issue Currently, VF doesn't enable rx VLAN offload when initializing, and PF does it for VFs. If the user disables the rx VLAN offload for VF with ethtool -K, and reloads the VF driver, it may cause the rx VLAN offload state to be inconsistent between hardware and software. Fix it by enabling rx VLAN offload when the VF initializes.
Fixes: e2cb1dec9779 ("net: hns3: Add HNS3 VF HCL(Hardware Compatibility Layer) Support") Signed-off-by: Jian Shen Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 52eaf82b7cd7..8784d61e833f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2641,6 +2641,16 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) { + struct hnae3_handle *nic = &hdev->nic; + int ret; + + ret = hclgevf_en_hw_strip_rxvtag(nic, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to enable rx vlan offload, ret = %d\n", ret); + return ret; + } + return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0, false); } From c45c1e82bba130db4f19d9dbc1deefcf4ea994ed Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Sat, 17 Jul 2021 00:02:45 +0300 Subject: [PATCH 200/794] spi: spi-bcm2835: Fix deadlock The bcm2835_spi_transfer_one function can create a deadlock if it is called while another thread already has the CCF lock. 
Signed-off-by: Alexandru Tachici Fixes: f8043872e796 ("spi: add driver for BCM2835") Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210716210245.13240-2-alexandru.tachici@analog.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 5f8771fe1a31..775c0bf2f923 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -83,6 +83,7 @@ MODULE_PARM_DESC(polling_limit_us, * struct bcm2835_spi - BCM2835 SPI controller * @regs: base address of register map * @clk: core clock, divided to calculate serial clock + * @clk_hz: core clock cached speed * @irq: interrupt, signals TX FIFO empty or RX FIFO ¾ full * @tfr: SPI transfer currently processed * @ctlr: SPI controller reverse lookup @@ -116,6 +117,7 @@ MODULE_PARM_DESC(polling_limit_us, struct bcm2835_spi { void __iomem *regs; struct clk *clk; + unsigned long clk_hz; int irq; struct spi_transfer *tfr; struct spi_controller *ctlr; @@ -1045,19 +1047,18 @@ static int bcm2835_spi_transfer_one(struct spi_controller *ctlr, { struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); struct bcm2835_spidev *slv = spi_get_ctldata(spi); - unsigned long spi_hz, clk_hz, cdiv; + unsigned long spi_hz, cdiv; unsigned long hz_per_byte, byte_limit; u32 cs = slv->prepare_cs; /* set clock */ spi_hz = tfr->speed_hz; - clk_hz = clk_get_rate(bs->clk); - if (spi_hz >= clk_hz / 2) { + if (spi_hz >= bs->clk_hz / 2) { cdiv = 2; /* clk_hz/2 is the fastest we can go */ } else if (spi_hz) { /* CDIV must be a multiple of two */ - cdiv = DIV_ROUND_UP(clk_hz, spi_hz); + cdiv = DIV_ROUND_UP(bs->clk_hz, spi_hz); cdiv += (cdiv % 2); if (cdiv >= 65536) @@ -1065,7 +1066,7 @@ static int bcm2835_spi_transfer_one(struct spi_controller *ctlr, } else { cdiv = 0; /* 0 is the slowest we can go */ } - tfr->effective_speed_hz = cdiv ? 
(clk_hz / cdiv) : (clk_hz / 65536); + tfr->effective_speed_hz = cdiv ? (bs->clk_hz / cdiv) : (bs->clk_hz / 65536); bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv); /* handle all the 3-wire mode */ @@ -1354,6 +1355,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev) return bs->irq ? bs->irq : -ENODEV; clk_prepare_enable(bs->clk); + bs->clk_hz = clk_get_rate(bs->clk); err = bcm2835_dma_init(ctlr, &pdev->dev, bs); if (err) From 68b11e8b1562986c134764433af64e97d30c9fc0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 20 Jul 2021 10:50:43 +0100 Subject: [PATCH 201/794] io_uring: explicitly count entries for poll reqs If __io_queue_proc() fails to add a second poll entry, e.g. kmalloc() failed, but it goes on with a third waitqueue, it may succeed and overwrite the error status. Count the number of poll entries we added, so we can set pt->error to zero at the beginning and find out when the mentioned scenario happens. Cc: stable@vger.kernel.org Fixes: 18bceab101add ("io_uring: allow POLL_ADD with double poll_wait() users") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9d6b9e561f88bcc0163623b74a76c39f712151c3.1626774457.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0cac361bf6b8..6668902cf50c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4802,6 +4802,7 @@ IO_NETOP_FN(recv); struct io_poll_table { struct poll_table_struct pt; struct io_kiocb *req; + int nr_entries; int error; }; @@ -4995,11 +4996,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, struct io_kiocb *req = pt->req; /* - * If poll->head is already set, it's because the file being polled - * uses multiple waitqueues for poll handling (eg one for read, one - * for write). Setup a separate io_poll_iocb if this happens. + * The file being polled uses multiple waitqueues for poll handling + * (e.g. 
one for read, one for write). Setup a separate io_poll_iocb + * if this happens. */ - if (unlikely(poll->head)) { + if (unlikely(pt->nr_entries)) { struct io_poll_iocb *poll_one = poll; /* already have a 2nd entry, fail a third attempt */ @@ -5027,7 +5028,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, *poll_ptr = poll; } - pt->error = 0; + pt->nr_entries++; poll->head = head; if (poll->events & EPOLLEXCLUSIVE) @@ -5104,9 +5105,12 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, ipt->pt._key = mask; ipt->req = req; - ipt->error = -EINVAL; + ipt->error = 0; + ipt->nr_entries = 0; mask = vfs_poll(req->file, &ipt->pt) & poll->events; + if (unlikely(!ipt->nr_entries) && !ipt->error) + ipt->error = -EINVAL; spin_lock_irq(&ctx->completion_lock); if (likely(poll->head)) { From 46fee9ab02cb24979bbe07631fc3ae95ae08aa3e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 20 Jul 2021 10:50:44 +0100 Subject: [PATCH 202/794] io_uring: remove double poll entry on arm failure __io_queue_proc() can enqueue both poll entries and still fail afterwards, so the callers trying to cancel it should also try to remove the second poll entry (if any). 
For example, it may leave the request alive referencing a io_uring context but not accessible for cancellation: [ 282.599913][ T1620] task:iou-sqp-23145 state:D stack:28720 pid:23155 ppid: 8844 flags:0x00004004 [ 282.609927][ T1620] Call Trace: [ 282.613711][ T1620] __schedule+0x93a/0x26f0 [ 282.634647][ T1620] schedule+0xd3/0x270 [ 282.638874][ T1620] io_uring_cancel_generic+0x54d/0x890 [ 282.660346][ T1620] io_sq_thread+0xaac/0x1250 [ 282.696394][ T1620] ret_from_fork+0x1f/0x30 Cc: stable@vger.kernel.org Fixes: 18bceab101add ("io_uring: allow POLL_ADD with double poll_wait() users") Reported-and-tested-by: syzbot+ac957324022b7132accf@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0ec1228fc5eda4cb524eeda857da8efdc43c331c.1626774457.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6668902cf50c..6486b54a0f62 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5113,6 +5113,8 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, ipt->error = -EINVAL; spin_lock_irq(&ctx->completion_lock); + if (ipt->error) + io_poll_remove_double(req); if (likely(poll->head)) { spin_lock(&poll->head->lock); if (unlikely(list_empty(&poll->wait.entry))) { From 362a9e65289284f36403058eea2462d0330c1f24 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 20 Jul 2021 16:38:05 +0800 Subject: [PATCH 203/794] io_uring: fix memleak in io_init_wq_offload() I got memory leak report when doing fuzz test: BUG: memory leak unreferenced object 0xffff888107310a80 (size 96): comm "syz-executor.6", pid 4610, jiffies 4295140240 (age 20.135s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... 
backtrace: [<000000001974933b>] kmalloc include/linux/slab.h:591 [inline] [<000000001974933b>] kzalloc include/linux/slab.h:721 [inline] [<000000001974933b>] io_init_wq_offload fs/io_uring.c:7920 [inline] [<000000001974933b>] io_uring_alloc_task_context+0x466/0x640 fs/io_uring.c:7955 [<0000000039d0800d>] __io_uring_add_tctx_node+0x256/0x360 fs/io_uring.c:9016 [<000000008482e78c>] io_uring_add_tctx_node fs/io_uring.c:9052 [inline] [<000000008482e78c>] __do_sys_io_uring_enter fs/io_uring.c:9354 [inline] [<000000008482e78c>] __se_sys_io_uring_enter fs/io_uring.c:9301 [inline] [<000000008482e78c>] __x64_sys_io_uring_enter+0xabc/0xc20 fs/io_uring.c:9301 [<00000000b875f18f>] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [<00000000b875f18f>] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 [<000000006b0a8484>] entry_SYSCALL_64_after_hwframe+0x44/0xae CPU0 CPU1 io_uring_enter io_uring_enter io_uring_add_tctx_node io_uring_add_tctx_node __io_uring_add_tctx_node __io_uring_add_tctx_node io_uring_alloc_task_context io_uring_alloc_task_context io_init_wq_offload io_init_wq_offload hash = kzalloc hash = kzalloc ctx->hash_map = hash ctx->hash_map = hash <- one of the hash is leaked When calling io_uring_enter() in parallel, the 'hash_map' will be leaked, add uring_lock to protect 'hash_map'. 
Fixes: e941894eae31 ("io-wq: make buffered file write hashed work map per-ctx") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20210720083805.3030730-1-yangyingliang@huawei.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6486b54a0f62..fe3d948658ad 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7905,15 +7905,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, struct io_wq_data data; unsigned int concurrency; + mutex_lock(&ctx->uring_lock); hash = ctx->hash_map; if (!hash) { hash = kzalloc(sizeof(*hash), GFP_KERNEL); - if (!hash) + if (!hash) { + mutex_unlock(&ctx->uring_lock); return ERR_PTR(-ENOMEM); + } refcount_set(&hash->refs, 1); init_waitqueue_head(&hash->wait); ctx->hash_map = hash; } + mutex_unlock(&ctx->uring_lock); data.hash = hash; data.task = task; From 3abab27c322e0f2acf981595aa8040c9164dc9fb Mon Sep 17 00:00:00 2001 From: Charles Baylis Date: Fri, 16 Jul 2021 17:43:12 +0100 Subject: [PATCH 204/794] drm: Return -ENOTTY for non-drm ioctls drm: Return -ENOTTY for non-drm ioctls Return -ENOTTY from drm_ioctl() when userspace passes in a cmd number which doesn't relate to the drm subsystem. Glibc uses the TCGETS ioctl to implement isatty(), and without this change isatty() incorrectly returns true for drm devices. To test, run this command: $ if [ -t 0 ]; then echo is a tty; fi < /dev/dri/card0 which shows "is a tty" without this patch. This may also modify memory which the userspace application is not expecting.
Signed-off-by: Charles Baylis Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/YPG3IBlzaMhfPqCr@stando.fishzet.co.uk --- drivers/gpu/drm/drm_ioctl.c | 3 +++ include/drm/drm_ioctl.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 98ae00661656..f454e0424086 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -834,6 +834,9 @@ long drm_ioctl(struct file *filp, if (drm_dev_is_unplugged(dev)) return -ENODEV; + if (DRM_IOCTL_TYPE(cmd) != DRM_IOCTL_BASE) + return -ENOTTY; + is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END; if (is_driver_ioctl) { diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h index 10100a4bbe2a..afb27cb6a7bd 100644 --- a/include/drm/drm_ioctl.h +++ b/include/drm/drm_ioctl.h @@ -68,6 +68,7 @@ typedef int drm_ioctl_compat_t(struct file *filp, unsigned int cmd, unsigned long arg); #define DRM_IOCTL_NR(n) _IOC_NR(n) +#define DRM_IOCTL_TYPE(n) _IOC_TYPE(n) #define DRM_MAJOR 226 /** From 7e777021780e9c373fc0c04d40b8407ce8c3b5d5 Mon Sep 17 00:00:00 2001 From: Eric Woudstra Date: Mon, 19 Jul 2021 20:23:57 +0200 Subject: [PATCH 205/794] mt7530 mt7530_fdb_write only set ivl bit vid larger than 1 Fixes my earlier patch which broke vlan unaware bridges. The IVL bit now only gets set for vid's larger than 1. Fixes: 11d8d98cbeef ("mt7530 fix mt7530_fdb_write vid missing ivl bit") Signed-off-by: Eric Woudstra Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/mt7530.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 9e4df35f92cc..69f21b71614c 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -366,7 +366,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, int i; reg[1] |= vid & CVID_MASK; - reg[1] |= ATA2_IVL; + if (vid > 1) + reg[1] |= ATA2_IVL; reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; /* STATIC_ENT indicate that entry is static wouldn't From 727d6a8b7ef3d25080fad228b2c4a1d4da5999c6 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Mon, 19 Jul 2021 16:41:24 -0700 Subject: [PATCH 206/794] net/sched: act_skbmod: Skip non-Ethernet packets Currently tcf_skbmod_act() assumes that packets use Ethernet as their L2 protocol, which is not always the case. As an example, for CAN devices: $ ip link add dev vcan0 type vcan $ ip link set up vcan0 $ tc qdisc add dev vcan0 root handle 1: htb $ tc filter add dev vcan0 parent 1: protocol ip prio 10 \ matchall action skbmod swap mac Doing the above silently corrupts all the packets. Do not perform skbmod actions for non-Ethernet packets. Fixes: 86da71b57383 ("net_sched: Introduce skbmod action") Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Signed-off-by: David S. 
Miller --- net/sched/act_skbmod.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 81a1c67335be..8d17a543cc9f 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -33,6 +34,13 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + action = READ_ONCE(d->tcf_action); + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + if (!skb->dev || skb->dev->type != ARPHRD_ETHER) + return action; + /* XXX: if you are going to edit more fields beyond ethernet header * (example when you add IP header replacement or vlan swap) * then MAX_EDIT_LEN needs to change appropriately @@ -41,10 +49,6 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, if (unlikely(err)) /* best policy is to drop on the floor */ goto drop; - action = READ_ONCE(d->tcf_action); - if (unlikely(action == TC_ACT_SHOT)) - goto drop; - p = rcu_dereference_bh(d->skbmod_p); flags = p->flags; if (flags & SKBMOD_F_DMAC) From 75d5641497a60bb5d36ff77fd3f526906cbc148c Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Tue, 20 Jul 2021 08:08:38 +0300 Subject: [PATCH 207/794] fsl/fman: Add fibre support Set SUPPORTED_FIBRE to mac_dev->if_support. It allows proper usage of PHYs with optical/fiber support. Signed-off-by: Maxim Kochetkov Acked-by: Madalin Bucur Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fman/mac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 46ecb42f2ef8..d9fc5c456bf3 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -524,6 +524,7 @@ static void setup_memac(struct mac_device *mac_dev) | SUPPORTED_Autoneg \ | SUPPORTED_Pause \ | SUPPORTED_Asym_Pause \ + | SUPPORTED_FIBRE \ | SUPPORTED_MII) static DEFINE_MUTEX(eth_lock); From 8fb4792f091e608a0a1d353dfdf07ef55a719db5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jul 2021 15:08:40 +0200 Subject: [PATCH 208/794] ipv6: fix another slab-out-of-bounds in fib6_nh_flush_exceptions While running the self-tests on a KASAN enabled kernel, I observed a slab-out-of-bounds splat very similar to the one reported in commit 821bbf79fe46 ("ipv6: Fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions"). We additionally need to take care of fib6_metrics initialization failure when the caller provides an nh. The fix is similar, explicitly free the route instead of calling fib6_info_release on a half-initialized object. Fixes: f88d8ea67fbdb ("ipv6: Plumb support for nexthop object in a fib6_info") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7b756a7dc036..b6ddf23d3833 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3769,7 +3769,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, err = PTR_ERR(rt->fib6_metrics); /* Do not leave garbage there. 
*/ rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; - goto out; + goto out_free; } if (cfg->fc_flags & RTF_ADDRCONF) From 91bed5565bba03b2a9f7334b58ae4be9df7c3840 Mon Sep 17 00:00:00 2001 From: Jia He Date: Tue, 20 Jul 2021 21:26:55 +0800 Subject: [PATCH 209/794] Revert "qed: fix possible unpaired spin_{un}lock_bh in _qed_mcp_cmd_and_union()" This reverts commit 6206b7981a36476f4695d661ae139f7db36a802d. That patch added additional spin_{un}lock_bh(), which was harmless but pointless. The original code path already guarantees the pairing of spin_{un}lock_bh(). We'd better revert it before we find the exact root cause of the bug_on mentioned in that patch. Fixes: 6206b7981a36 ("qed: fix possible unpaired spin_{un}lock_bh in _qed_mcp_cmd_and_union()") Cc: David S. Miller Cc: Prabhakar Kushwaha Signed-off-by: Jia He Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 79d879a5d663..4387292c37e2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -474,18 +474,14 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (!qed_mcp_has_pending_cmd(p_hwfn)) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!qed_mcp_has_pending_cmd(p_hwfn)) break; - } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!rc) break; - } else if (rc != -EAGAIN) { + else if (rc != -EAGAIN) goto err; - } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -502,8 +498,6 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } - spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - /* Send the mailbox command */ qed_mcp_reread_offsets(p_hwfn, p_ptt); seq_num = ++p_hwfn->mcp_info->drv_mb_seq; @@ -530,18 +524,14 @@
_qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); - if (p_cmd_elem->b_is_completed) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (p_cmd_elem->b_is_completed) break; - } rc = qed_mcp_update_pending_cmd(p_hwfn, p_ptt); - if (!rc) { - spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); + if (!rc) break; - } else if (rc != -EAGAIN) { + else if (rc != -EAGAIN) goto err; - } spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); } while (++cnt < max_retries); @@ -564,7 +554,6 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EAGAIN; } - spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem); spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); From e81d71e343c6c62cf323042caed4b7ca049deda5 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 20 Jul 2021 18:32:16 +0300 Subject: [PATCH 210/794] ALSA: hda/hdmi: Add quirk to force pin connectivity on NUC10 On some Intel NUC10 variants, codec reports AC_JACK_PORT_NONE as pin default config for all pins. This results in broken audio. Add a quirk to force connectivity. 
BugLink: https://github.com/clearlinux/distribution/issues/2396 Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20210720153216.2200938-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 84c088912b3c..e143e69d8184 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1941,6 +1941,7 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), + SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), {} }; From 8798d070d416d18a75770fc19787e96705073f43 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 3 Jul 2021 11:56:55 +0200 Subject: [PATCH 211/794] rbd: always kick acquire on "acquired" and "released" notifications Skipping the "lock has been released" notification if the lock owner is not what we expect based on owner_cid can lead to I/O hangs. One example is our own notifications: because owner_cid is cleared in rbd_unlock(), when we get our own notification it is processed as unexpected/duplicate and maybe_kick_acquire() isn't called. If a peer that requested the lock then doesn't go through with acquiring it, I/O requests that came in while the lock was being quiesced would be stalled until another I/O request is submitted and kicks acquire from rbd_img_exclusive_lock(). This makes the comment in rbd_release_lock() actually true: prior to this change the canceled work was being requeued in response to the "lock has been acquired" notification from rbd_handle_acquired_lock(). 
Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Ilya Dryomov Tested-by: Robin Geuze --- drivers/block/rbd.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 531d390902dd..e77cea6a6189 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4203,15 +4203,11 @@ static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - /* - * we already know that the remote client is - * the owner - */ - up_write(&rbd_dev->lock_rwsem); - return; + dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n", + __func__, rbd_dev, cid.gid, cid.handle); + } else { + rbd_set_owner_cid(rbd_dev, &cid); } - - rbd_set_owner_cid(rbd_dev, &cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); @@ -4236,14 +4232,12 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n", + dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n", __func__, rbd_dev, cid.gid, cid.handle, rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle); - up_write(&rbd_dev->lock_rwsem); - return; + } else { + rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); } - - rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); From ed9eb71085ecb7ded9a5118cec2ab70667cc7350 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 3 Jul 2021 11:31:26 +0200 Subject: [PATCH 212/794] rbd: don't hold lock_rwsem while running_list is being drained Currently rbd_quiesce_lock() holds lock_rwsem for read while blocking on releasing_wait completion. 
On the I/O completion side, each image request also needs to take lock_rwsem for read. Because rw_semaphore implementation doesn't allow new readers after a writer has indicated interest in the lock, this can result in a deadlock if something that needs to take lock_rwsem for write gets involved. For example: 1. watch error occurs 2. rbd_watch_errcb() takes lock_rwsem for write, clears owner_cid and releases lock_rwsem 3. after reestablishing the watch, rbd_reregister_watch() takes lock_rwsem for write and calls rbd_reacquire_lock() 4. rbd_quiesce_lock() downgrades lock_rwsem to read and blocks on releasing_wait until running_list becomes empty 5. another watch error occurs 6. rbd_watch_errcb() blocks trying to take lock_rwsem for write 7. no in-flight image request can complete and delete itself from running_list because lock_rwsem won't be granted anymore A similar scenario can occur with "lock has been acquired" and "lock has been released" notification handlers which also take lock_rwsem for write to update owner_cid. We don't actually get anything useful from sitting on lock_rwsem in rbd_quiesce_lock() -- owner_cid updates certainly don't need to be synchronized with. In fact the whole owner_cid tracking logic could probably be removed from the kernel client because we don't support proxied maintenance operations.
Cc: stable@vger.kernel.org # 5.3+ URL: https://tracker.ceph.com/issues/42757 Signed-off-by: Ilya Dryomov Tested-by: Robin Geuze --- drivers/block/rbd.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e77cea6a6189..784797fa9a53 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4100,8 +4100,6 @@ again: static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) { - bool need_wait; - dout("%s rbd_dev %p\n", __func__, rbd_dev); lockdep_assert_held_write(&rbd_dev->lock_rwsem); @@ -4113,11 +4111,11 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) */ rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; rbd_assert(!completion_done(&rbd_dev->releasing_wait)); - need_wait = !list_empty(&rbd_dev->running_list); - downgrade_write(&rbd_dev->lock_rwsem); - if (need_wait) - wait_for_completion(&rbd_dev->releasing_wait); - up_read(&rbd_dev->lock_rwsem); + if (list_empty(&rbd_dev->running_list)) + return true; + + up_write(&rbd_dev->lock_rwsem); + wait_for_completion(&rbd_dev->releasing_wait); down_write(&rbd_dev->lock_rwsem); if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) From cdb330f4b41ab55feb35487729e883c9e08b8a54 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Thu, 15 Jul 2021 14:40:39 +0100 Subject: [PATCH 213/794] ceph: don't WARN if we're still opening a session to an MDS If MDSs aren't available while mounting a filesystem, the session state will transition from SESSION_OPENING to SESSION_CLOSING. And in that scenario check_session_state() will be called from delayed_work() and trigger this WARN. Avoid this by only WARNing after a session has already been established (i.e., the s_ttl will be different from 0). 
Fixes: 62575e270f66 ("ceph: check session state after bumping session->s_seq") Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a818213c972f..9db1b39df773 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4456,7 +4456,7 @@ bool check_session_state(struct ceph_mds_session *s) break; case CEPH_MDS_SESSION_CLOSING: /* Should never reach this when we're unmounting */ - WARN_ON_ONCE(true); + WARN_ON_ONCE(s->s_ttl); fallthrough; case CEPH_MDS_SESSION_NEW: case CEPH_MDS_SESSION_RESTARTING: From 463f36c76fa4ec015c640ff63ccf52e7527abee0 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Fri, 16 Jul 2021 22:00:22 +0200 Subject: [PATCH 214/794] s390/boot: fix use of expolines in the DMA code The DMA code section of the decompressor must be compiled with expolines if Spectre V2 mitigation has been enabled for the decompressed kernel. This is required because although the decompressor's image contains the DMA code section, it is handed over to the decompressed kernel for use. Because the DMA code is already slow w/o expolines, use expolines always regardless whether the decompressed kernel is using them or not. This simplifies the DMA code by dropping the conditional compilation of expolines. 
Fixes: bf72630130c2 ("s390: use proper expoline sections for .dma code") Cc: # 5.2 Signed-off-by: Alexander Egorenkov Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/boot/text_dma.S | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S index f7c77cd518f2..5ff5fee02801 100644 --- a/arch/s390/boot/text_dma.S +++ b/arch/s390/boot/text_dma.S @@ -9,16 +9,6 @@ #include #include -#ifdef CC_USING_EXPOLINE - .pushsection .dma.text.__s390_indirect_jump_r14,"axG" -__dma__s390_indirect_jump_r14: - larl %r1,0f - ex 0,0(%r1) - j . -0: br %r14 - .popsection -#endif - .section .dma.text,"ax" /* * Simplified version of expoline thunk. The normal thunks can not be used here, @@ -27,11 +17,10 @@ __dma__s390_indirect_jump_r14: * affects a few functions that are not performance-relevant. */ .macro BR_EX_DMA_r14 -#ifdef CC_USING_EXPOLINE - jg __dma__s390_indirect_jump_r14 -#else - br %r14 -#endif + larl %r1,0f + ex 0,0(%r1) + j . +0: br %r14 .endm /* From 7d244643758e4cb51a29f948f6be3edd15d92cc3 Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Mon, 19 Jul 2021 12:41:41 +0800 Subject: [PATCH 215/794] s390/cpumf: fix semicolon.cocci warnings arch/s390/kernel/perf_cpum_cf.c:748:2-3: Unneeded semicolon Remove unneeded semicolon. 
Generated by: scripts/coccinelle/misc/semicolon.cocci Fixes: a029a4eab39e ("s390/cpumf: Allow concurrent access for CPU Measurement Counter Facility") CC: Thomas Richter Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 975a00c8c564..d7dc36ec0a60 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -745,7 +745,7 @@ static int __init cpumf_pmu_init(void) if (!cf_dbg) { pr_err("Registration of s390dbf(cpum_cf) failed\n"); return -ENOMEM; - }; + } debug_register_view(cf_dbg, &debug_sprintf_view); cpumf_pmu.attr_groups = cpumf_cf_event_group(); From 0cde560a8bfc3cb790715f39d4535129cca9e6ae Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 19 Jul 2021 21:58:21 +0200 Subject: [PATCH 216/794] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 24 ++++++++++++++++-------- arch/s390/configs/defconfig | 24 +++++++++++++++++------- arch/s390/configs/zfcpdump_defconfig | 3 +-- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 55cb846cda37..7de253f766e8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -5,7 +5,12 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_LSM=y CONFIG_PREEMPT=y +CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -28,14 +33,13 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y +CONFIG_CGROUP_MISC=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set 
-CONFIG_BPF_LSM=y -CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y @@ -76,6 +80,7 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BLK_INLINE_ENCRYPTION=y CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y @@ -95,6 +100,7 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y +CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y @@ -158,6 +164,7 @@ CONFIG_IPV6_RPL_LWTUNNEL=y CONFIG_MPTCP=y CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -280,6 +287,7 @@ CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y +CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -384,12 +392,11 @@ CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y -CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y -CONFIG_PCI_IOV=y # CONFIG_PCIEASPM is not set CONFIG_PCI_DEBUG=y +CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y @@ -496,6 +503,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m @@ -552,7 +560,6 @@ CONFIG_INPUT_EVDEV=y CONFIG_LEGACY_PTY_COUNT=0 CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y CONFIG_PPS=m @@ -575,7 +582,6 @@ CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y @@ -620,6 +626,7 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m 
CONFIG_OVERLAY_FS=m +CONFIG_NETFS_STATS=y CONFIG_FSCACHE=m CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y @@ -655,7 +662,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_STATS2=y CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y @@ -683,6 +689,7 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_LOCKDOWN_LSM=y CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y +CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y @@ -697,6 +704,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m @@ -844,7 +852,6 @@ CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m -CONFIG_TEST_LIST_SORT=y CONFIG_TEST_MIN_HEAP=y CONFIG_TEST_SORT=y CONFIG_KPROBES_SANITY_TEST=y @@ -854,3 +861,4 @@ CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m +CONFIG_TEST_LIVEPATCH=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 5d847ab5feaa..b671642967ba 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -4,6 +4,11 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_LSM=y +CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -26,14 +31,13 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y +CONFIG_CGROUP_MISC=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set -CONFIG_BPF_LSM=y -CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y @@ -70,6 +74,7 @@ 
CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BLK_INLINE_ENCRYPTION=y CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y @@ -87,6 +92,7 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y +CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y @@ -149,6 +155,7 @@ CONFIG_IPV6_RPL_LWTUNNEL=y CONFIG_MPTCP=y CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -271,6 +278,7 @@ CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y +CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -374,11 +382,10 @@ CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y -CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y -CONFIG_PCI_IOV=y # CONFIG_PCIEASPM is not set +CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y @@ -488,6 +495,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m @@ -544,7 +552,6 @@ CONFIG_INPUT_EVDEV=y CONFIG_LEGACY_PTY_COUNT=0 CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y # CONFIG_PTP_1588_CLOCK is not set @@ -567,7 +574,6 @@ CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y @@ -608,6 +614,7 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m +CONFIG_NETFS_STATS=y CONFIG_FSCACHE=m CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y @@ -643,7 +650,6 @@ 
CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_STATS2=y CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y @@ -670,6 +676,7 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_LOCKDOWN_LSM=y CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y +CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y @@ -685,6 +692,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m @@ -755,6 +763,7 @@ CONFIG_CRC8=m CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y +CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y @@ -782,3 +791,4 @@ CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m +CONFIG_TEST_LIVEPATCH=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 76123a4b26ab..d576aaab27c9 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -29,9 +29,9 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set # CONFIG_MIGRATION is not set -# CONFIG_BOUNCE is not set CONFIG_NET=y # CONFIG_IUCV is not set +# CONFIG_PCPU_DEV_REFCNT is not set # CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_RAM=y @@ -51,7 +51,6 @@ CONFIG_ZFCP=y # CONFIG_SERIO is not set # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set -CONFIG_RAW_DRIVER=y # CONFIG_HMC_DRV is not set # CONFIG_S390_TAPE is not set # CONFIG_VMCP is not set From 8ba89a3c7967808f33478a8573277cf6a7412c4c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 11:38:41 -0700 Subject: [PATCH 217/794] dmaengine: idxd: fix desc->vector that isn't being updated Missing update for desc->vector when the wq vector gets updated. 
This causes the desc->vector to always be at 0. Fixes: da435aedb00a ("dmaengine: idxd: fix array index when int_handles are being used") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162628784374.353761.4736602409627820431.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index e29887528077..21d7d09f73dd 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -25,11 +25,10 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) * Descriptor completion vectors are 1...N for MSIX. We will round * robin through the N vectors. */ - wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1; + wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1; if (!idxd->int_handles) { desc->hw->int_handle = wq->vec_ptr; } else { - desc->vector = wq->vec_ptr; /* * int_handles are only for descriptor completion. However for device * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even From 7eb25da161befbc9a80e94e1bd90d6c06aa645cf Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 14:57:19 -0700 Subject: [PATCH 218/794] dmaengine: idxd: fix sequence for pci driver remove() and shutdown() ->shutdown() call should only be responsible for quiescing the device. Currently it is doing PCI device tear down. This causes an issue when things like the MMIO mapping are removed while idxd_unregister_devices() triggers removal of the idxd device sub-driver and still initiates MMIO writes to the device. Another issue is with the unregistering of idxd 'struct device', the memory context gets freed. So the teardown calls are accessing freed memory and can cause kernel oops. Move all the teardown bits that don't belong in shutdown to the ->remove() call.
Move unregistering of the idxd conf_dev 'struct device' to after doing all the teardown to free all the memory that's no longer needed. Fixes: 47c16ac27d4c ("dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162629983901.395844.17964803190905549615.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 26 +++++++++++++++++--------- drivers/dma/idxd/sysfs.c | 2 -- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 4e32a4dcc3ab..c0f4c0422f32 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -760,32 +760,40 @@ static void idxd_shutdown(struct pci_dev *pdev) for (i = 0; i < msixcnt; i++) { irq_entry = &idxd->irq_entries[i]; synchronize_irq(irq_entry->vector); - free_irq(irq_entry->vector, irq_entry); if (i == 0) continue; idxd_flush_pending_llist(irq_entry); idxd_flush_work_list(irq_entry); } - - idxd_msix_perm_clear(idxd); - idxd_release_int_handles(idxd); - pci_free_irq_vectors(pdev); - pci_iounmap(pdev, idxd->reg_base); - pci_disable_device(pdev); - destroy_workqueue(idxd->wq); + flush_workqueue(idxd->wq); } static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); + struct idxd_irq_entry *irq_entry; + int msixcnt = pci_msix_vec_count(pdev); + int i; dev_dbg(&pdev->dev, "%s called\n", __func__); idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); idxd_unregister_devices(idxd); - perfmon_pmu_remove(idxd); + + for (i = 0; i < msixcnt; i++) { + irq_entry = &idxd->irq_entries[i]; + free_irq(irq_entry->vector, irq_entry); + } + idxd_msix_perm_clear(idxd); + idxd_release_int_handles(idxd); + pci_free_irq_vectors(pdev); + pci_iounmap(pdev, idxd->reg_base); iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + pci_disable_device(pdev); + destroy_workqueue(idxd->wq); + perfmon_pmu_remove(idxd); + 
device_unregister(&idxd->conf_dev); } static struct pci_driver idxd_pci_driver = { diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 0460d58e3941..bb4df63906a7 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1744,8 +1744,6 @@ void idxd_unregister_devices(struct idxd_device *idxd) device_unregister(&group->conf_dev); } - - device_unregister(&idxd->conf_dev); } int idxd_register_bus_type(void) From 6b4b87f2c31ac1af4f244990a7cbfb50d3f3e33f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 11:50:06 -0700 Subject: [PATCH 219/794] dmaengine: idxd: fix submission race window Konstantin observed that when descriptors are submitted, the descriptor is added to the pending list after the submission. This creates a race window with the slight possibility that the descriptor can complete before it gets added to the pending list and this window would cause the completion handler to miss processing the descriptor. To address the issue, the addition of the descriptor to the pending list must be done before it gets submitted to the hardware. However, submitting to the swq with the ENQCMDS instruction can fail if either the wq is full or the wq is not "active". With the descriptor allocation being the gate to the wq capacity, it is not possible to hit a retry with ENQCMDS submission to the swq. The only failure that can happen is when the wq is no longer "active" due to a hw error, and therefore we are moving towards taking down the portal. Given this is a rare condition and there's no longer concern over I/O performance, the driver can walk the completion lists in order to retrieve and abort the descriptor. The error path will set the descriptor to aborted status. It will take the work list lock to prevent further processing of the work list. It will do a delete_all on the pending llist to retrieve all descriptors on the pending llist. The delete_all action does not require a lock.
It will walk through the acquired llist to find the aborted descriptor while adding all remaining descriptors to the work list since it holds the lock. If it does not find the aborted descriptor on the llist, it will walk through the work list. And if it still does not find the descriptor, then it means the interrupt handler has removed the desc from the llist but is pending on the work list lock and will process it once the error path releases the lock. Fixes: eb15e7154fbf ("dmaengine: idxd: add interrupt handle request and release support") Reported-by: Konstantin Ananyev Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162628855747.360485.10101925573082466530.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/idxd.h | 14 +++++++ drivers/dma/idxd/irq.c | 27 +++++++++----- drivers/dma/idxd/submit.c | 78 ++++++++++++++++++++++++++++++++++----- 3 files changed, 101 insertions(+), 18 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 26482c7d4c3a..fc708be7ad9a 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -294,6 +294,14 @@ struct idxd_desc { struct idxd_wq *wq; }; +/* + * This is software defined error for the completion status. We overload the error code + * that will never appear in completion status and only SWERR register.
+ */ +enum idxd_completion_status { + IDXD_COMP_DESC_ABORT = 0xff, +}; + #define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev) #define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev) @@ -482,4 +490,10 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif +static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) +{ + idxd_dma_complete_txd(desc, reason); + idxd_free_desc(desc->wq, desc); +} + #endif diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index ae68e1e5487a..4e3a7198c0ca 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -245,12 +245,6 @@ static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr) return false; } -static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) -{ - idxd_dma_complete_txd(desc, reason); - idxd_free_desc(desc->wq, desc); -} - static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, enum irq_work_type wtype, int *processed, u64 data) @@ -272,8 +266,16 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, reason = IDXD_COMPLETE_DEV_FAIL; llist_for_each_entry_safe(desc, t, head, llnode) { - if (desc->completion->status) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; + + if (status) { + if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + (*processed)++; + continue; + } + + if (unlikely(status != DSA_COMP_SUCCESS)) match_fault(desc, data); complete_desc(desc, reason); (*processed)++; @@ -329,7 +331,14 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, spin_unlock_irqrestore(&irq_entry->list_lock, flags); list_for_each_entry(desc, &flist, list) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + u8 status = desc->completion->status & 
DSA_COMP_STATUS_MASK; + + if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + continue; + } + + if (unlikely(status != DSA_COMP_SUCCESS)) match_fault(desc, data); complete_desc(desc, reason); } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 21d7d09f73dd..36c9c1a89b7e 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -87,9 +87,64 @@ void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) sbitmap_queue_clear(&wq->sbq, desc->id, cpu); } +static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *n; + + lockdep_assert_held(&ie->list_lock); + list_for_each_entry_safe(d, n, &ie->work_list, list) { + if (d == desc) { + list_del(&d->list); + return d; + } + } + + /* + * At this point, the desc needs to be aborted is held by the completion + * handler where it has taken it off the pending list but has not added to the + * work list. It will be cleaned up by the interrupt handler when it sees the + * IDXD_COMP_DESC_ABORT for completion status. + */ + return NULL; +} + +static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *t, *found = NULL; + struct llist_node *head; + unsigned long flags; + + desc->completion->status = IDXD_COMP_DESC_ABORT; + /* + * Grab the list lock so it will block the irq thread handler. This allows the + * abort code to locate the descriptor need to be aborted. 
+ */ + spin_lock_irqsave(&ie->list_lock, flags); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) { + if (d == desc) { + found = desc; + continue; + } + list_add_tail(&desc->list, &ie->work_list); + } + } + + if (!found) + found = list_abort_desc(wq, ie, desc); + spin_unlock_irqrestore(&ie->list_lock, flags); + + if (found) + complete_desc(found, IDXD_COMPLETE_ABORT); +} + int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = NULL; void __iomem *portal; int rc; @@ -107,6 +162,16 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * even on UP because the recipient is a device. */ wmb(); + + /* + * Pending the descriptor to the lockless list for the irq_entry + * that we designated the descriptor to. + */ + if (desc->hw->flags & IDXD_OP_FLAG_RCI) { + ie = &idxd->irq_entries[desc->vector]; + llist_add(&desc->llnode, &ie->pending_llist); + } + if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { @@ -117,18 +182,13 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * device is not accepting descriptor at all. */ rc = enqcmds(portal, desc->hw); - if (rc < 0) + if (rc < 0) { + if (ie) + llist_abort_desc(wq, ie, desc); return rc; + } } percpu_ref_put(&wq->wq_active); - - /* - * Pending the descriptor to the lockless list for the irq_entry - * that we designated the descriptor to. - */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) - llist_add(&desc->llnode, &idxd->irq_entries[desc->vector].pending_llist); - return 0; } From 4a8ac5e45cdaa88884b4ce05303e304cbabeb367 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 16 Jul 2021 08:58:32 +1200 Subject: [PATCH 220/794] i2c: mpc: Poll for MCF During some transfers the bus can still be busy when an interrupt is received. 
Commit 763778cd7926 ("i2c: mpc: Restore reread of I2C status register") attempted to address this by re-reading MPC_I2C_SR once but that just made it less likely to happen without actually preventing it. Instead of a single re-read, poll with a timeout so that the bus is given enough time to settle but a genuine stuck SCL is still noticed. Fixes: 1538d82f4647 ("i2c: mpc: Interrupt driven transfer") Signed-off-by: Chris Packham Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 6d5014ebaab5..a6ea1eb1394e 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -635,8 +635,8 @@ static irqreturn_t mpc_i2c_isr(int irq, void *dev_id) status = readb(i2c->base + MPC_I2C_SR); if (status & CSR_MIF) { - /* Read again to allow register to stabilise */ - status = readb(i2c->base + MPC_I2C_SR); + /* Wait up to 100us for transfer to properly complete */ + readb_poll_timeout(i2c->base + MPC_I2C_SR, status, !(status & CSR_MCF), 0, 100); writeb(0, i2c->base + MPC_I2C_SR); mpc_i2c_do_intr(i2c, status); return IRQ_HANDLED; From 09cfae9f13d51700b0fecf591dcd658fc5375428 Mon Sep 17 00:00:00 2001 From: Markus Boehme Date: Tue, 20 Jul 2021 16:26:19 -0700 Subject: [PATCH 221/794] ixgbe: Fix packet corruption due to missing DMA sync When receiving a packet with multiple fragments, hardware may still touch the first fragment until the entire packet has been received. The driver therefore keeps the first fragment mapped for DMA until end of packet has been asserted, and delays its dma_sync call until then. The driver tries to fit multiple receive buffers on one page. When using 3K receive buffers (e.g. using Jumbo frames and legacy-rx is turned off/build_skb is being used) on an architecture with 4K pages, the driver allocates an order 1 compound page and uses one page per receive buffer. 
To determine the correct offset for a delayed DMA sync of the first fragment of a multi-fragment packet, the driver then cannot just use PAGE_MASK on the DMA address but has to construct a mask based on the actual size of the backing page. Using PAGE_MASK in the 3K RX buffer/4K page architecture configuration will always sync the first page of a compound page. With the SWIOTLB enabled this can lead to corrupted packets (zeroed out first fragment, re-used garbage from another packet) and various consequences, such as slow/stalling data transfers and connection resets. For example, testing on a link with MTU exceeding 3058 bytes on a host with SWIOTLB enabled (e.g. "iommu=soft swiotlb=262144,force") TCP transfers quickly fizzle out without this patch. Cc: stable@vger.kernel.org Fixes: 0c5661ecc5dd7 ("ixgbe: fix crash in build_skb Rx code path") Signed-off-by: Markus Boehme Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 913253f8ecb4..14aea40da50f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1825,7 +1825,8 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { if (ring_uses_build_skb(rx_ring)) { - unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; + unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1; + unsigned long offset = (unsigned long)(skb->data) & mask; dma_sync_single_range_for_cpu(rx_ring->dev, IXGBE_CB(skb)->dma, From a47fa41381a09e5997afd762664db4f5f6657e03 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 21 Jul 2021 00:55:22 +0200 Subject: [PATCH 222/794] scsi: target: Fix NULL dereference on XCOPY completion CPU affinity control added with commit 39ae3edda325 ("scsi: 
target: core: Make completion affinity configurable") makes target_complete_cmd() queue work on a CPU based on se_tpg->se_tpg_wwn->cmd_compl_affinity state. LIO's EXTENDED COPY worker is a special case in that read/write cmds are dispatched using the global xcopy_pt_tpg, which carries a NULL se_tpg_wwn pointer following initialization in target_xcopy_setup_pt(). The NULL xcopy_pt_tpg->se_tpg_wwn pointer is dereferenced on completion of any EXTENDED COPY initiated read/write cmds. E.g. using the libiscsi SCSI.ExtendedCopy.Simple test: BUG: kernel NULL pointer dereference, address: 00000000000001a8 RIP: 0010:target_complete_cmd+0x9d/0x130 [target_core_mod] Call Trace: fd_execute_rw+0x148/0x42a [target_core_file] ? __dynamic_pr_debug+0xa7/0xe0 ? target_check_reservation+0x5b/0x940 [target_core_mod] __target_execute_cmd+0x1e/0x90 [target_core_mod] transport_generic_new_cmd+0x17c/0x330 [target_core_mod] target_xcopy_issue_pt_cmd+0x9/0x60 [target_core_mod] target_xcopy_read_source.isra.7+0x10b/0x1b0 [target_core_mod] ? target_check_fua+0x40/0x40 [target_core_mod] ? transport_complete_task_attr+0x130/0x130 [target_core_mod] target_xcopy_do_work+0x61f/0xc00 [target_core_mod] This fix makes target_complete_cmd() queue work on se_cmd->cpuid if se_tpg_wwn is NULL. Link: https://lore.kernel.org/r/20210720225522.26291-1-ddiss@suse.de Fixes: 39ae3edda325 ("scsi: target: core: Make completion affinity configurable") Cc: Lee Duncan Cc: Mike Christie Reviewed-by: Mike Christie Signed-off-by: David Disseldorp Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 7e35eddd9eb7..26ceabe34de5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -886,7 +886,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) INIT_WORK(&cmd->work, success ?
target_complete_ok_work : target_complete_failure_work); - if (wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID) + if (!wwn || wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID) cpu = cmd->cpuid; else cpu = wwn->cmd_compl_affinity; From ec185dd3ab257dc2a60953fdf1b6622f524cc5b7 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 14 Jun 2021 17:33:10 -0500 Subject: [PATCH 223/794] optee: Fix memory leak when failing to register shm pages Free the previously allocated pages when we encounter an error condition while attempting to register the pages with the secure world. Fixes: a249dd200d03 ("tee: optee: Fix dynamic shm pool allocations") Fixes: 5a769f6ff439 ("optee: Fix multi page dynamic shm pool alloc") Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/shm_pool.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c index d767eebf30bd..da06ce9b9313 100644 --- a/drivers/tee/optee/shm_pool.c +++ b/drivers/tee/optee/shm_pool.c @@ -32,8 +32,10 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, struct page **pages; pages = kcalloc(nr_pages, sizeof(pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; + if (!pages) { + rc = -ENOMEM; + goto err; + } for (i = 0; i < nr_pages; i++) { pages[i] = page; @@ -44,8 +46,14 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, rc = optee_shm_register(shm->ctx, shm, pages, nr_pages, (unsigned long)shm->kaddr); kfree(pages); + if (rc) + goto err; } + return 0; + +err: + __free_pages(page, order); return rc; } From adf752af454e91e123e85e3784972d166837af73 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 14 Jun 2021 17:33:11 -0500 Subject: [PATCH 224/794] optee: Refuse to load the driver under the kdump kernel Fix a hung task issue, seen when booting the kdump kernel, that is caused by all of the secure world threads 
being in a permanent suspended state: INFO: task swapper/0:1 blocked for more than 120 seconds. Not tainted 5.4.83 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. swapper/0 D 0 1 0 0x00000028 Call trace: __switch_to+0xc8/0x118 __schedule+0x2e0/0x700 schedule+0x38/0xb8 schedule_timeout+0x258/0x388 wait_for_completion+0x16c/0x4b8 optee_cq_wait_for_completion+0x28/0xa8 optee_disable_shm_cache+0xb8/0xf8 optee_probe+0x560/0x61c platform_drv_probe+0x58/0xa8 really_probe+0xe0/0x338 driver_probe_device+0x5c/0xf0 device_driver_attach+0x74/0x80 __driver_attach+0x64/0xe0 bus_for_each_dev+0x84/0xd8 driver_attach+0x30/0x40 bus_add_driver+0x188/0x1e8 driver_register+0x64/0x110 __platform_driver_register+0x54/0x60 optee_driver_init+0x20/0x28 do_one_initcall+0x54/0x24c kernel_init_freeable+0x1e8/0x2c0 kernel_init+0x18/0x118 ret_from_fork+0x10/0x18 The invoke_fn hook returned OPTEE_SMC_RETURN_ETHREAD_LIMIT, indicating that the secure world threads were all in a suspended state at the time of the kernel crash. This intermittently prevented the kdump kernel from booting, resulting in a failure to collect the kernel dump. Make kernel dump collection more reliable on systems utilizing OP-TEE by refusing to load the driver under the kdump kernel. 
Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index ddb8f9ecf307..5288cd767d82 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -612,6 +613,16 @@ static int optee_probe(struct platform_device *pdev) u32 sec_caps; int rc; + /* + * The kernel may have crashed at the same time that all available + * secure world threads were suspended and we cannot reschedule the + * suspended threads without access to the crashed kernel's wait_queue. + * Therefore, we cannot reliably initialize the OP-TEE driver in the + * kdump kernel. + */ + if (is_kdump_kernel()) + return -ENODEV; + invoke_fn = get_invoke_func(&pdev->dev); if (IS_ERR(invoke_fn)) return PTR_ERR(invoke_fn); From f25889f93184db8b07a543cc2bbbb9a8fcaf4333 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 14 Jun 2021 17:33:12 -0500 Subject: [PATCH 225/794] optee: fix tee out of memory failure seen during kexec reboot The following out of memory errors are seen on kexec reboot from the optee core. [ 0.368428] tee_bnxt_fw optee-clnt0: tee_shm_alloc failed [ 0.368461] tee_bnxt_fw: probe of optee-clnt0 failed with error -22 tee_shm_release() is not invoked on dma shm buffer. Implement .shutdown() method to handle the release of the buffers correctly. 
More info: https://github.com/OP-TEE/optee_os/issues/3637 Cc: stable@vger.kernel.org Signed-off-by: Allen Pais Reviewed-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/core.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 5288cd767d82..0987074d7ed0 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -573,6 +573,13 @@ static optee_invoke_fn *get_invoke_func(struct device *dev) return ERR_PTR(-EINVAL); } +/* optee_remove - Device Removal Routine + * @pdev: platform device information struct + * + * optee_remove is called by platform subsystem to alert the driver + * that it should release the device + */ + static int optee_remove(struct platform_device *pdev) { struct optee *optee = platform_get_drvdata(pdev); @@ -603,6 +610,18 @@ static int optee_remove(struct platform_device *pdev) return 0; } +/* optee_shutdown - Device Removal Routine + * @pdev: platform device information struct + * + * platform_shutdown is called by the platform subsystem to alert + * the driver that a shutdown, reboot, or kexec is happening and + * device must be disabled. 
+ */ +static void optee_shutdown(struct platform_device *pdev) +{ + optee_disable_shm_cache(platform_get_drvdata(pdev)); +} + static int optee_probe(struct platform_device *pdev) { optee_invoke_fn *invoke_fn; @@ -739,6 +758,7 @@ MODULE_DEVICE_TABLE(of, optee_dt_match); static struct platform_driver optee_driver = { .probe = optee_probe, .remove = optee_remove, + .shutdown = optee_shutdown, .driver = { .name = "optee", .of_match_table = optee_dt_match, From b5c10dd04b7418793517e3286cde5c04759a86de Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 14 Jun 2021 17:33:13 -0500 Subject: [PATCH 226/794] optee: Clear stale cache entries during initialization The shm cache could contain invalid addresses if optee_disable_shm_cache() was not called from the .shutdown hook of the previous kernel before a kexec. These addresses could be unmapped or they could point to mapped but unintended locations in memory. Clear the shared memory cache, while being careful to not translate the addresses returned from OPTEE_SMC_DISABLE_SHM_CACHE, during driver initialization. Once all pre-cache shm objects are removed, proceed with enabling the cache so that we know that we can handle cached shm objects with confidence later in the .shutdown hook. 
Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/call.c | 36 ++++++++++++++++++++++++++++--- drivers/tee/optee/core.c | 9 ++++++++ drivers/tee/optee/optee_private.h | 1 + 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index 6e6eb836e9b6..387e94768182 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -416,11 +416,13 @@ void optee_enable_shm_cache(struct optee *optee) } /** - * optee_disable_shm_cache() - Disables caching of some shared memory allocation - * in OP-TEE + * __optee_disable_shm_cache() - Disables caching of some shared memory + * allocation in OP-TEE * @optee: main service struct + * @is_mapped: true if the cached shared memory addresses were mapped by this + * kernel, are safe to dereference, and should be freed */ -void optee_disable_shm_cache(struct optee *optee) +static void __optee_disable_shm_cache(struct optee *optee, bool is_mapped) { struct optee_call_waiter w; @@ -439,6 +441,13 @@ void optee_disable_shm_cache(struct optee *optee) if (res.result.status == OPTEE_SMC_RETURN_OK) { struct tee_shm *shm; + /* + * Shared memory references that were not mapped by + * this kernel must be ignored to prevent a crash. 
+ */ + if (!is_mapped) + continue; + shm = reg_pair_to_ptr(res.result.shm_upper32, res.result.shm_lower32); tee_shm_free(shm); @@ -449,6 +458,27 @@ void optee_disable_shm_cache(struct optee *optee) optee_cq_wait_final(&optee->call_queue, &w); } +/** + * optee_disable_shm_cache() - Disables caching of mapped shared memory + * allocations in OP-TEE + * @optee: main service struct + */ +void optee_disable_shm_cache(struct optee *optee) +{ + return __optee_disable_shm_cache(optee, true); +} + +/** + * optee_disable_unmapped_shm_cache() - Disables caching of shared memory + * allocations in OP-TEE which are not + * currently mapped + * @optee: main service struct + */ +void optee_disable_unmapped_shm_cache(struct optee *optee) +{ + return __optee_disable_shm_cache(optee, false); +} + #define PAGELIST_ENTRIES_PER_PAGE \ ((OPTEE_MSG_NONCONTIG_PAGE_SIZE / sizeof(u64)) - 1) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 0987074d7ed0..651d49b53d3b 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -716,6 +716,15 @@ static int optee_probe(struct platform_device *pdev) optee->memremaped_shm = memremaped_shm; optee->pool = pool; + /* + * Ensure that there are no pre-existing shm objects before enabling + * the shm cache so that there's no chance of receiving an invalid + * address during shutdown. This could occur, for example, if we're + * kexec booting from an older kernel that did not properly cleanup the + * shm cache. 
+ */ + optee_disable_unmapped_shm_cache(optee); + optee_enable_shm_cache(optee); if (optee->sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index e25b216a14ef..dbdd367be156 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -159,6 +159,7 @@ int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session); void optee_enable_shm_cache(struct optee *optee); void optee_disable_shm_cache(struct optee *optee); +void optee_disable_unmapped_shm_cache(struct optee *optee); int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, struct page **pages, size_t num_pages, From dc7019b7d0e188d4093b34bd0747ed0d668c63bf Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Mon, 14 Jun 2021 17:33:14 -0500 Subject: [PATCH 227/794] tee: add tee_shm_alloc_kernel_buf() Adds a new function tee_shm_alloc_kernel_buf() to allocate shared memory from a kernel driver. This function can later be made more lightweight by avoiding unnecessary dma-buf export. Cc: stable@vger.kernel.org Reviewed-by: Tyler Hicks Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/tee_shm.c | 18 ++++++++++++++++++ include/linux/tee_drv.h | 1 + 2 files changed, 19 insertions(+) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 00472f5ce22e..c65e44707cd6 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -193,6 +193,24 @@ err_dev_put: } EXPORT_SYMBOL_GPL(tee_shm_alloc); +/** + * tee_shm_alloc_kernel_buf() - Allocate shared memory for kernel buffer + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * + * The returned memory registered in secure world and is suitable to be + * passed as a memory buffer in parameter argument to + * tee_client_invoke_func(). The memory allocated is later freed with a + * call to tee_shm_free(). 
+ * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size) +{ + return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); +} +EXPORT_SYMBOL_GPL(tee_shm_alloc_kernel_buf); + struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, size_t length, u32 flags) { diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 54269e47ac9a..8990f7628387 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -332,6 +332,7 @@ void *tee_get_drvdata(struct tee_device *teedev); * @returns a pointer to 'struct tee_shm' */ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); +struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); /** * tee_shm_register() - Register shared memory buffer From 376e4199e327a5cf29b8ec8fb0f64f3d8b429819 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Mon, 14 Jun 2021 17:33:15 -0500 Subject: [PATCH 228/794] tee: Correct inappropriate usage of TEE_SHM_DMA_BUF flag Currently TEE_SHM_DMA_BUF flag has been inappropriately used to not register shared memory allocated for private usage by underlying TEE driver: OP-TEE in this case. So rather add a new flag as TEE_SHM_PRIV that can be utilized by underlying TEE drivers for private allocation and usage of shared memory. With this corrected, allow tee_shm_alloc_kernel_buf() to allocate a shared memory region without the backing of dma-buf. 
Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Co-developed-by: Tyler Hicks Signed-off-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/call.c | 2 +- drivers/tee/optee/core.c | 3 ++- drivers/tee/optee/rpc.c | 5 +++-- drivers/tee/optee/shm_pool.c | 8 ++++++-- drivers/tee/tee_shm.c | 4 ++-- include/linux/tee_drv.h | 1 + 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index 387e94768182..945f03da0223 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -184,7 +184,7 @@ static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params, struct optee_msg_arg *ma; shm = tee_shm_alloc(ctx, OPTEE_MSG_GET_ARG_SIZE(num_params), - TEE_SHM_MAPPED); + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (IS_ERR(shm)) return shm; diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 651d49b53d3b..5ce13b099d7d 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -278,7 +278,8 @@ static void optee_release(struct tee_context *ctx) if (!ctxdata) return; - shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm)) { arg = tee_shm_get_va(shm, 0); /* diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index 1849180b0278..efbaff7ad7e5 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -314,7 +314,7 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, shm = cmd_alloc_suppl(ctx, sz); break; case OPTEE_RPC_SHM_TYPE_KERNEL: - shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV); break; default: arg->ret = TEEC_ERROR_BAD_PARAMETERS; @@ -502,7 +502,8 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param, switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) { 
case OPTEE_SMC_RPC_FUNC_ALLOC: - shm = tee_shm_alloc(ctx, param->a1, TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, param->a1, + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) { reg_pair_from_64(&param->a1, &param->a2, pa); reg_pair_from_64(&param->a4, &param->a5, diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c index da06ce9b9313..c41a9a501a6e 100644 --- a/drivers/tee/optee/shm_pool.c +++ b/drivers/tee/optee/shm_pool.c @@ -27,7 +27,11 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, shm->paddr = page_to_phys(page); shm->size = PAGE_SIZE << order; - if (shm->flags & TEE_SHM_DMA_BUF) { + /* + * Shared memory private to the OP-TEE driver doesn't need + * to be registered with OP-TEE. + */ + if (!(shm->flags & TEE_SHM_PRIV)) { unsigned int nr_pages = 1 << order, i; struct page **pages; @@ -60,7 +64,7 @@ err: static void pool_op_free(struct tee_shm_pool_mgr *poolm, struct tee_shm *shm) { - if (shm->flags & TEE_SHM_DMA_BUF) + if (!(shm->flags & TEE_SHM_PRIV)) optee_shm_unregister(shm->ctx, shm); free_pages((unsigned long)shm->kaddr, get_order(shm->size)); diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index c65e44707cd6..8a9384a64f3e 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -117,7 +117,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) return ERR_PTR(-EINVAL); } - if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) { + if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF | TEE_SHM_PRIV))) { dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags); return ERR_PTR(-EINVAL); } @@ -207,7 +207,7 @@ EXPORT_SYMBOL_GPL(tee_shm_alloc); */ struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size) { - return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED); } EXPORT_SYMBOL_GPL(tee_shm_alloc_kernel_buf); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 
8990f7628387..3ebfea0781f1 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -27,6 +27,7 @@ #define TEE_SHM_USER_MAPPED BIT(4) /* Memory mapped in user space */ #define TEE_SHM_POOL BIT(5) /* Memory allocated from pool */ #define TEE_SHM_KERNEL_MAPPED BIT(6) /* Memory mapped in kernel space */ +#define TEE_SHM_PRIV BIT(7) /* Memory private to TEE driver */ struct device; struct tee_device; From dfb703ad2a8d366b829818a558337be779746575 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 14 Jun 2021 17:33:16 -0500 Subject: [PATCH 229/794] tpm_ftpm_tee: Free and unregister TEE shared memory during kexec dma-buf backed shared memory cannot be reliably freed and unregistered during a kexec operation even when tee_shm_free() is called on the shm from a .shutdown hook. The problem occurs because dma_buf_put() calls fput() which then uses task_work_add(), with the TWA_RESUME parameter, to queue tee_shm_release() to be called before the current task returns to user mode. However, the current task never returns to user mode before the kexec completes so the memory is never freed nor unregistered. Use tee_shm_alloc_kernel_buf() to avoid dma-buf backed shared memory allocation so that tee_shm_free() can directly call tee_shm_release(). This will ensure that the shm can be freed and unregistered during a kexec operation. 
Fixes: 09e574831b27 ("tpm/tpm_ftpm_tee: A driver for firmware TPM running inside TEE") Fixes: 1760eb689ed6 ("tpm/tpm_ftpm_tee: add shutdown call back") Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks Reviewed-by: Sumit Garg Acked-by: Jarkko Sakkinen Signed-off-by: Jens Wiklander --- drivers/char/tpm/tpm_ftpm_tee.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c index 2ccdf8ac6994..6e3235565a4d 100644 --- a/drivers/char/tpm/tpm_ftpm_tee.c +++ b/drivers/char/tpm/tpm_ftpm_tee.c @@ -254,11 +254,11 @@ static int ftpm_tee_probe(struct device *dev) pvt_data->session = sess_arg.session; /* Allocate dynamic shared memory with fTPM TA */ - pvt_data->shm = tee_shm_alloc(pvt_data->ctx, - MAX_COMMAND_SIZE + MAX_RESPONSE_SIZE, - TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + pvt_data->shm = tee_shm_alloc_kernel_buf(pvt_data->ctx, + MAX_COMMAND_SIZE + + MAX_RESPONSE_SIZE); if (IS_ERR(pvt_data->shm)) { - dev_err(dev, "%s: tee_shm_alloc failed\n", __func__); + dev_err(dev, "%s: tee_shm_alloc_kernel_buf failed\n", __func__); rc = -ENOMEM; goto out_shm_alloc; } From 914ab19e471d8fb535ed50dff108b0a615f3c2d8 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 14 Jun 2021 17:33:17 -0500 Subject: [PATCH 230/794] firmware: tee_bnxt: Release TEE shm, session, and context during kexec Implement a .shutdown hook that will be called during a kexec operation so that the TEE shared memory, session, and context that were set up during .probe can be properly freed/closed. Additionally, don't use dma-buf backed shared memory for the fw_shm_pool. dma-buf backed shared memory cannot be reliably freed and unregistered during a kexec operation even when tee_shm_free() is called on the shm from a .shutdown hook. The problem occurs because dma_buf_put() calls fput() which then uses task_work_add(), with the TWA_RESUME parameter, to queue tee_shm_release() to be called before the current task returns to user mode. 
However, the current task never returns to user mode before the kexec completes so the memory is never freed nor unregistered. Use tee_shm_alloc_kernel_buf() to avoid dma-buf backed shared memory allocation so that tee_shm_free() can directly call tee_shm_release(). This will ensure that the shm can be freed and unregistered during a kexec operation. Fixes: 246880958ac9 ("firmware: broadcom: add OP-TEE based BNXT f/w manager") Cc: stable@vger.kernel.org Signed-off-by: Allen Pais Co-developed-by: Tyler Hicks Signed-off-by: Tyler Hicks Reviewed-by: Sumit Garg Acked-by: Florian Fainelli Signed-off-by: Jens Wiklander --- drivers/firmware/broadcom/tee_bnxt_fw.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/broadcom/tee_bnxt_fw.c b/drivers/firmware/broadcom/tee_bnxt_fw.c index ed10da5313e8..a5bf4c3f6dc7 100644 --- a/drivers/firmware/broadcom/tee_bnxt_fw.c +++ b/drivers/firmware/broadcom/tee_bnxt_fw.c @@ -212,10 +212,9 @@ static int tee_bnxt_fw_probe(struct device *dev) pvt_data.dev = dev; - fw_shm_pool = tee_shm_alloc(pvt_data.ctx, MAX_SHM_MEM_SZ, - TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + fw_shm_pool = tee_shm_alloc_kernel_buf(pvt_data.ctx, MAX_SHM_MEM_SZ); if (IS_ERR(fw_shm_pool)) { - dev_err(pvt_data.dev, "tee_shm_alloc failed\n"); + dev_err(pvt_data.dev, "tee_shm_alloc_kernel_buf failed\n"); err = PTR_ERR(fw_shm_pool); goto out_sess; } @@ -242,6 +241,14 @@ static int tee_bnxt_fw_remove(struct device *dev) return 0; } +static void tee_bnxt_fw_shutdown(struct device *dev) +{ + tee_shm_free(pvt_data.fw_shm_pool); + tee_client_close_session(pvt_data.ctx, pvt_data.session_id); + tee_client_close_context(pvt_data.ctx); + pvt_data.ctx = NULL; +} + static const struct tee_client_device_id tee_bnxt_fw_id_table[] = { {UUID_INIT(0x6272636D, 0x2019, 0x0716, 0x42, 0x43, 0x4D, 0x5F, 0x53, 0x43, 0x48, 0x49)}, @@ -257,6 +264,7 @@ static struct tee_client_driver tee_bnxt_fw_driver = { .bus = &tee_bus_type, .probe = tee_bnxt_fw_probe, 
.remove = tee_bnxt_fw_remove, + .shutdown = tee_bnxt_fw_shutdown, }, }; From 235c3610d5f02ee91244239b43cd9ae8b4859dff Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Jul 2021 13:13:55 -0500 Subject: [PATCH 231/794] drm/ttm: Force re-init if ttm_global_init() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we have a failure, decrement the reference count so that the next call to ttm_global_init() will actually do something instead of assume everything is all set up. Signed-off-by: Jason Ekstrand Fixes: 62b53b37e4b1 ("drm/ttm: use a static ttm_bo_global instance") Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210720181357.2760720-5-jason@jlekstrand.net Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 5f31acec3ad7..519deea8e39b 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -100,6 +100,8 @@ static int ttm_global_init(void) debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root, &glob->bo_count); out: + if (ret) + --ttm_glob_use_count; mutex_unlock(&ttm_global_mutex); return ret; } From 44cf53602f5a0db80d53c8fff6cdbcae59650a42 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Mon, 19 Jul 2021 00:05:19 -0700 Subject: [PATCH 232/794] Revert "usb: renesas-xhci: Fix handling of unknown ROM state" This reverts commit d143825baf15f204dac60acdf95e428182aa3374. Justin reports some of his systems now fail as result of this commit: xhci_hcd 0000:04:00.0: Direct firmware load for renesas_usb_fw.mem failed with error -2 xhci_hcd 0000:04:00.0: request_firmware failed: -2 xhci_hcd: probe of 0000:04:00.0 failed with error -2 The revert brings back the original issue the commit tried to solve but at least unbreaks existing systems relying on previous behavior. 
Cc: stable@vger.kernel.org Cc: Mathias Nyman Cc: Vinod Koul Cc: Justin Forbes Reported-by: Justin Forbes Signed-off-by: Moritz Fischer Fixes: d143825baf15 ("usb: renesas-xhci: Fix handling of unknown ROM state") Link: https://lore.kernel.org/r/20210719070519.41114-1-mdf@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci-renesas.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-pci-renesas.c b/drivers/usb/host/xhci-pci-renesas.c index 1da647961c25..5923844ed821 100644 --- a/drivers/usb/host/xhci-pci-renesas.c +++ b/drivers/usb/host/xhci-pci-renesas.c @@ -207,8 +207,7 @@ static int renesas_check_rom_state(struct pci_dev *pdev) return 0; case RENESAS_ROM_STATUS_NO_RESULT: /* No result yet */ - dev_dbg(&pdev->dev, "Unknown ROM status ...\n"); - break; + return 0; case RENESAS_ROM_STATUS_ERROR: /* Error State */ default: /* All other states are marked as "Reserved states" */ @@ -225,12 +224,13 @@ static int renesas_fw_check_running(struct pci_dev *pdev) u8 fw_state; int err; - /* - * Only if device has ROM and loaded FW we can skip loading and - * return success. Otherwise (even unknown state), attempt to load FW. - */ - if (renesas_check_rom(pdev) && !renesas_check_rom_state(pdev)) - return 0; + /* Check if device has ROM and loaded, if so skip everything */ + err = renesas_check_rom(pdev); + if (err) { /* we have rom */ + err = renesas_check_rom_state(pdev); + if (!err) + return err; + } /* * Test if the device is actually needing the firmware. As most From 72f68bf5c756f5ce1139b31daae2684501383ad5 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Jul 2021 18:06:51 +0300 Subject: [PATCH 233/794] xhci: Fix lost USB 2 remote wake There's a small window where a USB 2 remote wake may be left unhandled due to a race between hub thread and xhci port event interrupt handler. 
When the resume event is detected in the xhci interrupt handler it kicks the hub timer, which should move the port from resume to U0 once resume has been signalled for long enough. To keep the hub "thread" running we set a bus_state->resuming_ports flag. This flag makes sure hub timer function kicks itself. checking this flag was not properly protected by the spinlock. Flag was copied to a local variable before lock was taken. The local variable was then checked later with spinlock held. If interrupt is handled right after copying the flag to the local variable we end up stopping the hub thread before it can handle the USB 2 resume. CPU0 CPU1 (hub thread) (xhci event handler) xhci_hub_status_data() status = bus_state->resuming_ports; handle_port_status() spin_lock() bus_state->resuming_ports = 1 set_flag(HCD_FLAG_POLL_RH) spin_unlock() spin_lock() if (!status) clear_flag(HCD_FLAG_POLL_RH) spin_unlock() Fix this by taking the lock a bit earlier so that it covers the resuming_ports flag copy in the hub thread Cc: Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210715150651.1996099-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index e9b18fc17617..151e93c4bd57 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1638,11 +1638,12 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf) * Inform the usbcore about resume-in-progress by returning * a non-zero value even if there are no status changes. */ + spin_lock_irqsave(&xhci->lock, flags); + status = bus_state->resuming_ports; mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC; - spin_lock_irqsave(&xhci->lock, flags); /* For each port, did anything change? If so, set that bit in buf. 
*/ for (i = 0; i < max_ports; i++) { temp = readl(ports[i]->addr); From 57560ee95cb7f91cf0bc31d4ae8276e0dcfe17aa Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Wed, 14 Jul 2021 08:18:07 +0200 Subject: [PATCH 234/794] usb: typec: tipd: Don't block probing of consumer of "connector" nodes Similar as with tcpm this patch lets fw_devlink know not to wait on the fwnode to be populated as a struct device. Without this patch, USB functionality can be broken on some previously supported boards. Fixes: 28ec344bb891 ("usb: typec: tcpm: Don't block probing of consumers of "connector" nodes") Cc: stable Acked-by: Heikki Krogerus Signed-off-by: Martin Kepplinger Link: https://lore.kernel.org/r/20210714061807.5737-1-martin.kepplinger@puri.sm Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 938219bc1b4b..21b3ae25c76d 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -629,6 +629,15 @@ static int tps6598x_probe(struct i2c_client *client) if (!fwnode) return -ENODEV; + /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode. 
+ */ + fw_devlink_purge_absent_suppliers(fwnode); + tps->role_sw = fwnode_usb_role_switch_get(fwnode); if (IS_ERR(tps->role_sw)) { ret = PTR_ERR(tps->role_sw); From 1bf2761c837571a66ec290fb66c90413821ffda2 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Jul 2021 18:01:21 +0300 Subject: [PATCH 235/794] usb: hub: Fix link power management max exit latency (MEL) calculations Maximum Exit Latency (MEL) value is used by host to know how much in advance it needs to start waking up a U1/U2 suspended link in order to service a periodic transfer in time. Current MEL calculation only includes the time to wake up the path from U1/U2 to U0. This is called tMEL1 in USB 3.1 section C 1.5.2. Total MEL = tMEL1 + tMEL2 + tMEL3 + tMEL4 which should additionally include: - tMEL2 which is the time it takes for PING message to reach device - tMEL3 time for device to process the PING and submit a PING_RESPONSE - tMEL4 time for PING_RESPONSE to traverse back upstream to host. Add the missing tMEL2, tMEL3 and tMEL4 to MEL calculation. Cc: # v3.5 Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210715150122.1995966-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 52 +++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index d1efc7141333..a35d0bedafa3 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -48,6 +48,7 @@ #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ +#define USB_PING_RESPONSE_TIME 400 /* ns */ /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can @@ -182,8 +183,9 @@ int usb_device_supports_lpm(struct usb_device *udev) } /* - * Set the Maximum Exit Latency (MEL) for the host to initiate a transition from - * either U1 or U2. 
+ * Set the Maximum Exit Latency (MEL) for the host to wakup up the path from + * U1/U2, send a PING to the device and receive a PING_RESPONSE. + * See USB 3.1 section C.1.5.2 */ static void usb_set_lpm_mel(struct usb_device *udev, struct usb3_lpm_parameters *udev_lpm_params, @@ -193,35 +195,37 @@ static void usb_set_lpm_mel(struct usb_device *udev, unsigned int hub_exit_latency) { unsigned int total_mel; - unsigned int device_mel; - unsigned int hub_mel; /* - * Calculate the time it takes to transition all links from the roothub - * to the parent hub into U0. The parent hub must then decode the - * packet (hub header decode latency) to figure out which port it was - * bound for. - * - * The Hub Header decode latency is expressed in 0.1us intervals (0x1 - * means 0.1us). Multiply that by 100 to get nanoseconds. + * tMEL1. time to transition path from host to device into U0. + * MEL for parent already contains the delay up to parent, so only add + * the exit latency for the last link (pick the slower exit latency), + * and the hub header decode latency. See USB 3.1 section C 2.2.1 + * Store MEL in nanoseconds */ total_mel = hub_lpm_params->mel + - (hub->descriptor->u.ss.bHubHdrDecLat * 100); + max(udev_exit_latency, hub_exit_latency) * 1000 + + hub->descriptor->u.ss.bHubHdrDecLat * 100; /* - * How long will it take to transition the downstream hub's port into - * U0? The greater of either the hub exit latency or the device exit - * latency. - * - * The BOS U1/U2 exit latencies are expressed in 1us intervals. - * Multiply that by 1000 to get nanoseconds. + * tMEL2. Time to submit PING packet. Sum of tTPTransmissionDelay for + * each link + wHubDelay for each hub. Add only for last link. + * tMEL4, the time for PING_RESPONSE to traverse upstream is similar. + * Multiply by 2 to include it as well. 
*/ - device_mel = udev_exit_latency * 1000; - hub_mel = hub_exit_latency * 1000; - if (device_mel > hub_mel) - total_mel += device_mel; - else - total_mel += hub_mel; + total_mel += (__le16_to_cpu(hub->descriptor->u.ss.wHubDelay) + + USB_TP_TRANSMISSION_DELAY) * 2; + + /* + * tMEL3, tPingResponse. Time taken by device to generate PING_RESPONSE + * after receiving PING. Also add 2100ns as stated in USB 3.1 C 1.5.2.4 + * to cover the delay if the PING_RESPONSE is queued behind a Max Packet + * Size DP. + * Note these delays should be added only once for the entire path, so + * add them to the MEL of the device connected to the roothub. + */ + if (!hub->hdev->parent) + total_mel += USB_PING_RESPONSE_TIME + 2100; udev_lpm_params->mel = total_mel; } From 1b7f56fbc7a1b66967b6114d1b5f5a257c3abae6 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 15 Jul 2021 18:01:22 +0300 Subject: [PATCH 236/794] usb: hub: Disable USB 3 device initiated lpm if exit latency is too high The device initiated link power management U1/U2 states should not be enabled in case the system exit latency plus one bus interval (125us) is greater than the shortest service interval of any periodic endpoint. This is the case for both U1 and U2 system exit latencies and link states. See USB 3.2 section 9.4.9 "Set Feature" for more details. Note, before this patch the host and device initiated U1/U2 lpm states were both enabled with lpm. After this patch it's possible to end up with only host initiated U1/U2 lpm in case the exit latencies won't allow device initiated lpm. In this case we still want to set the udev->usb3_lpm_ux_enabled flag so that sysfs users can see the link may go to U1/U2. 
Signed-off-by: Mathias Nyman Cc: stable Link: https://lore.kernel.org/r/20210715150122.1995966-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 68 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a35d0bedafa3..86658a81d284 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4116,6 +4116,47 @@ static int usb_set_lpm_timeout(struct usb_device *udev, return 0; } +/* + * Don't allow device intiated U1/U2 if the system exit latency + one bus + * interval is greater than the minimum service interval of any active + * periodic endpoint. See USB 3.2 section 9.4.9 + */ +static bool usb_device_may_initiate_lpm(struct usb_device *udev, + enum usb3_link_state state) +{ + unsigned int sel; /* us */ + int i, j; + + if (state == USB3_LPM_U1) + sel = DIV_ROUND_UP(udev->u1_params.sel, 1000); + else if (state == USB3_LPM_U2) + sel = DIV_ROUND_UP(udev->u2_params.sel, 1000); + else + return false; + + for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { + struct usb_interface *intf; + struct usb_endpoint_descriptor *desc; + unsigned int interval; + + intf = udev->actconfig->interface[i]; + if (!intf) + continue; + + for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) { + desc = &intf->cur_altsetting->endpoint[j].desc; + + if (usb_endpoint_xfer_int(desc) || + usb_endpoint_xfer_isoc(desc)) { + interval = (1 << (desc->bInterval - 1)) * 125; + if (sel + 125 > interval) + return false; + } + } + } + return true; +} + /* * Enable the hub-initiated U1/U2 idle timeouts, and enable device-initiated * U1/U2 entry. 
@@ -4188,20 +4229,23 @@ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev, * U1/U2_ENABLE */ if (udev->actconfig && - usb_set_device_initiated_lpm(udev, state, true) == 0) { - if (state == USB3_LPM_U1) - udev->usb3_lpm_u1_enabled = 1; - else if (state == USB3_LPM_U2) - udev->usb3_lpm_u2_enabled = 1; - } else { - /* Don't request U1/U2 entry if the device - * cannot transition to U1/U2. - */ - usb_set_lpm_timeout(udev, state, 0); - hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); + usb_device_may_initiate_lpm(udev, state)) { + if (usb_set_device_initiated_lpm(udev, state, true)) { + /* + * Request to enable device initiated U1/U2 failed, + * better to turn off lpm in this case. + */ + usb_set_lpm_timeout(udev, state, 0); + hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); + return; + } } -} + if (state == USB3_LPM_U1) + udev->usb3_lpm_u1_enabled = 1; + else if (state == USB3_LPM_U2) + udev->usb3_lpm_u2_enabled = 1; +} /* * Disable the hub-initiated U1/U2 idle timeouts, and disable device-initiated * U1/U2 entry. From 0b60557230adfdeb8164e0b342ac9cd469a75759 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Thu, 15 Jul 2021 17:37:44 -0400 Subject: [PATCH 237/794] usb: ehci: Prevent missed ehci interrupts with edge-triggered MSI When MSI is used by the ehci-hcd driver, it can cause lost interrupts which results in EHCI only continuing to work due to a polling fallback. But the reliance of polling drastically reduces performance of any I/O through EHCI. Interrupts are lost as the EHCI interrupt handler does not safely handle edge-triggered interrupts. It fails to ensure all interrupt status bits are cleared, which works with level-triggered interrupts but not the edge-triggered interrupts typical from using MSI. To fix this problem, check if the driver may have raced with the hardware setting additional interrupt status bits and clear status until it is in a stable state. 
Fixes: 306c54d0edb6 ("usb: hcd: Try MSI interrupts on PCI devices") Tested-by: Laurence Oberman Reviewed-by: Andy Shevchenko Acked-by: Alan Stern Signed-off-by: David Jeffery Link: https://lore.kernel.org/r/20210715213744.GA44506@redhat Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 36f5bf6a0752..10b0365f3439 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -703,24 +703,28 @@ EXPORT_SYMBOL_GPL(ehci_setup); static irqreturn_t ehci_irq (struct usb_hcd *hcd) { struct ehci_hcd *ehci = hcd_to_ehci (hcd); - u32 status, masked_status, pcd_status = 0, cmd; + u32 status, current_status, masked_status, pcd_status = 0; + u32 cmd; int bh; spin_lock(&ehci->lock); - status = ehci_readl(ehci, &ehci->regs->status); + status = 0; + current_status = ehci_readl(ehci, &ehci->regs->status); +restart: /* e.g. cardbus physical eject */ - if (status == ~(u32) 0) { + if (current_status == ~(u32) 0) { ehci_dbg (ehci, "device removed\n"); goto dead; } + status |= current_status; /* * We don't use STS_FLR, but some controllers don't like it to * remain on, so mask it out along with the other status bits. */ - masked_status = status & (INTR_MASK | STS_FLR); + masked_status = current_status & (INTR_MASK | STS_FLR); /* Shared IRQ? 
*/ if (!masked_status || unlikely(ehci->rh_state == EHCI_RH_HALTED)) { @@ -730,6 +734,12 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd) /* clear (just) interrupts */ ehci_writel(ehci, masked_status, &ehci->regs->status); + + /* For edge interrupts, don't race with an interrupt bit being raised */ + current_status = ehci_readl(ehci, &ehci->regs->status); + if (current_status & INTR_MASK) + goto restart; + cmd = ehci_readl(ehci, &ehci->regs->command); bh = 0; From 6abf2fe6b4bf6e5256b80c5817908151d2d33e9f Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Tue, 20 Jul 2021 19:19:10 +0200 Subject: [PATCH 238/794] USB: usb-storage: Add LaCie Rugged USB3-FW to IGNORE_UAS LaCie Rugged USB3-FW appears to be incompatible with UAS. It generates errors like: [ 1151.582598] sd 14:0:0:0: tag#16 uas_eh_abort_handler 0 uas-tag 1 inflight: IN [ 1151.582602] sd 14:0:0:0: tag#16 CDB: Report supported operation codes a3 0c 01 12 00 00 00 00 02 00 00 00 [ 1151.588594] scsi host14: uas_eh_device_reset_handler start [ 1151.710482] usb 2-4: reset SuperSpeed Gen 1 USB device number 2 using xhci_hcd [ 1151.741398] scsi host14: uas_eh_device_reset_handler success [ 1181.785534] scsi host14: uas_eh_device_reset_handler start Signed-off-by: Julian Sikorski Cc: stable Link: https://lore.kernel.org/r/20210720171910.36497-1-belegdol+github@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index f9677a5ec31b..c35a6db993f1 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -45,6 +45,13 @@ UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME), +/* Reported-by: Julian Sikorski */ +UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999, + "LaCie", + "Rugged USB3-FW", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* * Apricorn USB3 
dongle sometimes returns "USBSUSBSUSBS" in response to SCSI * commands in UAS mode. Observed with the 1.28 firmware; are there others? From 86762ad4abcc549deb7a155c8e5e961b9755bcf0 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 16 Jul 2021 14:07:17 +0200 Subject: [PATCH 239/794] usb: typec: stusb160x: register role switch before interrupt registration During interrupt registration, attach state is checked. If attached, then the Type-C state is updated with typec_set_xxx functions and role switch is set with usb_role_switch_set_role(). If the usb_role_switch parameter is error or null, the function simply returns 0. So, to update usb_role_switch role if a device is attached before the irq is registered, usb_role_switch must be registered before irq registration. Fixes: da0cb6310094 ("usb: typec: add support for STUSB160x Type-C controller family") Cc: stable Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20210716120718.20398-2-amelie.delaunay@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/stusb160x.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/stusb160x.c b/drivers/usb/typec/stusb160x.c index 6eaeba9b096e..3d3848e7c2c2 100644 --- a/drivers/usb/typec/stusb160x.c +++ b/drivers/usb/typec/stusb160x.c @@ -739,10 +739,6 @@ static int stusb160x_probe(struct i2c_client *client) typec_set_pwr_opmode(chip->port, chip->pwr_opmode); if (client->irq) { - ret = stusb160x_irq_init(chip, client->irq); - if (ret) - goto port_unregister; - chip->role_sw = fwnode_usb_role_switch_get(fwnode); if (IS_ERR(chip->role_sw)) { ret = PTR_ERR(chip->role_sw); @@ -752,6 +748,10 @@ static int stusb160x_probe(struct i2c_client *client) ret); goto port_unregister; } + + ret = stusb160x_irq_init(chip, client->irq); + if (ret) + goto role_sw_put; } else { /* * If Source or Dual power role, need to enable VDD supply @@ -775,6 +775,9 @@ static int stusb160x_probe(struct i2c_client *client) return 0; 
+role_sw_put: + if (chip->role_sw) + usb_role_switch_put(chip->role_sw); port_unregister: typec_unregister_port(chip->port); all_reg_disable: From 6b63376722d9e1b915a2948e9b30f4ba2712e3f5 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 16 Jul 2021 14:07:18 +0200 Subject: [PATCH 240/794] usb: typec: stusb160x: Don't block probing of consumer of "connector" nodes Similar as with tcpm this patch lets fw_devlink know not to wait on the fwnode to be populated as a struct device. Without this patch, USB functionality can be broken on some previously supported boards. Fixes: 28ec344bb891 ("usb: typec: tcpm: Don't block probing of consumers of "connector" nodes") Cc: stable Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20210716120718.20398-3-amelie.delaunay@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/stusb160x.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/stusb160x.c b/drivers/usb/typec/stusb160x.c index 3d3848e7c2c2..e7745d1c2a5c 100644 --- a/drivers/usb/typec/stusb160x.c +++ b/drivers/usb/typec/stusb160x.c @@ -685,6 +685,15 @@ static int stusb160x_probe(struct i2c_client *client) if (!fwnode) return -ENODEV; + /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This it breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode. + */ + fw_devlink_purge_absent_suppliers(fwnode); + /* * When both VDD and VSYS power supplies are present, the low power * supply VSYS is selected when VSYS voltage is above 3.1 V. 
From a6b125621c081bef519fd78cf336de351390da3f Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 1 Jul 2021 11:39:03 +0200 Subject: [PATCH 241/794] MAINTAINERS: repair reference in USB IP DRIVER FOR HISILICON KIRIN 970 Commit 8de6b7edd493 ("phy: phy-hi3670-usb3: move driver from staging into phy") moves phy-hi3670-usb3.c from ./drivers/staging/hikey9xx/ to ./drivers/phy/hisilicon/, but the new file entry in MAINTAINERS refers to ./drivers/phy/hisilicon/phy-kirin970-usb3.c. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matches F: drivers/phy/hisilicon/phy-kirin970-usb3.c Repair the file entry by referring to the right location. Fixes: 8de6b7edd493 ("phy: phy-hi3670-usb3: move driver from staging into phy") Acked-by: Mauro Carvalho Chehab Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20210701093903.28733-1-lukas.bulwahn@gmail.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a61f4f3b78a9..6f86a58930bf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19114,7 +19114,7 @@ M: Mauro Carvalho Chehab L: linux-usb@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/phy/hisilicon,hi3670-usb3.yaml -F: drivers/phy/hisilicon/phy-kirin970-usb3.c +F: drivers/phy/hisilicon/phy-hi3670-usb3.c USB ISP116X DRIVER M: Olav Kongas From 5b01248156bd75303e66985c351dee648c149979 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 18 Jun 2021 22:14:41 +0800 Subject: [PATCH 242/794] usb: gadget: Fix Unbalanced pm_runtime_enable in tegra_xudc_probe Add missing pm_runtime_disable() when probe error out. It could avoid pm_runtime implementation complains when removing and probing again the driver. 
Fixes: 49db427232fe ("usb: gadget: Add UDC driver for tegra XUSB device mode controller") Cc: stable Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20210618141441.107817-1-zhangqilong3@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/tegra-xudc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index a54d1cef17db..c0ca7144e512 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -3853,6 +3853,7 @@ static int tegra_xudc_probe(struct platform_device *pdev) return 0; free_eps: + pm_runtime_disable(&pdev->dev); tegra_xudc_free_eps(xudc); free_event_ring: tegra_xudc_free_event_ring(xudc); From b5fdf5c6e6bee35837e160c00ac89327bdad031b Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Fri, 25 Jun 2021 15:14:56 +1200 Subject: [PATCH 243/794] usb: max-3421: Prevent corruption of freed memory The MAX-3421 USB driver remembers the state of the USB toggles for a device/endpoint. To save SPI writes, this was only done when a new device/endpoint was being used. Unfortunately, if the old device was removed, this would cause writes to freed memory. To fix this, a simpler scheme is used. The toggles are read from hardware when a URB is completed, and the toggles are always written to hardware when any URB transaction is started. This will cause a few more SPI transactions, but no causes kernel panics. 
Fixes: 2d53139f3162 ("Add support for using a MAX3421E chip as a host driver.") Cc: stable Signed-off-by: Mark Tomlinson Link: https://lore.kernel.org/r/20210625031456.8632-1-mark.tomlinson@alliedtelesis.co.nz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/max3421-hcd.c | 44 +++++++++++----------------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index e7a8e0609853..59cc1bc7f12f 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -153,8 +153,6 @@ struct max3421_hcd { */ struct urb *curr_urb; enum scheduling_pass sched_pass; - struct usb_device *loaded_dev; /* dev that's loaded into the chip */ - int loaded_epnum; /* epnum whose toggles are loaded */ int urb_done; /* > 0 -> no errors, < 0: errno */ size_t curr_len; u8 hien; @@ -492,39 +490,17 @@ max3421_set_speed(struct usb_hcd *hcd, struct usb_device *dev) * Caller must NOT hold HCD spinlock. */ static void -max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum, - int force_toggles) +max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum) { - struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd); - int old_epnum, same_ep, rcvtog, sndtog; - struct usb_device *old_dev; + int rcvtog, sndtog; u8 hctl; - old_dev = max3421_hcd->loaded_dev; - old_epnum = max3421_hcd->loaded_epnum; - - same_ep = (dev == old_dev && epnum == old_epnum); - if (same_ep && !force_toggles) - return; - - if (old_dev && !same_ep) { - /* save the old end-points toggles: */ - u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL); - - rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1; - sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1; - - /* no locking: HCD (i.e., we) own toggles, don't we? 
*/ - usb_settoggle(old_dev, old_epnum, 0, rcvtog); - usb_settoggle(old_dev, old_epnum, 1, sndtog); - } /* setup new endpoint's toggle bits: */ rcvtog = usb_gettoggle(dev, epnum, 0); sndtog = usb_gettoggle(dev, epnum, 1); hctl = (BIT(rcvtog + MAX3421_HCTL_RCVTOG0_BIT) | BIT(sndtog + MAX3421_HCTL_SNDTOG0_BIT)); - max3421_hcd->loaded_epnum = epnum; spi_wr8(hcd, MAX3421_REG_HCTL, hctl); /* @@ -532,7 +508,6 @@ max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum, * address-assignment so it's best to just always load the * address whenever the end-point changed/was forced. */ - max3421_hcd->loaded_dev = dev; spi_wr8(hcd, MAX3421_REG_PERADDR, dev->devnum); } @@ -667,7 +642,7 @@ max3421_select_and_start_urb(struct usb_hcd *hcd) struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd); struct urb *urb, *curr_urb = NULL; struct max3421_ep *max3421_ep; - int epnum, force_toggles = 0; + int epnum; struct usb_host_endpoint *ep; struct list_head *pos; unsigned long flags; @@ -777,7 +752,6 @@ done: usb_settoggle(urb->dev, epnum, 0, 1); usb_settoggle(urb->dev, epnum, 1, 1); max3421_ep->pkt_state = PKT_STATE_SETUP; - force_toggles = 1; } else max3421_ep->pkt_state = PKT_STATE_TRANSFER; } @@ -785,7 +759,7 @@ done: spin_unlock_irqrestore(&max3421_hcd->lock, flags); max3421_ep->last_active = max3421_hcd->frame_number; - max3421_set_address(hcd, urb->dev, epnum, force_toggles); + max3421_set_address(hcd, urb->dev, epnum); max3421_set_speed(hcd, urb->dev); max3421_next_transfer(hcd, 0); return 1; @@ -1379,6 +1353,16 @@ max3421_urb_done(struct usb_hcd *hcd) status = 0; urb = max3421_hcd->curr_urb; if (urb) { + /* save the old end-points toggles: */ + u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL); + int rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1; + int sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1; + int epnum = usb_endpoint_num(&urb->ep->desc); + + /* no locking: HCD (i.e., we) own toggles, don't we? 
*/ + usb_settoggle(urb->dev, epnum, 0, rcvtog); + usb_settoggle(urb->dev, epnum, 1, sndtog); + max3421_hcd->curr_urb = NULL; spin_lock_irqsave(&max3421_hcd->lock, flags); usb_hcd_unlink_urb_from_ep(hcd, urb); From 40edb52298df4c1dbbdb30b19e3ce92cf612a918 Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Tue, 29 Jun 2021 09:51:18 +0800 Subject: [PATCH 244/794] usb: dwc3: avoid NULL access of usb_gadget_driver we found crash in dwc3_disconnect_gadget(), it is because dwc->gadget_driver become NULL before async access. 7dc0c55e9f30 ('USB: UDC core: Add udc_async_callbacks gadget op') suggest a common way to avoid such kind of issue. this change implment the callback in dwc3 and change related functions which have callback to usb gadget driver. Acked-by: Alan Stern Signed-off-by: Linyu Yuan Link: https://lore.kernel.org/r/20210629015118.7944-1-linyyuan@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/ep0.c | 10 ++++++---- drivers/usb/dwc3/gadget.c | 21 ++++++++++++++++----- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index dccdf13b5f9e..5991766239ba 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1279,6 +1279,7 @@ struct dwc3 { unsigned dis_metastability_quirk:1; unsigned dis_split_quirk:1; + unsigned async_callbacks:1; u16 imod_interval; }; diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 3cd294264372..2f9e45eed228 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -597,11 +597,13 @@ static int dwc3_ep0_set_address(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) static int dwc3_ep0_delegate_req(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) { - int ret; + int ret = -EINVAL; - spin_unlock(&dwc->lock); - ret = dwc->gadget_driver->setup(dwc->gadget, ctrl); - spin_lock(&dwc->lock); + if (dwc->async_callbacks) { + spin_unlock(&dwc->lock); + ret = dwc->gadget_driver->setup(dwc->gadget, ctrl); + 
spin_lock(&dwc->lock); + } return ret; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index af6d7f157989..45f2bc0807e8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2585,6 +2585,16 @@ static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA) return ret; } +static void dwc3_gadget_async_callbacks(struct usb_gadget *g, bool enable) +{ + struct dwc3 *dwc = gadget_to_dwc(g); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + dwc->async_callbacks = enable; + spin_unlock_irqrestore(&dwc->lock, flags); +} + static const struct usb_gadget_ops dwc3_gadget_ops = { .get_frame = dwc3_gadget_get_frame, .wakeup = dwc3_gadget_wakeup, @@ -2596,6 +2606,7 @@ static const struct usb_gadget_ops dwc3_gadget_ops = { .udc_set_ssp_rate = dwc3_gadget_set_ssp_rate, .get_config_params = dwc3_gadget_config_params, .vbus_draw = dwc3_gadget_vbus_draw, + .udc_async_callbacks = dwc3_gadget_async_callbacks, }; /* -------------------------------------------------------------------------- */ @@ -3231,7 +3242,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, static void dwc3_disconnect_gadget(struct dwc3 *dwc) { - if (dwc->gadget_driver && dwc->gadget_driver->disconnect) { + if (dwc->async_callbacks && dwc->gadget_driver->disconnect) { spin_unlock(&dwc->lock); dwc->gadget_driver->disconnect(dwc->gadget); spin_lock(&dwc->lock); @@ -3240,7 +3251,7 @@ static void dwc3_disconnect_gadget(struct dwc3 *dwc) static void dwc3_suspend_gadget(struct dwc3 *dwc) { - if (dwc->gadget_driver && dwc->gadget_driver->suspend) { + if (dwc->async_callbacks && dwc->gadget_driver->suspend) { spin_unlock(&dwc->lock); dwc->gadget_driver->suspend(dwc->gadget); spin_lock(&dwc->lock); @@ -3249,7 +3260,7 @@ static void dwc3_suspend_gadget(struct dwc3 *dwc) static void dwc3_resume_gadget(struct dwc3 *dwc) { - if (dwc->gadget_driver && dwc->gadget_driver->resume) { + if (dwc->async_callbacks && dwc->gadget_driver->resume) { 
spin_unlock(&dwc->lock); dwc->gadget_driver->resume(dwc->gadget); spin_lock(&dwc->lock); @@ -3261,7 +3272,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc) if (!dwc->gadget_driver) return; - if (dwc->gadget->speed != USB_SPEED_UNKNOWN) { + if (dwc->async_callbacks && dwc->gadget->speed != USB_SPEED_UNKNOWN) { spin_unlock(&dwc->lock); usb_gadget_udc_reset(dwc->gadget, dwc->gadget_driver); spin_lock(&dwc->lock); @@ -3585,7 +3596,7 @@ static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc) * implemented. */ - if (dwc->gadget_driver && dwc->gadget_driver->resume) { + if (dwc->async_callbacks && dwc->gadget_driver->resume) { spin_unlock(&dwc->lock); dwc->gadget_driver->resume(dwc->gadget); spin_lock(&dwc->lock); From 4bb233b7ba87785c7ac519863f51ba61f4dbc459 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 1 Jul 2021 15:43:05 +0100 Subject: [PATCH 245/794] usb: gadget: u_serial: remove WARN_ON on null port Loading and then unloading module g_dbgp on a VM that does not support the driver currently throws a WARN_ON message because the port has not been initialized. Removing an unused driver is a valid use-case and the WARN_ON kernel warning is a bit excessive, so remove it.
Cleans up: [27654.638698] ------------[ cut here ]------------ [27654.638705] WARNING: CPU: 6 PID: 2956336 at drivers/usb/gadget/function/u_serial.c:1201 gserial_free_line+0x7c/0x90 [u_serial] [27654.638728] Modules linked in: g_dbgp(-) u_serial usb_f_tcm target_core_mod libcomposite udc_core vmw_vmci mcb i2c_nforce2 i2c_amd756 nfit cx8800 videobuf2_dma_sg videobuf2_memops videobuf2_v4l2 cx88xx tveeprom videobuf2_common videodev mc ccp hid_generic hid intel_ishtp cros_ec mc13xxx_core vfio_mdev mdev i915 i2c_algo_bit kvm ppdev parport zatm eni suni uPD98402 atm rio_scan binder_linux hwmon_vid video ipmi_devintf ipmi_msghandler zstd nls_utf8 decnet qrtr ns sctp ip6_udp_tunnel udp_tunnel fcrypt pcbc nhc_udp nhc_ipv6 nhc_routing nhc_mobility nhc_hop nhc_dest nhc_fragment 6lowpan ts_kmp dccp_ipv6 dccp_ipv4 dccp snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq_dummy snd_seq snd_seq_device xen_front_pgdir_shbuf binfmt_misc nls_iso8859_1 dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua intel_rapl_msr intel_rapl_common snd_hda_codec_generic ledtrig_audio snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd rapl soundcore joydev input_leds mac_hid serio_raw efi_pstore [27654.638880] qemu_fw_cfg sch_fq_codel msr virtio_rng autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear qxl drm_ttm_helper crct10dif_pclmul ttm drm_kms_helper syscopyarea sysfillrect sysimgblt virtio_net fb_sys_fops cec net_failover rc_core ahci psmouse drm libahci lpc_ich virtio_blk failover [last unloaded: u_ether] [27654.638949] CPU: 6 PID: 2956336 Comm: modprobe Tainted: P O 5.13.0-9-generic #9 [27654.638956] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [27654.638969] RIP: 0010:gserial_free_line+0x7c/0x90 [u_serial] [27654.638981] Code: 20 00 00 00 00 e8 74 1a ba c9 4c 89 e7 e8 8c fe ff ff 48 8b 3d 75 3b 00 00 44 89 f6 e8 3d 7c 69 c9 5b 41 5c 41 5d 
41 5e 5d c3 <0f> 0b 4c 89 ef e8 4a 1a ba c9 5b 41 5c 41 5d 41 5e 5d c3 90 0f 1f [27654.638986] RSP: 0018:ffffba0b81403da0 EFLAGS: 00010246 [27654.638992] RAX: 0000000000000000 RBX: ffffffffc0eaf6a0 RCX: 0000000000000000 [27654.638996] RDX: ffff8e21c0cac8c0 RSI: 0000000000000006 RDI: ffffffffc0eaf6a0 [27654.639000] RBP: ffffba0b81403dc0 R08: ffffba0b81403de0 R09: fefefefefefefeff [27654.639003] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [27654.639006] R13: ffffffffc0eaf6a0 R14: 0000000000000000 R15: 0000000000000000 [27654.639010] FS: 00007faa1935e740(0000) GS:ffff8e223bd80000(0000) knlGS:0000000000000000 [27654.639015] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [27654.639019] CR2: 00007ffc840cd4e8 CR3: 000000000e1ac006 CR4: 0000000000370ee0 [27654.639028] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [27654.639031] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [27654.639035] Call Trace: [27654.639044] dbgp_exit+0x1c/0xa1a [g_dbgp] [27654.639054] __do_sys_delete_module.constprop.0+0x144/0x260 [27654.639066] ? call_rcu+0xe/0x10 [27654.639073] __x64_sys_delete_module+0x12/0x20 [27654.639081] do_syscall_64+0x61/0xb0 [27654.639092] ? exit_to_user_mode_loop+0xec/0x160 [27654.639098] ? exit_to_user_mode_prepare+0x37/0xb0 [27654.639104] ? syscall_exit_to_user_mode+0x27/0x50 [27654.639110] ? __x64_sys_close+0x12/0x40 [27654.639119] ? do_syscall_64+0x6e/0xb0 [27654.639126] ? exit_to_user_mode_prepare+0x37/0xb0 [27654.639132] ? syscall_exit_to_user_mode+0x27/0x50 [27654.639137] ? __x64_sys_newfstatat+0x1e/0x20 [27654.639146] ? do_syscall_64+0x6e/0xb0 [27654.639154] ? exc_page_fault+0x8f/0x170 [27654.639159] ? 
asm_exc_page_fault+0x8/0x30 [27654.639166] entry_SYSCALL_64_after_hwframe+0x44/0xae [27654.639173] RIP: 0033:0x7faa194a4b2b [27654.639179] Code: 73 01 c3 48 8b 0d 3d 73 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 0d 73 0c 00 f7 d8 64 89 01 48 [27654.639185] RSP: 002b:00007ffc840d0578 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [27654.639191] RAX: ffffffffffffffda RBX: 000056060f9f4e70 RCX: 00007faa194a4b2b [27654.639194] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000056060f9f4ed8 [27654.639197] RBP: 000056060f9f4e70 R08: 0000000000000000 R09: 0000000000000000 [27654.639200] R10: 00007faa1951eac0 R11: 0000000000000206 R12: 000056060f9f4ed8 [27654.639203] R13: 0000000000000000 R14: 000056060f9f4ed8 R15: 00007ffc840d06c8 [27654.639219] ---[ end trace 8dd0ea0bb32ce94a ]--- Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210701144305.110078-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index bffef8e47dac..281ca766698a 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1198,7 +1198,7 @@ void gserial_free_line(unsigned char port_num) struct gs_port *port; mutex_lock(&ports[port_num].lock); - if (WARN_ON(!ports[port_num].port)) { + if (!ports[port_num].port) { mutex_unlock(&ports[port_num].lock); return; } From 0665e387318607d8269bfdea60723c627c8bae43 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 2 Jul 2021 00:12:24 -0700 Subject: [PATCH 246/794] usb: xhci: avoid renesas_usb_fw.mem when it's unusable Commit a66d21d7dba8 ("usb: xhci: Add support for Renesas controller with memory") added renesas_usb_fw.mem firmware reference to xhci-pci. 
Thus modinfo indicates xhci-pci.ko has "firmware: renesas_usb_fw.mem". But the firmware is only actually used with CONFIG_USB_XHCI_PCI_RENESAS. An unusable firmware reference can trigger safety checkers which look for drivers with unmet firmware dependencies. Avoid referring to renesas_usb_fw.mem in circumstances when it cannot be loaded (when CONFIG_USB_XHCI_PCI_RENESAS isn't set). Fixes: a66d21d7dba8 ("usb: xhci: Add support for Renesas controller with memory") Cc: stable Signed-off-by: Greg Thelen Link: https://lore.kernel.org/r/20210702071224.3673568-1-gthelen@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 18c2bbddf080..1c9a7957c45c 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -636,7 +636,14 @@ static const struct pci_device_id pci_ids[] = { { /* end: all zeroes */ } }; MODULE_DEVICE_TABLE(pci, pci_ids); + +/* + * Without CONFIG_USB_XHCI_PCI_RENESAS renesas_xhci_check_request_fw() won't + * load firmware, so don't encumber the xhci-pci driver with it. + */ +#if IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS) MODULE_FIRMWARE("renesas_usb_fw.mem"); +#endif /* pci driver glue; this is a "new style" PCI driver module */ static struct pci_driver xhci_pci_driver = { From 3d11de2d57b92e943767d7d070b0df9b18089d56 Mon Sep 17 00:00:00 2001 From: Artur Petrosyan Date: Sat, 10 Jul 2021 13:22:46 +0400 Subject: [PATCH 247/794] usb: phy: Fix page fault from usb_phy_uevent When the dwc2 platform device is removed, it unregisters the generic phy. usb_remove_phy() is called and the dwc2 usb_phy is removed from the "phy_list", but the uevent may still attempt to get the usb_phy from the list, resulting in a page fault bug. Currently we can't access the usb_phy from the "phy_list" after the device is removed. As a fix check to make sure that we can get the usb_phy before moving forward with the uevent. 
[ 84.949345] BUG: unable to handle page fault for address:00000007935688d8 [ 84.949349] #PF: supervisor read access in kernel mode [ 84.949351] #PF: error_code(0x0000) - not-present page [ 84.949353] PGD 0 P4D 0 [ 84.949356] Oops: 0000 [#1] SMP PTI [ 84.949360] CPU: 2 PID: 2081 Comm: rmmod Not tainted 5.13.0-rc4-snps-16547-ga8534cb092d7-dirty #32 [ 84.949363] Hardware name: Hewlett-Packard HP Z400 Workstation/0B4Ch, BIOS 786G3 v03.54 11/02/2011 [ 84.949365] RIP: 0010:usb_phy_uevent+0x99/0x121 [ 84.949372] Code: 8d 83 f8 00 00 00 48 3d b0 12 22 94 74 05 4c 3b 23 75 5b 8b 83 9c 00 00 00 be 32 00 00 00 48 8d 7c 24 04 48 c7 c2 d4 5d 7b 93 <48> 8b 0c c5 e0 88 56 93 e8 0f 63 8a ff 8b 83 98 00 00 00 be 32 00 [ 84.949375] RSP: 0018:ffffa46bc0f2fc70 EFLAGS: 00010246 [ 84.949378] RAX: 00000000ffffffff RBX: ffffffff942211b8 RCX: 0000000000000027 [ 84.949380] RDX: ffffffff937b5dd4 RSI: 0000000000000032 RDI: ffffa46bc0f2fc74 [ 84.949383] RBP: ffff94a306613000 R08: 0000000000000000 R09: 00000000fffeffff [ 84.949385] R10: ffffa46bc0f2faa8 R11: ffffa46bc0f2faa0 R12: ffff94a30186d410 [ 84.949387] R13: ffff94a32d188a80 R14: ffff94a30029f960 R15: ffffffff93522dd0 [ 84.949389] FS: 00007efdbd417540(0000) GS:ffff94a513a80000(0000) knlGS:0000000000000000 [ 84.949392] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 84.949394] CR2: 00000007935688d8 CR3: 0000000165606000 CR4: 00000000000006e0 [ 84.949396] Call Trace: [ 84.949401] dev_uevent+0x190/0x1ad [ 84.949408] kobject_uevent_env+0x18e/0x46c [ 84.949414] device_release_driver_internal+0x17f/0x18e [ 84.949418] bus_remove_device+0xd3/0xe5 [ 84.949421] device_del+0x1c3/0x31d [ 84.949425] ? 
kobject_put+0x97/0xa8 [ 84.949428] platform_device_del+0x1c/0x63 [ 84.949432] platform_device_unregister+0xa/0x11 [ 84.949436] dwc2_pci_remove+0x1e/0x2c [dwc2_pci] [ 84.949440] pci_device_remove+0x31/0x81 [ 84.949445] device_release_driver_internal+0xea/0x18e [ 84.949448] driver_detach+0x68/0x72 [ 84.949450] bus_remove_driver+0x63/0x82 [ 84.949453] pci_unregister_driver+0x1a/0x75 [ 84.949457] __do_sys_delete_module+0x149/0x1e9 [ 84.949462] ? task_work_run+0x64/0x6e [ 84.949465] ? exit_to_user_mode_prepare+0xd4/0x10d [ 84.949471] do_syscall_64+0x5d/0x70 [ 84.949475] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 84.949480] RIP: 0033:0x7efdbd563bcb [ 84.949482] Code: 73 01 c3 48 8b 0d c5 82 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 95 82 0c 00 f7 d8 64 89 01 48 [ 84.949485] RSP: 002b:00007ffe944d7d98 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 84.949489] RAX: ffffffffffffffda RBX: 00005651072eb700 RCX: 00007efdbd563bcb [ 84.949491] RDX: 000000000000000a RSI: 0000000000000800 RDI: 00005651072eb768 [ 84.949493] RBP: 00007ffe944d7df8 R08: 0000000000000000 R09: 0000000000000000 [ 84.949495] R10: 00007efdbd5dfac0 R11: 0000000000000206 R12: 00007ffe944d7fd0 [ 84.949497] R13: 00007ffe944d8610 R14: 00005651072eb2a0 R15: 00005651072eb700 [ 84.949500] Modules linked in: uas configfs dwc2_pci(-) phy_generic fuse crc32c_intel [last unloaded: udc_core] [ 84.949508] CR2: 00000007935688d8 [ 84.949510] ---[ end trace e40c871ca3e4dc9e ]--- [ 84.949512] RIP: 0010:usb_phy_uevent+0x99/0x121 Fixes: a8534cb092d7 ("usb: phy: introduce usb_phy device type with its own uevent handler") Reviewed-by: Peter Chen Signed-off-by: Artur Petrosyan Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/20210710092247.D7AFEA005D@mailhost.synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git 
a/drivers/usb/phy/phy.c b/drivers/usb/phy/phy.c index 83ed5089475a..1b24492bb4e5 100644 --- a/drivers/usb/phy/phy.c +++ b/drivers/usb/phy/phy.c @@ -86,10 +86,10 @@ static struct usb_phy *__device_to_usb_phy(struct device *dev) list_for_each_entry(usb_phy, &phy_list, head) { if (usb_phy->dev == dev) - break; + return usb_phy; } - return usb_phy; + return NULL; } static void usb_phy_set_default_current(struct usb_phy *usb_phy) @@ -150,8 +150,14 @@ static int usb_phy_uevent(struct device *dev, struct kobj_uevent_env *env) struct usb_phy *usb_phy; char uchger_state[50] = { 0 }; char uchger_type[50] = { 0 }; + unsigned long flags; + spin_lock_irqsave(&phy_lock, flags); usb_phy = __device_to_usb_phy(dev); + spin_unlock_irqrestore(&phy_lock, flags); + + if (!usb_phy) + return -ENODEV; snprintf(uchger_state, ARRAY_SIZE(uchger_state), "USB_CHARGER_STATE=%s", usb_chger_state[usb_phy->chg_state]); From fecb3a171db425e5068b27231f8efe154bf72637 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 13 Jul 2021 09:32:55 +0400 Subject: [PATCH 248/794] usb: dwc2: gadget: Fix GOUTNAK flow for Slave mode. Because the dwc2_hsotg_ep_stop_xfr() function uses poll mode, it first needs to mask the GINTSTS_GOUTNAKEFF interrupt. In Slave mode the GINTSTS_GOUTNAKEFF interrupt will be asserted only after popping the OUT NAK status packet from RxFIFO. In the dwc2_hsotg_ep_sethalt() function, the GOUTNAKEFF interrupt needs to be unmasked before setting DCTL_SGOUTNAK. Tested by USBCV CH9 and MSC tests set in Slave, BDMA and DDMA. All tests are passed.
Fixes: a4f827714539a ("usb: dwc2: gadget: Disable enabled HW endpoint in dwc2_hsotg_ep_disable") Fixes: 6070636c4918c ("usb: dwc2: Fix Stalling a Non-Isochronous OUT EP") Cc: stable Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/e17fad802bbcaf879e1ed6745030993abb93baf8.1626152924.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index c581ee41ac81..74d25019272f 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3900,9 +3900,27 @@ static void dwc2_hsotg_ep_stop_xfr(struct dwc2_hsotg *hsotg, __func__); } } else { + /* Mask GINTSTS_GOUTNAKEFF interrupt */ + dwc2_hsotg_disable_gsint(hsotg, GINTSTS_GOUTNAKEFF); + if (!(dwc2_readl(hsotg, GINTSTS) & GINTSTS_GOUTNAKEFF)) dwc2_set_bit(hsotg, DCTL, DCTL_SGOUTNAK); + if (!using_dma(hsotg)) { + /* Wait for GINTSTS_RXFLVL interrupt */ + if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, + GINTSTS_RXFLVL, 100)) { + dev_warn(hsotg->dev, "%s: timeout GINTSTS.RXFLVL\n", + __func__); + } else { + /* + * Pop GLOBAL OUT NAK status packet from RxFIFO + * to assert GOUTNAKEFF interrupt + */ + dwc2_readl(hsotg, GRXSTSP); + } + } + /* Wait for global nak to take effect */ if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_GOUTNAKEFF, 100)) @@ -4348,6 +4366,9 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now) epctl = dwc2_readl(hs, epreg); if (value) { + /* Unmask GOUTNAKEFF interrupt */ + dwc2_hsotg_en_gsint(hs, GINTSTS_GOUTNAKEFF); + if (!(dwc2_readl(hs, GINTSTS) & GINTSTS_GOUTNAKEFF)) dwc2_set_bit(hs, DCTL, DCTL_SGOUTNAK); // STALL bit will be set in GOUTNAKEFF interrupt handler From 5719df243e118fb343725e8b2afb1637e1af1373 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 24 Jun 2021 21:20:39 +0900 Subject: [PATCH 249/794] usb: renesas_usbhs: Fix superfluous irqs happen after usb_pkt_pop() 
This driver has a potential issue: it can cause superfluous irqs after usb_pkt_pop() is called. So, after the commit 3af32605289e ("usb: renesas_usbhs: fix error return code of usbhsf_pkt_handler()") had been applied, we could observe the following error when we used g_audio. renesas_usbhs e6590000.usb: irq_ready run_error 1 : -22 To fix the issue, disable the tx or rx interrupt in usb_pkt_pop(). Fixes: 2743e7f90dc0 ("usb: renesas_usbhs: fix the usb_pkt_pop()") Cc: # v4.4+ Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20210624122039.596528-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/fifo.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index b5e7991dc7d9..a3c2b01ccf7b 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -101,6 +101,8 @@ static struct dma_chan *usbhsf_dma_chan_get(struct usbhs_fifo *fifo, #define usbhsf_dma_map(p) __usbhsf_dma_map_ctrl(p, 1) #define usbhsf_dma_unmap(p) __usbhsf_dma_map_ctrl(p, 0) static int __usbhsf_dma_map_ctrl(struct usbhs_pkt *pkt, int map); +static void usbhsf_tx_irq_ctrl(struct usbhs_pipe *pipe, int enable); +static void usbhsf_rx_irq_ctrl(struct usbhs_pipe *pipe, int enable); struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt) { struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe); @@ -123,6 +125,11 @@ struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt) if (chan) { dmaengine_terminate_all(chan); usbhsf_dma_unmap(pkt); + } else { + if (usbhs_pipe_is_dir_in(pipe)) + usbhsf_rx_irq_ctrl(pipe, 0); + else + usbhsf_tx_irq_ctrl(pipe, 0); } usbhs_pipe_clear_without_sequence(pipe, 0, 0); From c4a0f7a6ab5417eb6105b0e1d7e6e67f6ef7d4e5 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 16 Jul 2021 07:01:27 +0200 Subject: [PATCH 250/794] usb: dwc2: Skip clock
gating on Samsung SoCs Commit 0112b7ce68ea ("usb: dwc2: Update dwc2_handle_usb_suspend_intr function.") changed the way the driver handles power down modes in such a way that it uses clock gating when no other power down mode is available. This however doesn't work well on the DWC2 implementation used on the Samsung SoCs. When clock gating is enabled, the system hangs. It looks like proper clock gating requires some additional glue code in the shared USB2 PHY and/or Samsung glue code for the DWC2. To restore driver operation on the Samsung SoCs simply skip enabling clock gating mode until one finds what is really needed to make it work reliably. Fixes: 0112b7ce68ea ("usb: dwc2: Update dwc2_handle_usb_suspend_intr function.") Cc: stable Acked-by: Krzysztof Kozlowski Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20210716050127.4406-1-m.szyprowski@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 4 ++++ drivers/usb/dwc2/core_intr.c | 3 ++- drivers/usb/dwc2/hcd.c | 6 ++++-- drivers/usb/dwc2/params.c | 1 + 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index ab6b815e0089..483de2bbfaab 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -383,6 +383,9 @@ enum dwc2_ep0_state { * 0 - No (default) * 1 - Partial power down * 2 - Hibernation + * @no_clock_gating: Specifies whether to avoid clock gating feature. + * 0 - No (use clock gating) + * 1 - Yes (avoid it) * @lpm: Enable LPM support.
* 0 - No * 1 - Yes @@ -480,6 +483,7 @@ struct dwc2_core_params { #define DWC2_POWER_DOWN_PARAM_NONE 0 #define DWC2_POWER_DOWN_PARAM_PARTIAL 1 #define DWC2_POWER_DOWN_PARAM_HIBERNATION 2 + bool no_clock_gating; bool lpm; bool lpm_clock_gating; diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index a5ab03808da6..a5c52b237e72 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -556,7 +556,8 @@ static void dwc2_handle_usb_suspend_intr(struct dwc2_hsotg *hsotg) * If neither hibernation nor partial power down are supported, * clock gating is used to save power. */ - dwc2_gadget_enter_clock_gating(hsotg); + if (!hsotg->params.no_clock_gating) + dwc2_gadget_enter_clock_gating(hsotg); } /* diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 035d4911a3c3..2a7828971d05 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -3338,7 +3338,8 @@ int dwc2_port_suspend(struct dwc2_hsotg *hsotg, u16 windex) * If not hibernation nor partial power down are supported, * clock gating is used to save power. */ - dwc2_host_enter_clock_gating(hsotg); + if (!hsotg->params.no_clock_gating) + dwc2_host_enter_clock_gating(hsotg); break; } @@ -4402,7 +4403,8 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd) * If not hibernation nor partial power down are supported, * clock gating is used to save power. 
*/ - dwc2_host_enter_clock_gating(hsotg); + if (!hsotg->params.no_clock_gating) + dwc2_host_enter_clock_gating(hsotg); /* After entering suspend, hardware is not accessible */ clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c index 67c5eb140232..59e119345994 100644 --- a/drivers/usb/dwc2/params.c +++ b/drivers/usb/dwc2/params.c @@ -76,6 +76,7 @@ static void dwc2_set_s3c6400_params(struct dwc2_hsotg *hsotg) struct dwc2_core_params *p = &hsotg->params; p->power_down = DWC2_POWER_DOWN_PARAM_NONE; + p->no_clock_gating = true; p->phy_utmi_width = 8; } From d53dc38857f6dbefabd9eecfcbf67b6eac9a1ef4 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 20 Jul 2021 05:41:24 -0700 Subject: [PATCH 251/794] usb: dwc2: gadget: Fix sending zero length packet in DDMA mode. Sending zero length packet in DDMA mode perform by DMA descriptor by setting SP (short packet) flag. For DDMA in function dwc2_hsotg_complete_in() does not need to send zlp. Tested by USBCV MSC tests. 
Fixes: f71b5e2533de ("usb: dwc2: gadget: fix zero length packet transfers") Cc: stable Signed-off-by: Minas Harutyunyan Link: https://lore.kernel.org/r/967bad78c55dd2db1c19714eee3d0a17cf99d74a.1626777738.git.Minas.Harutyunyan@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 74d25019272f..3146df6e6510 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2749,12 +2749,14 @@ static void dwc2_hsotg_complete_in(struct dwc2_hsotg *hsotg, return; } - /* Zlp for all endpoints, for ep0 only in DATA IN stage */ + /* Zlp for all endpoints in non DDMA, for ep0 only in DATA IN stage */ if (hs_ep->send_zlp) { - dwc2_hsotg_program_zlp(hsotg, hs_ep); hs_ep->send_zlp = 0; - /* transfer will be completed on next complete interrupt */ - return; + if (!using_desc_dma(hsotg)) { + dwc2_hsotg_program_zlp(hsotg, hs_ep); + /* transfer will be completed on next complete interrupt */ + return; + } } if (hs_ep->index == 0 && hsotg->ep0_state == DWC2_EP0_DATA_IN) { From 4e9505064f58d1252805952f8547a5b7dbc5c111 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 17 Jul 2021 16:02:21 +0100 Subject: [PATCH 252/794] net/xfrm/compat: Copy xfrm_spdattr_type_t atributes The attribute-translator has to take in mind maxtype, that is xfrm_link::nla_max. When it is set, attributes are not of xfrm_attr_type_t. Currently, they can be only XFRMA_SPD_MAX (message XFRM_MSG_NEWSPDINFO), their UABI is the same for 64/32-bit, so just copy them. Thanks to YueHaibing for reporting this: In xfrm_user_rcv_msg_compat() if maxtype is not zero and less than XFRMA_MAX, nlmsg_parse_deprecated() do not initialize attrs array fully. xfrm_xlate32() will access uninit 'attrs[i]' while iterating all attrs array. 
KASAN: probably user-memory-access in range [0x0000000041b58ab0-0x0000000041b58ab7] CPU: 0 PID: 15799 Comm: syz-executor.2 Tainted: G W 5.14.0-rc1-syzkaller #0 RIP: 0010:nla_type include/net/netlink.h:1130 [inline] RIP: 0010:xfrm_xlate32_attr net/xfrm/xfrm_compat.c:410 [inline] RIP: 0010:xfrm_xlate32 net/xfrm/xfrm_compat.c:532 [inline] RIP: 0010:xfrm_user_rcv_msg_compat+0x5e5/0x1070 net/xfrm/xfrm_compat.c:577 [...] Call Trace: xfrm_user_rcv_msg+0x556/0x8b0 net/xfrm/xfrm_user.c:2774 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504 xfrm_netlink_rcv+0x6b/0x90 net/xfrm/xfrm_user.c:2824 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340 netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:702 [inline] Fixes: 5106f4a8acff ("xfrm/compat: Add 32=>64-bit messages translator") Cc: Reported-by: YueHaibing Signed-off-by: Dmitry Safonov Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_compat.c | 49 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index a20aec9d7393..2bf269390163 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -298,8 +298,16 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]); nla_for_each_attr(nla, attrs, len, remaining) { - int err = xfrm_xlate64_attr(dst, nla); + int err; + switch (type) { + case XFRM_MSG_NEWSPDINFO: + err = xfrm_nla_cpy(dst, nla, nla_len(nla)); + break; + default: + err = xfrm_xlate64_attr(dst, nla); + break; + } if (err) return err; } @@ -341,7 +349,8 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src /* Calculates len of translated 64-bit message. 
*/ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, - struct nlattr *attrs[XFRMA_MAX+1]) + struct nlattr *attrs[XFRMA_MAX + 1], + int maxtype) { size_t len = nlmsg_len(src); @@ -358,10 +367,20 @@ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, case XFRM_MSG_POLEXPIRE: len += 8; break; + case XFRM_MSG_NEWSPDINFO: + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + return len; default: break; } + /* Unexpected for anything, but XFRM_MSG_NEWSPDINFO, please + * correct both 64=>32-bit and 32=>64-bit translators to copy + * new attributes. + */ + if (WARN_ON_ONCE(maxtype)) + return len; + if (attrs[XFRMA_SA]) len += 4; if (attrs[XFRMA_POLICY]) @@ -440,7 +459,8 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, struct nlattr *attrs[XFRMA_MAX+1], - size_t size, u8 type, struct netlink_ext_ack *extack) + size_t size, u8 type, int maxtype, + struct netlink_ext_ack *extack) { size_t pos; int i; @@ -520,6 +540,25 @@ static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, } pos = dst->nlmsg_len; + if (maxtype) { + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO); + + for (i = 1; i <= maxtype; i++) { + int err; + + if (!attrs[i]) + continue; + + /* just copy - no need for translation */ + err = xfrm_attr_cpy32(dst, &pos, attrs[i], size, + nla_len(attrs[i]), nla_len(attrs[i])); + if (err) + return err; + } + return 0; + } + for (i = 1; i < XFRMA_MAX + 1; i++) { int err; @@ -564,7 +603,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (err < 0) return ERR_PTR(err); - len = xfrm_user_rcv_calculate_len64(h32, attrs); + len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype); /* The message doesn't need translation */ if (len == nlmsg_len(h32)) return NULL; @@ -574,7 +613,7 @@ static struct nlmsghdr 
*xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (!h64) return ERR_PTR(-ENOMEM); - err = xfrm_xlate32(h64, h32, attrs, len, type, extack); + err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack); if (err < 0) { kvfree(h64); return ERR_PTR(err); From 70bfdf62e93a4d73cfbaf83a3ac708a483ef7a71 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 17 Jul 2021 16:02:22 +0100 Subject: [PATCH 253/794] selftests/net/ipsec: Add test for xfrm_spdattr_type_t Set hthresh, dump it again and verify thresh.lbits && thresh.rbits. They are passed as attributes of xfrm_spdattr_type_t, different from other message attributes that use xfrm_attr_type_t. Also, test attribute that is bigger than XFRMA_SPD_MAX, currently it should be silently ignored. Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Dmitry Safonov Signed-off-by: Steffen Klassert --- tools/testing/selftests/net/ipsec.c | 165 +++++++++++++++++++++++++++- 1 file changed, 163 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index f23438d512c5..3d7dde2c321b 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -484,13 +484,16 @@ enum desc_type { MONITOR_ACQUIRE, EXPIRE_STATE, EXPIRE_POLICY, + SPDINFO_ATTRS, }; const char *desc_name[] = { "create tunnel", "alloc spi", "monitor acquire", "expire state", - "expire policy" + "expire policy", + "spdinfo attributes", + "" }; struct xfrm_desc { enum desc_type type; @@ -1593,6 +1596,155 @@ out_close: return ret; } +static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq, + unsigned thresh4_l, unsigned thresh4_r, + unsigned thresh6_l, unsigned thresh6_r, + bool add_bad_attr) + +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrmu_spdhthresh thresh; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type 
= XFRM_MSG_NEWSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + thresh.lbits = thresh4_l; + thresh.rbits = thresh4_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + thresh.lbits = thresh6_l; + thresh.rbits = thresh6_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + if (add_bad_attr) { + BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) { + pr_err("adding attribute failed: no space"); + return -1; + } + } + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return -1; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + return 0; +} + +static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq) +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) { + pr_err("Can't set SPD HTHRESH"); + return KSFT_FAIL; + } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) { + size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused)); + struct rtattr *attr = (void *)req.attrbuf; + int got_thresh = 0; + + for (; RTA_OK(attr, len); attr 
= RTA_NEXT(attr, len)) { + if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 32 || t->rbits != 31) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 120 || t->rbits != 16) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + } + if (got_thresh != 2) { + pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh); + return KSFT_FAIL; + } + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return KSFT_FAIL; + } else { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + /* Restore the default */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) { + pr_err("Can't restore SPD HTHRESH"); + return KSFT_FAIL; + } + + /* + * At this moment xfrm uses nlmsg_parse_deprecated(), which + * implies NL_VALIDATE_LIBERAL - ignoring attributes with + * (type > maxtype). nla_parse_depricated_strict() would enforce + * it. Or even stricter nla_parse(). + * Right now it's not expected to fail, but to be ignored. 
+ */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true)) + return KSFT_PASS; + + return KSFT_PASS; +} + static int child_serv(int xfrm_sock, uint32_t *seq, unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) { @@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) case EXPIRE_POLICY: ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); break; + case SPDINFO_ATTRS: + ret = xfrm_spdinfo_attrs(xfrm_sock, &seq); + break; default: printk("Unknown desc type %d", desc.type); exit(KSFT_FAIL); @@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto) * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 * * Check the affected by the UABI difference structures. + * Also, check translation for xfrm_set_spdinfo: it has it's own attributes + * which needs to be correctly copied, but not translated. */ -const unsigned int compat_plan = 4; +const unsigned int compat_plan = 5; static int write_compat_struct_tests(int test_desc_fd) { struct xfrm_desc desc = {}; @@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd) if (__write_desc(test_desc_fd, &desc)) return -1; + desc.type = SPDINFO_ATTRS; + if (__write_desc(test_desc_fd, &desc)) + return -1; + return 0; } From 1f958f3dff42a7239bbfdd9a33db5f11574d16bf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Jul 2021 09:55:38 +0200 Subject: [PATCH 254/794] Revert "arm64: dts: qcom: Harmonize DWC USB3 DT nodes name" This reverts commit eb9b7bfd5954f5f6ac4d57313541dd0294660aad as it breaks working userspace implementations (i.e. Android systems) The device node name here is part of configfs, so it is a user-visable api that can not be changed. 
Reported-by: John Stultz Cc: Serge Semin Cc: Krzysztof Kozlowski Cc: Bjorn Andersson Link: https://lore.kernel.org/r/CALAqxLX_FNvFndEDWtGbFPjSzuAbfqxQE07diBJFZtftwEJX5A@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/ipq8074.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/msm8996.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/msm8998.dtsi | 2 +- arch/arm64/boot/dts/qcom/qcs404-evb.dtsi | 2 +- arch/arm64/boot/dts/qcom/qcs404.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 +- arch/arm64/boot/dts/qcom/sdm845.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sm8150.dtsi | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 068692350e00..51e17094d7b1 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -1063,7 +1063,7 @@ status = "okay"; extcon = <&usb2_id>; - usb@7600000 { + dwc3@7600000 { extcon = <&usb2_id>; dr_mode = "otg"; maximum-speed = "high-speed"; @@ -1074,7 +1074,7 @@ status = "okay"; extcon = <&usb3_id>; - usb@6a00000 { + dwc3@6a00000 { extcon = <&usb3_id>; dr_mode = "otg"; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 95d6cb8cd4c0..f39bc10cc5bd 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -443,7 +443,7 @@ resets = <&gcc GCC_USB0_BCR>; status = "disabled"; - dwc_0: usb@8a00000 { + dwc_0: dwc3@8a00000 { compatible = "snps,dwc3"; reg = <0x8a00000 0xcd00>; interrupts = ; @@ -484,7 +484,7 @@ resets = <&gcc GCC_USB1_BCR>; status = "disabled"; - dwc_1: usb@8c00000 { + dwc_1: dwc3@8c00000 { compatible = "snps,dwc3"; reg = <0x8c00000 0xcd00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 0e1bc4669d7e..78c55ca10ba9 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -2566,7 +2566,7 @@ power-domains = <&gcc USB30_GDSC>; status = "disabled"; - usb@6a00000 { + dwc3@6a00000 { compatible = "snps,dwc3"; reg = <0x06a00000 0xcc00>; interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>; @@ -2873,7 +2873,7 @@ qcom,select-utmi-as-pipe-clk; status = "disabled"; - usb@7600000 { + dwc3@7600000 { compatible = "snps,dwc3"; reg = <0x07600000 0xcc00>; interrupts = <0 138 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index 6f294f9c0cdf..e9d3ce29937c 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -1964,7 +1964,7 @@ resets = <&gcc GCC_USB_30_BCR>; - usb3_dwc3: usb@a800000 { + usb3_dwc3: dwc3@a800000 { compatible = "snps,dwc3"; reg = <0x0a800000 0xcd00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi index f8a55307b855..a80c578484ba 100644 --- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi @@ -337,7 +337,7 @@ &usb3 { status = "okay"; - usb@7580000 { + dwc3@7580000 { dr_mode = "host"; }; }; diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi index 9c4be020d568..339790ba585d 100644 --- a/arch/arm64/boot/dts/qcom/qcs404.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi @@ -544,7 +544,7 @@ assigned-clock-rates = <19200000>, <200000000>; status = "disabled"; - usb@7580000 { + dwc3@7580000 { compatible = "snps,dwc3"; reg = <0x07580000 0xcd00>; interrupts = ; @@ -573,7 +573,7 @@ assigned-clock-rates = <19200000>, <133333333>; status = "disabled"; - usb@78c0000 { + dwc3@78c0000 { compatible = "snps,dwc3"; reg = <0x078c0000 0xcc00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index a5d58eb92896..a9a052f8c63c 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ 
-2756,7 +2756,7 @@ <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3 0>; interconnect-names = "usb-ddr", "apps-usb"; - usb_1_dwc3: usb@a600000 { + usb_1_dwc3: dwc3@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xe000>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 1796ae8372be..0a86fe71a66d 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -3781,7 +3781,7 @@ <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_0 0>; interconnect-names = "usb-ddr", "apps-usb"; - usb_1_dwc3: usb@a600000 { + usb_1_dwc3: dwc3@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xcd00>; interrupts = ; @@ -3829,7 +3829,7 @@ <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_1 0>; interconnect-names = "usb-ddr", "apps-usb"; - usb_2_dwc3: usb@a800000 { + usb_2_dwc3: dwc3@a800000 { compatible = "snps,dwc3"; reg = <0 0x0a800000 0 0xcd00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 612dda0fef43..eef9d79157e9 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -2344,7 +2344,7 @@ resets = <&gcc GCC_USB30_PRIM_BCR>; - usb_1_dwc3: usb@a600000 { + usb_1_dwc3: dwc3@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xcd00>; interrupts = ; From 7764656b108cd308c39e9a8554353b8f9ca232a3 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 5 Jul 2021 21:38:29 +0800 Subject: [PATCH 255/794] nvme-pci: don't WARN_ON in nvme_reset_work if ctrl.state is not RESETTING Followling process: nvme_probe nvme_reset_ctrl nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) queue_work(nvme_reset_wq, &ctrl->reset_work) --------------> nvme_remove nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING) worker_thread process_one_work nvme_reset_work WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING) , which will trigger WARN_ON in nvme_reset_work(): [ 127.534298] 
WARNING: CPU: 0 PID: 139 at drivers/nvme/host/pci.c:2594 [ 127.536161] CPU: 0 PID: 139 Comm: kworker/u8:7 Not tainted 5.13.0 [ 127.552518] Call Trace: [ 127.552840] ? kvm_sched_clock_read+0x25/0x40 [ 127.553936] ? native_send_call_func_single_ipi+0x1c/0x30 [ 127.555117] ? send_call_function_single_ipi+0x9b/0x130 [ 127.556263] ? __smp_call_single_queue+0x48/0x60 [ 127.557278] ? ttwu_queue_wakelist+0xfa/0x1c0 [ 127.558231] ? try_to_wake_up+0x265/0x9d0 [ 127.559120] ? ext4_end_io_rsv_work+0x160/0x290 [ 127.560118] process_one_work+0x28c/0x640 [ 127.561002] worker_thread+0x39a/0x700 [ 127.561833] ? rescuer_thread+0x580/0x580 [ 127.562714] kthread+0x18c/0x1e0 [ 127.563444] ? set_kthread_struct+0x70/0x70 [ 127.564347] ret_from_fork+0x1f/0x30 The preceding problem can be easily reproduced by executing the following script (based on blktests suite): test() { pdev="$(_get_pci_dev_from_blkdev)" sysfs="/sys/bus/pci/devices/${pdev}" for ((i = 0; i < 10; i++)); do echo 1 > "$sysfs/remove" echo 1 > /sys/bus/pci/rescan done } Since the device ctrl could be updated to a non-RESETTING state by repeating probe/remove in userspace (which is a normal situation), we can replace stack dumping WARN_ON with a warning message.
Fixes: 82b057caefaff ("nvme-pci: fix multiple ctrl removal schedulin") Signed-off-by: Zhihao Cheng --- drivers/nvme/host/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 320051f5a3dd..51852085239e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2631,7 +2631,9 @@ static void nvme_reset_work(struct work_struct *work) bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result; - if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) { + if (dev->ctrl.state != NVME_CTRL_RESETTING) { + dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n", + dev->ctrl.state); result = -ENODEV; goto out; } From 5396fdac56d87d04e75e5068c0c92d33625f51e7 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 16 Jul 2021 13:30:35 +0200 Subject: [PATCH 256/794] nvme: fix refcounting imbalance when all paths are down When the last path to a ns_head drops the current code removes the ns_head from the subsystem list, but will only delete the disk itself if the last reference to the ns_head drops. This is causing an refcounting imbalance eg when applications have a reference to the disk, as then they'll never get notified that the disk is in fact dead. This patch moves the call 'del_gendisk' into nvme_mpath_check_last_path(), ensuring that the disk can be properly removed and applications get the appropriate notifications. 
Signed-off-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 14 +++++++++++--- drivers/nvme/host/multipath.c | 9 ++++++++- drivers/nvme/host/nvme.h | 11 ++--------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 11779be42186..17c05a4595f0 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3807,6 +3807,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, static void nvme_ns_remove(struct nvme_ns *ns) { + bool last_path = false; + if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; @@ -3815,8 +3817,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); - if (list_empty(&ns->head->list)) - list_del_init(&ns->head->entry); mutex_unlock(&ns->ctrl->subsys->lock); synchronize_rcu(); /* guarantee not available in head->list */ @@ -3836,7 +3836,15 @@ static void nvme_ns_remove(struct nvme_ns *ns) list_del_init(&ns->list); up_write(&ns->ctrl->namespaces_rwsem); - nvme_mpath_check_last_path(ns); + /* Synchronize with nvme_init_ns_head() */ + mutex_lock(&ns->head->subsys->lock); + if (list_empty(&ns->head->list)) { + list_del_init(&ns->head->entry); + last_path = true; + } + mutex_unlock(&ns->head->subsys->lock); + if (last_path) + nvme_mpath_shutdown_disk(ns->head); nvme_put_ns(ns); } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 0ea5298469c3..3f32c5e86bfc 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -760,14 +760,21 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) #endif } -void nvme_mpath_remove_disk(struct nvme_ns_head *head) +void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) { if (!head->disk) return; + kblockd_schedule_work(&head->requeue_work); if (head->disk->flags & GENHD_FL_UP) { nvme_cdev_del(&head->cdev, &head->cdev_device); 
del_gendisk(head->disk); } +} + +void nvme_mpath_remove_disk(struct nvme_ns_head *head) +{ + if (!head->disk) + return; blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 18ef8dd03a90..5cd1fa3b8464 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -716,14 +716,7 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); - -static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) -{ - struct nvme_ns_head *head = ns->head; - - if (head->disk && list_empty(&head->list)) - kblockd_schedule_work(&head->requeue_work); -} +void nvme_mpath_shutdown_disk(struct nvme_ns_head *head); static inline void nvme_trace_bio_complete(struct request *req) { @@ -772,7 +765,7 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { } -static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) +static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) { } static inline void nvme_trace_bio_complete(struct request *req) From 234211b8dd161fa25f192c78d5a8d2dd6bf920a0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Jul 2021 09:44:39 -0700 Subject: [PATCH 257/794] nvme: fix nvme_setup_command metadata trace event The metadata address is set after the trace event, so the trace is not capturing anything useful. Rather than logging the memory address, it's useful to know if the command carries a metadata payload, so change the trace event to log that true/false state instead. 
Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index daaf700eae79..35bac7a25422 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -56,7 +56,7 @@ TRACE_EVENT(nvme_setup_cmd, __field(u8, fctype) __field(u16, cid) __field(u32, nsid) - __field(u64, metadata) + __field(bool, metadata) __array(u8, cdw10, 24) ), TP_fast_assign( @@ -66,13 +66,13 @@ TRACE_EVENT(nvme_setup_cmd, __entry->flags = cmd->common.flags; __entry->cid = cmd->common.command_id; __entry->nsid = le32_to_cpu(cmd->common.nsid); - __entry->metadata = le64_to_cpu(cmd->common.metadata); + __entry->metadata = !!blk_integrity_rq(req); __entry->fctype = cmd->fabrics.fctype; __assign_disk_name(__entry->disk, req->rq_disk); memcpy(__entry->cdw10, &cmd->common.cdw10, sizeof(__entry->cdw10)); ), - TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)", + TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%x, cmd=(%s %s)", __entry->ctrl_id, __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->nsid, __entry->flags, __entry->metadata, From 5c912e679506ef72adb95616d2f56a8a1b079a3d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 21 May 2021 02:10:10 +0000 Subject: [PATCH 258/794] usb: cdc-wdm: fix build error when CONFIG_WWAN_CORE is not set Gcc report build error as following when CONFIG_WWAN_CORE is not set: x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_disconnect': cdc-wdm.c:(.text+0xb2a): undefined reference to `wwan_remove_port' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_in_callback': cdc-wdm.c:(.text+0xf23): undefined reference to `wwan_port_rx' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_wwan_port_stop': cdc-wdm.c:(.text+0x127d): undefined reference to `wwan_port_get_drvdata' 
x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_wwan_port_tx': cdc-wdm.c:(.text+0x12d9): undefined reference to `wwan_port_get_drvdata' x86_64-linux-gnu-ld: cdc-wdm.c:(.text+0x13c1): undefined reference to `wwan_port_txoff' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_wwan_port_start': cdc-wdm.c:(.text+0x13e0): undefined reference to `wwan_port_get_drvdata' x86_64-linux-gnu-ld: cdc-wdm.c:(.text+0x1431): undefined reference to `wwan_port_txon' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_wwan_port_tx_complete': cdc-wdm.c:(.text+0x14a4): undefined reference to `wwan_port_txon' x86_64-linux-gnu-ld: drivers/usb/class/cdc-wdm.o: in function `wdm_create.cold': cdc-wdm.c:(.text.unlikely+0x209): undefined reference to `wwan_create_port' Using CONFIG_WWAN_CORE instead of CONFIG_WWAN to avoid build error. Fixes: cac6fb015f71 ("usb: class: cdc-wdm: WWAN framework integration") Reported-by: Hulk Robot Reviewed-by: Loic Poulain Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20210521021010.2490930-1-weiyongjun1@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index fdf79bcf7eb0..35d5908b5478 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -824,7 +824,7 @@ static struct usb_class_driver wdm_class = { }; /* --- WWAN framework integration --- */ -#ifdef CONFIG_WWAN +#ifdef CONFIG_WWAN_CORE static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); @@ -963,11 +963,11 @@ static void wdm_wwan_rx(struct wdm_device *desc, int length) /* inbuf has been copied, it is safe to check for outstanding data */ schedule_work(&desc->service_outs_intr); } -#else /* CONFIG_WWAN */ +#else /* CONFIG_WWAN_CORE */ static void wdm_wwan_init(struct wdm_device *desc) {} static void 
wdm_wwan_deinit(struct wdm_device *desc) {} static void wdm_wwan_rx(struct wdm_device *desc, int length) {} -#endif /* CONFIG_WWAN */ +#endif /* CONFIG_WWAN_CORE */ /* --- error handling --- */ static void wdm_rxwork(struct work_struct *work) From f3a1a937f7b240be623d989c8553a6d01465d04f Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Wed, 21 Jul 2021 11:25:16 +0200 Subject: [PATCH 259/794] Revert "USB: quirks: ignore remote wake-up on Fibocom L850-GL LTE modem" This reverts commit 0bd860493f81eb2a46173f6f5e44cc38331c8dbd. While the patch was working as stated, i.e. preventing the L850-GL LTE modem from crashing on some U3 wake-ups due to a race condition between the host wake-up and the modem-side wake-up, when using the MBIM interface, this would force disabling the USB runtime PM on the device. The increased power consumption is significant for LTE laptops, and given that with decently recent modem firmwares, when the modem hits the bug, it automatically recovers (i.e. it drops from the bus, but automatically re-enumerates after less than half a second, rather than being stuck until a power cycle as it was doing with ancient firmware), for most people, the trade-off now seems in favor of re-enabling it by default. For people with access to the platform code, the bug can also be worked-around successfully by changing the USB3 LFPM polling off-time for the XHCI controller in the BIOS code.
Signed-off-by: Vincent Palatin Link: https://lore.kernel.org/r/20210721092516.2775971-1-vpalatin@chromium.org Fixes: 0bd860493f81 ("USB: quirks: ignore remote wake-up on Fibocom L850-GL LTE modem") Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6114cf83bb44..8239fe7129dd 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -501,10 +501,6 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, - /* Fibocom L850-GL LTE Modem */ - { USB_DEVICE(0x2cb7, 0x0007), .driver_info = - USB_QUIRK_IGNORE_REMOTE_WAKEUP }, - /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, From 990e4ad3ddcb72216caeddd6e62c5f45a21e8121 Mon Sep 17 00:00:00 2001 From: Xiangyang Zhang Date: Mon, 28 Jun 2021 23:22:39 +0800 Subject: [PATCH 260/794] staging: rtl8723bs: Fix a resource leak in sd_int_dpc The "c2h_evt" variable is not freed when function call "c2h_evt_read_88xx" failed Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver") Reviewed-by: Hans de Goede Signed-off-by: Xiangyang Zhang Cc: stable Link: https://lore.kernel.org/r/20210628152239.5475-1-xyz.sun.ok@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/hal/sdio_ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/rtl8723bs/hal/sdio_ops.c b/drivers/staging/rtl8723bs/hal/sdio_ops.c index 2dd251ce177e..a545832a468e 100644 --- a/drivers/staging/rtl8723bs/hal/sdio_ops.c +++ b/drivers/staging/rtl8723bs/hal/sdio_ops.c @@ -909,6 +909,8 @@ void sd_int_dpc(struct adapter *adapter) } else { rtw_c2h_wk_cmd(adapter, (u8 *)c2h_evt); } + } else { + kfree(c2h_evt); } } else { /* Error handling for malloc fail */ From a7c3acca53801e10a77ede6b759a73f5ac4bc261 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 21 Jul 
2021 09:18:35 +0200 Subject: [PATCH 261/794] arm64: smccc: Save lr before calling __arm_smccc_sve_check() Commit cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") added a call to __arm_smccc_sve_check() which clobbers the lr (register x30), causing __arm_smccc_hvc() to return to itself and crash. Save lr on the stack before calling __arm_smccc_sve_check(). Save the frame pointer (x29) to complete the frame record, and adjust the offsets used to access stack parameters. Acked-by: Ard Biesheuvel Acked-by: Mark Brown Fixes: cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20210721071834.69130-1-jean-philippe@linaro.org Signed-off-by: Will Deacon --- arch/arm64/kernel/smccc-call.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index d3d37f932b97..487381164ff6 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -32,20 +32,23 @@ SYM_FUNC_END(__arm_smccc_sve_check) EXPORT_SYMBOL(__arm_smccc_sve_check) .macro SMCCC instr + stp x29, x30, [sp, #-16]! + mov x29, sp alternative_if ARM64_SVE bl __arm_smccc_sve_check alternative_else_nop_endif \instr #0 - ldr x4, [sp] + ldr x4, [sp, #16] stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] - ldr x4, [sp, #8] + ldr x4, [sp, #24] cbz x4, 1f /* no quirk structure */ ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 b.ne 1f str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] -1: ret +1: ldp x29, x30, [sp], #16 + ret .endm /* From d8a719059b9dc963aa190598778ac804ff3e6a87 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 21 Jul 2021 17:02:13 +1000 Subject: [PATCH 262/794] Revert "mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge" This reverts commit c742199a014de23ee92055c2473d91fe5561ffdf. 
c742199a014d ("mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge") breaks arm64 in at least two ways for configurations where PUD or PMD folding occur: 1. We no longer install huge-vmap mappings and silently fall back to page-granular entries, despite being able to install block entries at what is effectively the PGD level. 2. If the linear map is backed with block mappings, these will now silently fail to be created in alloc_init_pud(), causing a panic early during boot. The pgtable selftests caught this, although a fix has not been forthcoming and Christophe is AWOL at the moment, so just revert the change for now to get a working -rc3 on which we can queue patches for 5.15. A simple revert breaks the build for 32-bit PowerPC 8xx machines, which rely on the default function definitions when the corresponding page-table levels are folded, since commit a6a8f7c4aa7e ("powerpc/8xx: add support for huge pages on VMAP and VMALLOC"), eg: powerpc64-linux-ld: mm/vmalloc.o: in function `vunmap_pud_range': linux/mm/vmalloc.c:362: undefined reference to `pud_clear_huge' To avoid that, add stubs for pud_clear_huge() and pmd_clear_huge() in arch/powerpc/mm/nohash/8xx.c as suggested by Christophe. 
Cc: Christophe Leroy Cc: Catalin Marinas Cc: Andrew Morton Cc: Nicholas Piggin Cc: Mike Rapoport Cc: Mark Rutland Cc: Geert Uytterhoeven Fixes: c742199a014d ("mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge") Signed-off-by: Jonathan Marek Reviewed-by: Ard Biesheuvel Acked-by: Marc Zyngier [mpe: Fold in 8xx.c changes from Christophe and mention in change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/linux-arm-kernel/CAMuHMdXShORDox-xxaeUfDW3wx2PeggFSqhVSHVZNKCGK-y_vQ@mail.gmail.com/ Link: https://lore.kernel.org/r/20210717160118.9855-1-jonathan@marek.ca Link: https://lore.kernel.org/r/87r1fs1762.fsf@mpe.ellerman.id.au Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 20 ++++++++------------ arch/powerpc/mm/nohash/8xx.c | 10 ++++++++++ arch/x86/mm/pgtable.c | 34 +++++++++++++++------------------- include/linux/pgtable.h | 26 +------------------------- 4 files changed, 34 insertions(+), 56 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d74586508448..9ff0de1b2b93 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1339,7 +1339,6 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) return dt_virt; } -#if CONFIG_PGTABLE_LEVELS > 3 int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); @@ -1354,16 +1353,6 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) return 1; } -int pud_clear_huge(pud_t *pudp) -{ - if (!pud_sect(READ_ONCE(*pudp))) - return 0; - pud_clear(pudp); - return 1; -} -#endif - -#if CONFIG_PGTABLE_LEVELS > 2 int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); @@ -1378,6 +1367,14 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) return 1; } +int pud_clear_huge(pud_t *pudp) +{ + if (!pud_sect(READ_ONCE(*pudp))) + return 0; + pud_clear(pudp); + return 1; +} + int 
pmd_clear_huge(pmd_t *pmdp) { if (!pmd_sect(READ_ONCE(*pmdp))) @@ -1385,7 +1382,6 @@ int pmd_clear_huge(pmd_t *pmdp) pmd_clear(pmdp); return 1; } -#endif int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr) { diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 60780e089118..0df9fe29dd56 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -240,3 +240,13 @@ void __init setup_kuap(bool disabled) mtspr(SPRN_MD_AP, MD_APG_KUAP); } #endif + +int pud_clear_huge(pud_t *pud) +{ + return 0; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + return 0; +} diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3364fe62b903..3481b35cb4ec 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -682,7 +682,6 @@ int p4d_clear_huge(p4d_t *p4d) } #endif -#if CONFIG_PGTABLE_LEVELS > 3 /** * pud_set_huge - setup kernel PUD mapping * @@ -721,23 +720,6 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) return 1; } -/** - * pud_clear_huge - clear kernel PUD mapping when it is set - * - * Returns 1 on success and 0 on failure (no PUD map is found). - */ -int pud_clear_huge(pud_t *pud) -{ - if (pud_large(*pud)) { - pud_clear(pud); - return 1; - } - - return 0; -} -#endif - -#if CONFIG_PGTABLE_LEVELS > 2 /** * pmd_set_huge - setup kernel PMD mapping * @@ -768,6 +750,21 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) return 1; } +/** + * pud_clear_huge - clear kernel PUD mapping when it is set + * + * Returns 1 on success and 0 on failure (no PUD map is found). 
+ */ +int pud_clear_huge(pud_t *pud) +{ + if (pud_large(*pud)) { + pud_clear(pud); + return 1; + } + + return 0; +} + /** * pmd_clear_huge - clear kernel PMD mapping when it is set * @@ -782,7 +779,6 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } -#endif #ifdef CONFIG_X86_64 /** diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index d147480cdefc..e24d2c992b11 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1397,34 +1397,10 @@ static inline int p4d_clear_huge(p4d_t *p4d) } #endif /* !__PAGETABLE_P4D_FOLDED */ -#ifndef __PAGETABLE_PUD_FOLDED int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); -int pud_clear_huge(pud_t *pud); -#else -static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) -{ - return 0; -} -static inline int pud_clear_huge(pud_t *pud) -{ - return 0; -} -#endif /* !__PAGETABLE_PUD_FOLDED */ - -#ifndef __PAGETABLE_PMD_FOLDED int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); +int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); -#else -static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) -{ - return 0; -} -static inline int pmd_clear_huge(pmd_t *pmd) -{ - return 0; -} -#endif /* !__PAGETABLE_PMD_FOLDED */ - int p4d_free_pud_page(p4d_t *p4d, unsigned long addr); int pud_free_pmd_page(pud_t *pud, unsigned long addr); int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); From 61acabaae5ba58b3c32e6e90d24c2c0827fd27a8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Jun 2021 18:37:33 +0300 Subject: [PATCH 263/794] serial: max310x: Unprepare and disable clock in error path In one error case the clock may be left prepared and enabled. Unprepare and disable clock in that case to balance state of the hardware. 
Fixes: d4d6f03c4fb3 ("serial: max310x: Try to get crystal clock rate from property") Reported-by: Dan Carpenter Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210625153733.12911-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 0c1e4df52215..ef11860cd69e 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1293,7 +1293,8 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty freq = uartclk; if (freq == 0) { dev_err(dev, "Cannot get clock rate\n"); - return -EINVAL; + ret = -EINVAL; + goto out_clk; } if (xtal) { From e5227c51090e165db4b48dcaa300605bfced7014 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 26 Jun 2021 06:11:05 +0200 Subject: [PATCH 264/794] serial: 8250: Mask out floating 16/32-bit bus bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure only actual 8 bits of the IIR register are used in determining the port type in `autoconfig'. The `serial_in' port accessor returns the `unsigned int' type, meaning that with UPIO_AU, UPIO_MEM16, UPIO_MEM32, and UPIO_MEM32BE access types more than 8 bits of data are returned, of which the high order bits will often come from bus lines that are left floating in the data phase. For example with the MIPS Malta board's CBUS UART, where the registers are aligned on 8-byte boundaries and which uses 32-bit accesses, data as follows is returned: YAMON> dump -32 0xbf000900 0x40 BF000900: 1F000942 1F000942 1F000900 1F000900 ...B...B........ BF000910: 1F000901 1F000901 1F000900 1F000900 ................ BF000920: 1F000900 1F000900 1F000960 1F000960 ...........`...` BF000930: 1F000900 1F000900 1F0009FF 1F0009FF ................ 
YAMON> Evidently high-order 24 bits return values previously driven in the address phase (the 3 highest order address bits used with the command above are masked out in the simple virtual address mapping used here and come out at zeros on the external bus), a common scenario with bus lines left floating, due to bus capacitance. Consequently when the value of IIR, mapped at 0x1f000910, is retrieved in `autoconfig', it comes out at 0x1f0009c1 and when it is right-shifted by 6 and then assigned to 8-bit `scratch' variable, the value calculated is 0x27, not one of 0, 1, 2, 3 expected in port type determination. Fix the issue then, by assigning the value returned from `serial_in' to `scratch' first, which masks out 24 high-order bits retrieved, and only then right-shift the resulting 8-bit data quantity, producing the value of 3 in this case, as expected. Fix the same issue in `serial_dl_read'. The problem first appeared with Linux 2.6.9-rc3 which predates our repo history, but the origin could be identified with the old MIPS/Linux repo also at: as commit e0d2356c0777 ("Merge with Linux 2.6.9-rc3."), where code in `serial_in' was updated with this case: + case UPIO_MEM32: + return readl(up->port.membase + offset); + which made it produce results outside the unsigned 8-bit range for the first time, though obviously it is system dependent what actual values appear in the high order bits retrieved and it may well have been zeros in the relevant positions with the system the change originally was intended for. It is at that point that code in `autoconf' should have been updated accordingly, but clearly it was overlooked. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org # v2.6.12+ Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Maciej W. 
Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.21.2106260516220.37803@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2164290cbd31..2e7000f79b03 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -311,7 +311,11 @@ static const struct serial8250_config uart_config[] = { /* Uart divisor latch read */ static int default_serial_dl_read(struct uart_8250_port *up) { - return serial_in(up, UART_DLL) | serial_in(up, UART_DLM) << 8; + /* Assign these in pieces to truncate any bits above 7. */ + unsigned char dll = serial_in(up, UART_DLL); + unsigned char dlm = serial_in(up, UART_DLM); + + return dll | dlm << 8; } /* Uart divisor latch write */ @@ -1297,9 +1301,11 @@ static void autoconfig(struct uart_8250_port *up) serial_out(up, UART_LCR, 0); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - scratch = serial_in(up, UART_IIR) >> 6; - switch (scratch) { + /* Assign this as it is to truncate any bits above 7. */ + scratch = serial_in(up, UART_IIR); + + switch (scratch >> 6) { case 0: autoconfig_8250(up); break; From 9a936d6c3d3d6c33ecbadf72dccdb567b5cd3c72 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 26 Jun 2021 06:11:13 +0200 Subject: [PATCH 265/794] MIPS: Malta: Do not byte-swap accesses to the CBUS UART MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct big-endian accesses to the CBUS UART, a Malta on-board discrete TI16C550C part wired directly to the system controller's device bus, and do not use byte swapping with the 32-bit accesses to the device. The CBUS is used for devices such as the boot flash memory needed early on in system bootstrap even before PCI has been initialised. 
Therefore it uses the system controller's device bus, which follows the endianness set with the CPU, which means no byte-swapping is ever required for data accesses to CBUS, unlike with PCI. The CBUS UART uses the UPIO_MEM32 access method, that is the `readl' and `writel' MMIO accessors, which on the MIPS platform imply byte-swapping with PCI systems. Consequently the wrong byte lane is accessed with the big-endian configuration and the UART is not correctly accessed. As it happens the UPIO_MEM32BE access method makes use of the `ioread32' and `iowrite32' MMIO accessors, which still use `readl' and `writel' respectively, however they byte-swap data passed, effectively cancelling swapping done with the accessors themselves and making it suitable for the CBUS UART. Make the CBUS UART switch between UPIO_MEM32 and UPIO_MEM32BE then, based on the endianness selected. With this change in place the device is correctly recognised with big-endian Malta at boot, along with the Super I/O devices behind PCI: Serial: 8250/16550 driver, 5 ports, IRQ sharing enabled printk: console [ttyS0] disabled serial8250.0: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A printk: console [ttyS0] enabled printk: bootconsole [uart8250] disabled serial8250.0: ttyS1 at I/O 0x2f8 (irq = 3, base_baud = 115200) is a 16550A serial8250.0: ttyS2 at MMIO 0x1f000900 (irq = 20, base_baud = 230400) is a 16550A Fixes: e7c4782f92fc ("[MIPS] Put an end to <asm/serial.h>'s long and annyoing existence") Cc: stable@vger.kernel.org # v2.6.23+ Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Maciej W.
Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.21.2106260524430.37803@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- arch/mips/mti-malta/malta-platform.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c index ee7471984fe7..4ffbcc58c6f6 100644 --- a/arch/mips/mti-malta/malta-platform.c +++ b/arch/mips/mti-malta/malta-platform.c @@ -48,7 +48,8 @@ static struct plat_serial8250_port uart8250_data[] = { .mapbase = 0x1f000900, /* The CBUS UART */ .irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB2, .uartclk = 3686400, /* Twice the usual clk! */ - .iotype = UPIO_MEM32, + .iotype = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) ? + UPIO_MEM32BE : UPIO_MEM32, .flags = CBUS_UART_FLAGS, .regshift = 3, }, From cc9ca4d95846cbbece48d9cd385550f8fba6a3c1 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 30 Jun 2021 13:56:43 +0100 Subject: [PATCH 266/794] serial: tegra: Only print FIFO error message when an error occurs The Tegra serial driver always prints an error message when enabling the FIFO for devices that have support for checking the FIFO enable status. Fix this by displaying the error message, only when an error occurs. Finally, update the error message to make it clear that enabling the FIFO failed and display the error code. 
Fixes: 222dcdff3405 ("serial: tegra: check for FIFO mode enabled status") Cc: Acked-by: Thierry Reding Signed-off-by: Jon Hunter Link: https://lore.kernel.org/r/20210630125643.264264-1-jonathanh@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial-tegra.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index 222032792d6c..eba5b9ecba34 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -1045,9 +1045,11 @@ static int tegra_uart_hw_init(struct tegra_uart_port *tup) if (tup->cdata->fifo_mode_enable_status) { ret = tegra_uart_wait_fifo_mode_enabled(tup); - dev_err(tup->uport.dev, "FIFO mode not enabled\n"); - if (ret < 0) + if (ret < 0) { + dev_err(tup->uport.dev, + "Failed to enable FIFO mode: %d\n", ret); return ret; + } } else { /* * For all tegra devices (up to t210), there is a hardware From 853a9ae29e978d37f5dfa72622a68c9ae3d7fa89 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 14 Jul 2021 10:04:27 +0200 Subject: [PATCH 267/794] serial: 8250: fix handle_irq locking The 8250 handle_irq callback is not just called from the interrupt handler but also from a timer callback when polling (e.g. for ports without an interrupt line). Consequently the callback must explicitly disable interrupts to avoid a potential deadlock with another interrupt in polled mode. Add back an irqrestore-version of the sysrq port-unlock helper and use it in the 8250 callbacks that need it. 
Fixes: 75f4e830fa9c ("serial: do not restore interrupt state in sysrq helper") Cc: stable@vger.kernel.org # 5.13 Cc: Joel Stanley Cc: Andrew Jeffery Reported-by: kernel test robot Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210714080427.28164-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_aspeed_vuart.c | 5 +++-- drivers/tty/serial/8250/8250_fsl.c | 5 +++-- drivers/tty/serial/8250/8250_port.c | 5 +++-- include/linux/serial_core.h | 24 +++++++++++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 4caab8714e2c..2350fb3bb5e4 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -329,6 +329,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int iir, lsr; + unsigned long flags; unsigned int space, count; iir = serial_port_in(port, UART_IIR); @@ -336,7 +337,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) if (iir & UART_IIR_NO_INT) return 0; - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); lsr = serial_port_in(port, UART_LSR); @@ -370,7 +371,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) if (lsr & UART_LSR_THRE) serial8250_tx_chars(up); - uart_unlock_and_check_sysrq(port); + uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 4e75d2e4f87c..fc65a2293ce9 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -30,10 +30,11 @@ struct fsl8250_data { int fsl8250_handle_irq(struct uart_port *port) { unsigned char lsr, orig_lsr; + unsigned long flags; unsigned int iir; struct uart_8250_port *up = up_to_u8250p(port); - spin_lock(&up->port.lock); + spin_lock_irqsave(&up->port.lock, flags); iir = 
port->serial_in(port, UART_IIR); if (iir & UART_IIR_NO_INT) { @@ -82,7 +83,7 @@ int fsl8250_handle_irq(struct uart_port *port) up->lsr_saved_flags = orig_lsr; - uart_unlock_and_check_sysrq(&up->port); + uart_unlock_and_check_sysrq_irqrestore(&up->port, flags); return 1; } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2e7000f79b03..1da29a219842 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1899,11 +1899,12 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) unsigned char status; struct uart_8250_port *up = up_to_u8250p(port); bool skip_rx = false; + unsigned long flags; if (iir & UART_IIR_NO_INT) return 0; - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); status = serial_port_in(port, UART_LSR); @@ -1929,7 +1930,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) (up->ier & UART_IER_THRI)) serial8250_tx_chars(up); - uart_unlock_and_check_sysrq(port); + uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 52d7fb92a69d..c58cc142d23f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -518,6 +518,25 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) if (sysrq_ch) handle_sysrq(sysrq_ch); } + +static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, + unsigned long flags) +{ + int sysrq_ch; + + if (!port->has_sysrq) { + spin_unlock_irqrestore(&port->lock, flags); + return; + } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, flags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { @@ -531,6 +550,11 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { spin_unlock(&port->lock); } 
+static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, + unsigned long flags) +{ + spin_unlock_irqrestore(&port->lock, flags); +} #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* From 7f0909db761535aefafa77031062603a71557267 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 13 Jul 2021 13:17:39 +0300 Subject: [PATCH 268/794] serial: 8250_pci: Enumerate Elkhart Lake UARTs via dedicated driver Elkhart Lake UARTs are PCI enumerated Synopsys DesignWare v4.0+ UART integrated with Intel iDMA 32-bit DMA controller. There is a specific driver to handle them, i.e. 8250_lpss. Hence, disable 8250_pci enumeration for these UARTs. Fixes: 1b91d97c66ef ("serial: 8250_lpss: Add ->setup() for Elkhart Lake ports") Fixes: 4f912b898dc2 ("serial: 8250_lpss: Enable HS UART on Elkhart Lake") Cc: stable Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210713101739.36962-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 75827b608fdb..02985cf90ef2 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3836,6 +3836,12 @@ static const struct pci_device_id blacklist[] = { { PCI_VDEVICE(INTEL, 0x0f0c), }, { PCI_VDEVICE(INTEL, 0x228a), }, { PCI_VDEVICE(INTEL, 0x228c), }, + { PCI_VDEVICE(INTEL, 0x4b96), }, + { PCI_VDEVICE(INTEL, 0x4b97), }, + { PCI_VDEVICE(INTEL, 0x4b98), }, + { PCI_VDEVICE(INTEL, 0x4b99), }, + { PCI_VDEVICE(INTEL, 0x4b9a), }, + { PCI_VDEVICE(INTEL, 0x4b9b), }, { PCI_VDEVICE(INTEL, 0x9ce3), }, { PCI_VDEVICE(INTEL, 0x9ce4), }, From 2dbd9c27eda5cf83aa990266a3355960d860da71 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 8 Jul 2021 14:25:18 +0300 Subject: [PATCH 269/794] drm/ttm: add missing NULL checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My local 
syzbot instance hit GPF in ttm_bo_release(). Unfortunately, syzbot didn't produce a reproducer for this, but I found a possible scenario: drm_gem_vram_create() <-- drm_gem_vram_object kzalloced (bo embedded in this object) ttm_bo_init() ttm_bo_init_reserved() ttm_resource_alloc() man->func->alloc() <-- allocation failure ttm_bo_put() ttm_bo_release() ttm_mem_io_free() <-- bo->resource == NULL passed as second argument *GPF* Added NULL check inside ttm_mem_io_free() to prevent reported GPF and make this function NULL safe in future. Same problem was in ttm_bo_move_to_lru_tail() as Christian reported. ttm_bo_move_to_lru_tail() is called in ttm_bo_release() and mem pointer can be NULL as well as in ttm_mem_io_free(). Fail log: KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] ... RIP: 0010:ttm_mem_io_free+0x28/0x170 drivers/gpu/drm/ttm/ttm_bo_util.c:66 .. Call Trace: ttm_bo_release+0xd94/0x10a0 drivers/gpu/drm/ttm/ttm_bo.c:422 kref_put include/linux/kref.h:65 [inline] ttm_bo_put drivers/gpu/drm/ttm/ttm_bo.c:470 [inline] ttm_bo_init_reserved+0x7cb/0x960 drivers/gpu/drm/ttm/ttm_bo.c:1050 ttm_bo_init+0x105/0x270 drivers/gpu/drm/ttm/ttm_bo.c:1074 drm_gem_vram_create+0x332/0x4c0 drivers/gpu/drm/drm_gem_vram_helper.c:228 Fixes: d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer") Signed-off-by: Pavel Skripkin Reviewed-by: Christian König Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210708112518.17271-1-paskripkin@gmail.com --- drivers/gpu/drm/ttm/ttm_bo.c | 3 +++ drivers/gpu/drm/ttm/ttm_bo_util.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1b950b45cf4b..8d7fd65ccced 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, return; } + if (!mem) + return; + man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, 
&man->lru[bo->priority]); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6db..763fa6f4e07d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev, void ttm_mem_io_free(struct ttm_device *bdev, struct ttm_resource *mem) { + if (!mem) + return; + if (!mem->bus.offset && !mem->bus.addr) return; From 56f6f4c4eb2a710ec8878dd9373d3d2b2eb75f5c Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Fri, 16 Jul 2021 13:21:04 +0530 Subject: [PATCH 270/794] bus: mhi: pci_generic: Apply no-op for wake using sideband wake boolean Devices such as SDX24 do not have the provision for inband wake doorbell in the form of channel 127 and instead have a sideband GPIO for it. Newer devices such as SDX55 or SDX65 support inband wake method by default. Ensure the functionality is used based on this such that device wake stays held when a client driver uses mhi_device_get() API or the equivalent debugfs entry. 
Link: https://lore.kernel.org/r/1624560809-30610-1-git-send-email-bbhatt@codeaurora.org Fixes: e3e5e6508fc1 ("bus: mhi: pci_generic: No-Op for device_wake operations") Cc: stable@vger.kernel.org #5.12 Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bhaumik Bhatt Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210716075106.49938-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/pci_generic.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index ca3bc40427f8..3396cb30ebec 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -32,6 +32,8 @@ * @edl: emergency download mode firmware path (if any) * @bar_num: PCI base address register to use for MHI MMIO register space * @dma_data_width: DMA transfer word size (32 or 64 bits) + * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead + * of inband wake support (such as sdx24) */ struct mhi_pci_dev_info { const struct mhi_controller_config *config; @@ -40,6 +42,7 @@ struct mhi_pci_dev_info { const char *edl; unsigned int bar_num; unsigned int dma_data_width; + bool sideband_wake; }; #define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \ @@ -242,7 +245,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx65_info = { .edl = "qcom/sdx65m/edl.mbn", .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = false, }; static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = { @@ -251,7 +255,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = { .edl = "qcom/sdx55m/edl.mbn", .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = false, }; static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = { @@ -259,7 +264,8 @@ static 
const struct mhi_pci_dev_info mhi_qcom_sdx24_info = { .edl = "qcom/prog_firehose_sdx24.mbn", .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = true, }; static const struct mhi_channel_config mhi_quectel_em1xx_channels[] = { @@ -301,7 +307,8 @@ static const struct mhi_pci_dev_info mhi_quectel_em1xx_info = { .edl = "qcom/prog_firehose_sdx24.mbn", .config = &modem_quectel_em1xx_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = true, }; static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = { @@ -339,7 +346,8 @@ static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = { .edl = "qcom/sdx55m/edl.mbn", .config = &modem_foxconn_sdx55_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = false, }; static const struct pci_device_id mhi_pci_id_table[] = { @@ -640,9 +648,12 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mhi_cntrl->status_cb = mhi_pci_status_cb; mhi_cntrl->runtime_get = mhi_pci_runtime_get; mhi_cntrl->runtime_put = mhi_pci_runtime_put; - mhi_cntrl->wake_get = mhi_pci_wake_get_nop; - mhi_cntrl->wake_put = mhi_pci_wake_put_nop; - mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop; + + if (info->sideband_wake) { + mhi_cntrl->wake_get = mhi_pci_wake_get_nop; + mhi_cntrl->wake_put = mhi_pci_wake_put_nop; + mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop; + } err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width)); if (err) From 546362a9ef2ef40b57c6605f14e88ced507f8dd0 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Fri, 16 Jul 2021 13:21:05 +0530 Subject: [PATCH 271/794] bus: mhi: core: Validate channel ID when processing command completions MHI reads the channel ID from the event ring element sent by the device which can be any value between 0 and 255. 
In order to prevent any out of bound accesses, add a check against the maximum number of channels supported by the controller and those channels not configured yet so as to skip processing of that event ring element. Link: https://lore.kernel.org/r/1624558141-11045-1-git-send-email-bbhatt@codeaurora.org Fixes: 1d3173a3bae7 ("bus: mhi: core: Add support for processing events from client device") Cc: stable@vger.kernel.org #5.10 Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Reviewed-by: Jeffrey Hugo Signed-off-by: Bhaumik Bhatt Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210716075106.49938-3-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index 22acde118bc3..fc9196f11cb7 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -773,11 +773,18 @@ static void mhi_process_cmd_completion(struct mhi_controller *mhi_cntrl, cmd_pkt = mhi_to_virtual(mhi_ring, ptr); chan = MHI_TRE_GET_CMD_CHID(cmd_pkt); - mhi_chan = &mhi_cntrl->mhi_chan[chan]; - write_lock_bh(&mhi_chan->lock); - mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre); - complete(&mhi_chan->completion); - write_unlock_bh(&mhi_chan->lock); + + if (chan < mhi_cntrl->max_chan && + mhi_cntrl->mhi_chan[chan].configured) { + mhi_chan = &mhi_cntrl->mhi_chan[chan]; + write_lock_bh(&mhi_chan->lock); + mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre); + complete(&mhi_chan->completion); + write_unlock_bh(&mhi_chan->lock); + } else { + dev_err(&mhi_cntrl->mhi_dev->dev, + "Completion packet for invalid channel ID: %d\n", chan); + } mhi_del_ring_element(mhi_cntrl, mhi_ring); } From b8a97f2a65388394f433bf0730293a94f7d49046 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 16 Jul 2021 13:21:06 +0530 Subject: [PATCH 272/794] bus: mhi: pci_generic: Fix inbound IPCR channel The qrtr-mhi client 
driver assumes that inbound buffers are automatically allocated and queued by the MHI core, but this doesn't happen for mhi pci devices since IPCR inbound channel is not flagged with auto_queue, causing unusable IPCR (qrtr) feature. Fix that. Link: https://lore.kernel.org/r/1625736749-24947-1-git-send-email-loic.poulain@linaro.org [mani: fixed a spelling mistake in commit description] Fixes: 855a70c12021 ("bus: mhi: Add MHI PCI support for WWAN modems") Cc: stable@vger.kernel.org #5.10 Reviewed-by: Hemant kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Loic Poulain Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210716075106.49938-4-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/pci_generic.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 3396cb30ebec..4dd1077354af 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -75,6 +75,22 @@ struct mhi_pci_dev_info { .doorbell_mode_switch = false, \ } +#define MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(ch_num, ch_name, el_count, ev_ring) \ + { \ + .num = ch_num, \ + .name = ch_name, \ + .num_elements = el_count, \ + .event_ring = ev_ring, \ + .dir = DMA_FROM_DEVICE, \ + .ee_mask = BIT(MHI_EE_AMSS), \ + .pollcfg = 0, \ + .doorbell = MHI_DB_BRST_DISABLE, \ + .lpm_notify = false, \ + .offload_channel = false, \ + .doorbell_mode_switch = false, \ + .auto_queue = true, \ + } + #define MHI_EVENT_CONFIG_CTRL(ev_ring, el_count) \ { \ .num_elements = el_count, \ @@ -213,7 +229,7 @@ static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = { MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0), MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0), MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0), - MHI_CHANNEL_CONFIG_DL(21, "IPCR", 8, 0), + MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 8, 0), MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0), MHI_CHANNEL_CONFIG_DL_FP(35, 
"FIREHOSE", 32, 0), MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2), From e703eaff5089da93fd379678f0371f52497042ba Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Wed, 21 Jul 2021 03:02:46 -0700 Subject: [PATCH 273/794] MAINTAINERS: Update for VMCI driver Add maintainer info for the VMware VMCI driver. v2: moved pv-drivers to L: as private list Acked-by: Vishnu Dasa Signed-off-by: Jorgen Hansen Link: https://lore.kernel.org/r/1626861766-11115-1-git-send-email-jhansen@vmware.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6c8be735cc91..61ff28cfdcde 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19800,6 +19800,14 @@ L: netdev@vger.kernel.org S: Supported F: drivers/ptp/ptp_vmw.c +VMWARE VMCI DRIVER +M: Jorgen Hansen +M: Vishnu Dasa +L: linux-kernel@vger.kernel.org +L: pv-drivers@vmware.com (private) +S: Maintained +F: drivers/misc/vmw_vmci/ + VMWARE VMMOUSE SUBDRIVER M: "VMware Graphics" M: "VMware, Inc." From ebea6761b620d758ed77d2df70fab1ae7a363151 Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Wed, 14 Jul 2021 16:26:14 +0800 Subject: [PATCH 274/794] MAINTAINERS: Change ACRN HSM driver maintainer Shuo steps down, Fei will take over. 
Acked-by: Fei Li Signed-off-by: Shuo Liu Link: https://lore.kernel.org/r/20210714082614.88560-1-shuo.a.liu@intel.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 61ff28cfdcde..99316c4370a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -445,7 +445,7 @@ F: drivers/platform/x86/wmi.c F: include/uapi/linux/wmi.h ACRN HYPERVISOR SERVICE MODULE -M: Shuo Liu +M: Fei Li L: acrn-dev@lists.projectacrn.org (subscribers-only) S: Supported W: https://projectacrn.org From c453db6cd96418c79702eaf38259002755ab23ff Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 29 Jun 2021 12:40:24 +0200 Subject: [PATCH 275/794] nds32: fix up stack guard gap Commit 1be7107fbe18 ("mm: larger stack guard gap, between vmas") fixed up all architectures to deal with the stack guard gap. But when nds32 was added to the tree, it forgot to do the same thing. Resolve this by properly fixing up the nds32's version of arch_get_unmapped_area() Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Michal Hocko Cc: Hugh Dickins Cc: Qiang Liu Cc: stable Reported-by: iLifetruth Acked-by: Hugh Dickins Link: https://lore.kernel.org/r/20210629104024.2293615-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- arch/nds32/mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/mm/mmap.c b/arch/nds32/mm/mmap.c index c206b31ce07a..1bdf5e7d1b43 100644 --- a/arch/nds32/mm/mmap.c +++ b/arch/nds32/mm/mmap.c @@ -59,7 +59,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } From 6c881ca0b3040f3e724eae513117ba4ddef86057 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 15 Jun 2021 11:57:26 +0100 Subject: [PATCH 276/794] afs: Fix tracepoint string placement with built-in AFS To quote Alexey[1]: I 
was adding custom tracepoint to the kernel, grabbed full F34 kernel .config, disabled modules and booted whole shebang as VM kernel. Then did perf record -a -e ... It crashed: general protection fault, probably for non-canonical address 0x435f5346592e4243: 0000 [#1] SMP PTI CPU: 1 PID: 842 Comm: cat Not tainted 5.12.6+ #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 RIP: 0010:t_show+0x22/0xd0 Then reproducer was narrowed to # cat /sys/kernel/tracing/printk_formats Original F34 kernel with modules didn't crash. So I started to disable options and after disabling AFS everything started working again. The root cause is that AFS was placing char arrays content into a section full of _pointers_ to strings with predictable consequences. Non canonical address 435f5346592e4243 is "CB.YFS_" which came from CM_NAME macro. Steps to reproduce: CONFIG_AFS=y CONFIG_TRACING=y # cat /sys/kernel/tracing/printk_formats Fix this by the following means: (1) Add enum->string translation tables in the event header with the AFS and YFS cache/callback manager operations listed by RPC operation ID. (2) Modify the afs_cb_call tracepoint to print the string from the translation table rather than using the string at the afs_call name pointer. (3) Switch translation table depending on the service we're being accessed as (AFS or YFS) in the tracepoint print clause. Will this cause problems to userspace utilities? Note that the symbolic representation of the YFS service ID isn't available to this header, so I've put it in as a number. I'm not sure if this is the best way to do this. (4) Remove the name wrangling (CM_NAME) macro and put the names directly into the afs_call_type structs in cmservice.c. 
Fixes: 8e8d7f13b6d5a9 ("afs: Add some tracepoints") Reported-by: Alexey Dobriyan (SK hynix) Signed-off-by: David Howells Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Marc Dionne cc: Andrew Morton cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/YLAXfvZ+rObEOdc%2F@localhost.localdomain/ [1] Link: https://lore.kernel.org/r/643721.1623754699@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/162430903582.2896199.6098150063997983353.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162609463957.3133237.15916579353149746363.stgit@warthog.procyon.org.uk/ # v1 (repost) Link: https://lore.kernel.org/r/162610726860.3408253.445207609466288531.stgit@warthog.procyon.org.uk/ # v2 --- fs/afs/cmservice.c | 25 ++++---------- include/trace/events/afs.h | 67 +++++++++++++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index d3c6bb22c5f4..a3f5de28be79 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -29,16 +29,11 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *); static int afs_deliver_yfs_cb_callback(struct afs_call *); -#define CM_NAME(name) \ - char afs_SRXCB##name##_name[] __tracepoint_string = \ - "CB." 
#name - /* * CB.CallBack operation type */ -static CM_NAME(CallBack); static const struct afs_call_type afs_SRXCBCallBack = { - .name = afs_SRXCBCallBack_name, + .name = "CB.CallBack", .deliver = afs_deliver_cb_callback, .destructor = afs_cm_destructor, .work = SRXAFSCB_CallBack, @@ -47,9 +42,8 @@ static const struct afs_call_type afs_SRXCBCallBack = { /* * CB.InitCallBackState operation type */ -static CM_NAME(InitCallBackState); static const struct afs_call_type afs_SRXCBInitCallBackState = { - .name = afs_SRXCBInitCallBackState_name, + .name = "CB.InitCallBackState", .deliver = afs_deliver_cb_init_call_back_state, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, @@ -58,9 +52,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState = { /* * CB.InitCallBackState3 operation type */ -static CM_NAME(InitCallBackState3); static const struct afs_call_type afs_SRXCBInitCallBackState3 = { - .name = afs_SRXCBInitCallBackState3_name, + .name = "CB.InitCallBackState3", .deliver = afs_deliver_cb_init_call_back_state3, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, @@ -69,9 +62,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState3 = { /* * CB.Probe operation type */ -static CM_NAME(Probe); static const struct afs_call_type afs_SRXCBProbe = { - .name = afs_SRXCBProbe_name, + .name = "CB.Probe", .deliver = afs_deliver_cb_probe, .destructor = afs_cm_destructor, .work = SRXAFSCB_Probe, @@ -80,9 +72,8 @@ static const struct afs_call_type afs_SRXCBProbe = { /* * CB.ProbeUuid operation type */ -static CM_NAME(ProbeUuid); static const struct afs_call_type afs_SRXCBProbeUuid = { - .name = afs_SRXCBProbeUuid_name, + .name = "CB.ProbeUuid", .deliver = afs_deliver_cb_probe_uuid, .destructor = afs_cm_destructor, .work = SRXAFSCB_ProbeUuid, @@ -91,9 +82,8 @@ static const struct afs_call_type afs_SRXCBProbeUuid = { /* * CB.TellMeAboutYourself operation type */ -static CM_NAME(TellMeAboutYourself); static const struct 
afs_call_type afs_SRXCBTellMeAboutYourself = { - .name = afs_SRXCBTellMeAboutYourself_name, + .name = "CB.TellMeAboutYourself", .deliver = afs_deliver_cb_tell_me_about_yourself, .destructor = afs_cm_destructor, .work = SRXAFSCB_TellMeAboutYourself, @@ -102,9 +92,8 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { /* * YFS CB.CallBack operation type */ -static CM_NAME(YFS_CallBack); static const struct afs_call_type afs_SRXYFSCB_CallBack = { - .name = afs_SRXCBYFS_CallBack_name, + .name = "YFSCB.CallBack", .deliver = afs_deliver_yfs_cb_callback, .destructor = afs_cm_destructor, .work = SRXAFSCB_CallBack, diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 3ccf591b2374..9f73ed2cf061 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -174,6 +174,34 @@ enum afs_vl_operation { afs_VL_GetCapabilities = 65537, /* AFS Get VL server capabilities */ }; +enum afs_cm_operation { + afs_CB_CallBack = 204, /* AFS break callback promises */ + afs_CB_InitCallBackState = 205, /* AFS initialise callback state */ + afs_CB_Probe = 206, /* AFS probe client */ + afs_CB_GetLock = 207, /* AFS get contents of CM lock table */ + afs_CB_GetCE = 208, /* AFS get cache file description */ + afs_CB_GetXStatsVersion = 209, /* AFS get version of extended statistics */ + afs_CB_GetXStats = 210, /* AFS get contents of extended statistics data */ + afs_CB_InitCallBackState3 = 213, /* AFS initialise callback state, version 3 */ + afs_CB_ProbeUuid = 214, /* AFS check the client hasn't rebooted */ +}; + +enum yfs_cm_operation { + yfs_CB_Probe = 206, /* YFS probe client */ + yfs_CB_GetLock = 207, /* YFS get contents of CM lock table */ + yfs_CB_XStatsVersion = 209, /* YFS get version of extended statistics */ + yfs_CB_GetXStats = 210, /* YFS get contents of extended statistics data */ + yfs_CB_InitCallBackState3 = 213, /* YFS initialise callback state, version 3 */ + yfs_CB_ProbeUuid = 214, /* YFS check the client hasn't rebooted */ + 
yfs_CB_GetServerPrefs = 215, + yfs_CB_GetCellServDV = 216, + yfs_CB_GetLocalCell = 217, + yfs_CB_GetCacheConfig = 218, + yfs_CB_GetCellByNum = 65537, + yfs_CB_TellMeAboutYourself = 65538, /* get client capabilities */ + yfs_CB_CallBack = 64204, +}; + enum afs_edit_dir_op { afs_edit_dir_create, afs_edit_dir_create_error, @@ -436,6 +464,32 @@ enum afs_cb_break_reason { EM(afs_YFSVL_GetCellName, "YFSVL.GetCellName") \ E_(afs_VL_GetCapabilities, "VL.GetCapabilities") +#define afs_cm_operations \ + EM(afs_CB_CallBack, "CB.CallBack") \ + EM(afs_CB_InitCallBackState, "CB.InitCallBackState") \ + EM(afs_CB_Probe, "CB.Probe") \ + EM(afs_CB_GetLock, "CB.GetLock") \ + EM(afs_CB_GetCE, "CB.GetCE") \ + EM(afs_CB_GetXStatsVersion, "CB.GetXStatsVersion") \ + EM(afs_CB_GetXStats, "CB.GetXStats") \ + EM(afs_CB_InitCallBackState3, "CB.InitCallBackState3") \ + E_(afs_CB_ProbeUuid, "CB.ProbeUuid") + +#define yfs_cm_operations \ + EM(yfs_CB_Probe, "YFSCB.Probe") \ + EM(yfs_CB_GetLock, "YFSCB.GetLock") \ + EM(yfs_CB_XStatsVersion, "YFSCB.XStatsVersion") \ + EM(yfs_CB_GetXStats, "YFSCB.GetXStats") \ + EM(yfs_CB_InitCallBackState3, "YFSCB.InitCallBackState3") \ + EM(yfs_CB_ProbeUuid, "YFSCB.ProbeUuid") \ + EM(yfs_CB_GetServerPrefs, "YFSCB.GetServerPrefs") \ + EM(yfs_CB_GetCellServDV, "YFSCB.GetCellServDV") \ + EM(yfs_CB_GetLocalCell, "YFSCB.GetLocalCell") \ + EM(yfs_CB_GetCacheConfig, "YFSCB.GetCacheConfig") \ + EM(yfs_CB_GetCellByNum, "YFSCB.GetCellByNum") \ + EM(yfs_CB_TellMeAboutYourself, "YFSCB.TellMeAboutYourself") \ + E_(yfs_CB_CallBack, "YFSCB.CallBack") + #define afs_edit_dir_ops \ EM(afs_edit_dir_create, "create") \ EM(afs_edit_dir_create_error, "c_fail") \ @@ -569,6 +623,8 @@ afs_server_traces; afs_cell_traces; afs_fs_operations; afs_vl_operations; +afs_cm_operations; +yfs_cm_operations; afs_edit_dir_ops; afs_edit_dir_reasons; afs_eproto_causes; @@ -649,20 +705,21 @@ TRACE_EVENT(afs_cb_call, TP_STRUCT__entry( __field(unsigned int, call ) - __field(const char *, name ) 
__field(u32, op ) + __field(u16, service_id ) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->name = call->type->name; __entry->op = call->operation_ID; + __entry->service_id = call->service_id; ), - TP_printk("c=%08x %s o=%u", + TP_printk("c=%08x %s", __entry->call, - __entry->name, - __entry->op) + __entry->service_id == 2501 ? + __print_symbolic(__entry->op, yfs_cm_operations) : + __print_symbolic(__entry->op, afs_cm_operations)) ); TRACE_EVENT(afs_call, From afe6949862f77bcc14fa16ad7938a04e84586d6a Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 30 Apr 2021 08:50:31 -0700 Subject: [PATCH 277/794] afs: check function return Static analysis reports this problem write.c:773:29: warning: Assigned value is garbage or undefined mapping->writeback_index = next; ^ ~~~~ The call to afs_writepages_region() can return without setting next. So check the function return before using next. Changes: ver #2: - Need to fix the range_cyclic case also[1]. Fixes: e87b03f5830e ("afs: Prepare for use of THPs") Signed-off-by: Tom Rix Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/20210430155031.3287870-1-trix@redhat.com Link: https://lore.kernel.org/r/CAB9dFdvHsLsw7CMnB+4cgciWDSqVjuij4mH3TaXnHQB8sz5rHw@mail.gmail.com/ [1] Link: https://lore.kernel.org/r/162609464716.3133237.10354897554363093252.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162610727640.3408253.8687445613469681311.stgit@warthog.procyon.org.uk/ # v2 --- fs/afs/write.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 3104b62c2082..1ed62e0ccfe5 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -771,13 +771,19 @@ int afs_writepages(struct address_space *mapping, if (wbc->range_cyclic) { start = mapping->writeback_index * PAGE_SIZE; ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next); - if (start > 0 && wbc->nr_to_write > 0 && 
ret == 0) - ret = afs_writepages_region(mapping, wbc, 0, start, - &next); - mapping->writeback_index = next / PAGE_SIZE; + if (ret == 0) { + mapping->writeback_index = next / PAGE_SIZE; + if (start > 0 && wbc->nr_to_write > 0) { + ret = afs_writepages_region(mapping, wbc, 0, + start, &next); + if (ret == 0) + mapping->writeback_index = + next / PAGE_SIZE; + } + } } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); - if (wbc->nr_to_write > 0) + if (wbc->nr_to_write > 0 && ret == 0) mapping->writeback_index = next; } else { ret = afs_writepages_region(mapping, wbc, From 5a972474cf685bf99ca430979657095bda3a15c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 12 Jul 2021 17:04:47 +0100 Subject: [PATCH 278/794] afs: Fix setting of writeback_index Fix afs_writepages() to always set mapping->writeback_index to a page index and not a byte position[1]. Fixes: 31143d5d515e ("AFS: implement basic file write support") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/CAB9dFdvHsLsw7CMnB+4cgciWDSqVjuij4mH3TaXnHQB8sz5rHw@mail.gmail.com/ [1] Link: https://lore.kernel.org/r/162610728339.3408253.4604750166391496546.stgit@warthog.procyon.org.uk/ # v2 (no v1) --- fs/afs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 1ed62e0ccfe5..c0534697268e 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -784,7 +784,7 @@ int afs_writepages(struct address_space *mapping, } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); if (wbc->nr_to_write > 0 && ret == 0) - mapping->writeback_index = next; + mapping->writeback_index = next / PAGE_SIZE; } else { ret = afs_writepages_region(mapping, wbc, wbc->range_start, wbc->range_end, &next); From b428081282f85db8a0d4ae6206a8c39db9c8341b 
Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 29 Apr 2021 18:18:12 +0800 Subject: [PATCH 279/794] afs: Remove redundant assignment to ret Variable ret is set to -ENOENT and -ENOMEM but this value is never read as it is overwritten or not used later on, hence it is a redundant assignment and can be removed. Cleans up the following clang-analyzer warning: fs/afs/dir.c:2014:4: warning: Value stored to 'ret' is never read [clang-analyzer-deadcode.DeadStores]. fs/afs/dir.c:659:2: warning: Value stored to 'ret' is never read [clang-analyzer-deadcode.DeadStores]. [DH made the following modifications: - In afs_rename(), -ENOMEM should be placed in op->error instead of ret, rather than the assignment being removed entirely. afs_put_operation() will pick it up from there and return it. - If afs_sillyrename() fails, its error code should be placed in op->error rather than in ret also. ] Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept") Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/1619691492-83866-1-git-send-email-jiapeng.chong@linux.alibaba.com Link: https://lore.kernel.org/r/162609465444.3133237.7562832521724298900.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/162610729052.3408253.17364333638838151299.stgit@warthog.procyon.org.uk/ # v2 --- fs/afs/dir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 78719f2f567e..ac829e63c570 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -656,7 +656,6 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, return ret; } - ret = -ENOENT; if (!cookie.found) { _leave(" = -ENOENT [not found]"); return -ENOENT; @@ -2020,17 +2019,20 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, if (d_count(new_dentry) > 2) { /* copy the target dentry's name */ 
- ret = -ENOMEM; op->rename.tmp = d_alloc(new_dentry->d_parent, &new_dentry->d_name); - if (!op->rename.tmp) + if (!op->rename.tmp) { + op->error = -ENOMEM; goto error; + } ret = afs_sillyrename(new_dvnode, AFS_FS_I(d_inode(new_dentry)), new_dentry, op->key); - if (ret) + if (ret) { + op->error = ret; goto error; + } op->dentry_2 = op->rename.tmp; op->rename.rehash = NULL; From 4afa0c22eed33cfe0c590742387f0d16f32412f3 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 13 Jul 2021 12:34:38 +0300 Subject: [PATCH 280/794] driver core: auxiliary bus: Fix memory leak when driver_register() fail If driver_register() returns with error we need to free the memory allocated for auxdrv->driver.name before returning from __auxiliary_driver_register() Fixes: 7de3697e9cbd4 ("Add auxiliary bus support") Reviewed-by: Dan Williams Cc: stable Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20210713093438.3173-1-peter.ujfalusi@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/auxiliary.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index adc199dfba3c..6a30264ab2ba 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -231,6 +231,8 @@ EXPORT_SYMBOL_GPL(auxiliary_find_device); int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module *owner, const char *modname) { + int ret; + if (WARN_ON(!auxdrv->probe) || WARN_ON(!auxdrv->id_table)) return -EINVAL; @@ -246,7 +248,11 @@ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, auxdrv->driver.bus = &auxiliary_bus_type; auxdrv->driver.mod_name = modname; - return driver_register(&auxdrv->driver); + ret = driver_register(&auxdrv->driver); + if (ret) + kfree(auxdrv->driver.name); + + return ret; } EXPORT_SYMBOL_GPL(__auxiliary_driver_register); From e9a72f874d5b95cef0765bafc56005a50f72c5fe Mon Sep 17 00:00:00 2001 From: Sayanta Pattanayak Date: Tue, 20 Jul 2021 17:17:40 +0100 
Subject: [PATCH 281/794] r8169: Avoid duplicate sysfs entry creation error When registering the MDIO bus for a r8169 device, we use the PCI bus/device specifier as a (seemingly) unique device identifier. However the very same BDF number can be used on another PCI segment, which makes the driver fail probing: [ 27.544136] r8169 0002:07:00.0: enabling device (0000 -> 0003) [ 27.559734] sysfs: cannot create duplicate filename '/class/mdio_bus/r8169-700' .... [ 27.684858] libphy: mii_bus r8169-700 failed to register [ 27.695602] r8169: probe of 0002:07:00.0 failed with error -22 Add the segment number to the device name to make it more unique. This fixes operation on ARM N1SDP boards, with two boards connected together to form an SMP system, and all on-board devices showing up twice, just on different PCI segments. A similar issue would occur on large systems with many PCI slots and multiple RTL8169 NICs. Fixes: f1e911d5d0dfd ("r8169: add basic phylib support") Signed-off-by: Sayanta Pattanayak [Andre: expand commit message, use pci_domain_nr()] Signed-off-by: Andre Przywara Acked-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- drivers/net/ethernet/realtek/r8169_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index f744557c33a3..c7af5bc3b8af 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5084,7 +5084,8 @@ static int r8169_mdio_register(struct rtl8169_private *tp) new_bus->priv = tp; new_bus->parent = &pdev->dev; new_bus->irq[0] = PHY_MAC_INTERRUPT; - snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x", pci_dev_id(pdev)); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x", + pci_domain_nr(pdev->bus), pci_dev_id(pdev)); new_bus->read = r8169_mdio_read_reg; new_bus->write = r8169_mdio_write_reg; From 0077a50082729c3f9ea2836f59e35d9b7dacfb12 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 21 Jul 2021 12:16:26 +0200 Subject: [PATCH 282/794] rbd: resurrect setting of disk->private_data in rbd_init_disk() rbd_open() and rbd_release() expect that disk->private_data is set to rbd_dev. Otherwise we hit a NULL pointer dereference when mapping the image. 
URL: https://tracker.ceph.com/issues/51759 Fixes: 195b1956b85b ("rbd: use blk_mq_alloc_disk and blk_cleanup_disk") Signed-off-by: Ilya Dryomov Reviewed-by: Christoph Hellwig --- drivers/block/rbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 784797fa9a53..90b947c96402 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4943,6 +4943,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->minors = RBD_MINORS_PER_MAJOR; } disk->fops = &rbd_bd_ops; + disk->private_data = rbd_dev; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ From aaeb7bb061be545251606f4d9c82d710ca2a7c8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jul 2021 10:00:11 +0200 Subject: [PATCH 283/794] nvme: set the PRACT bit when using Write Zeroes with T10 PI When using Write Zeroes on a namespace that has protection information enabled the behavior without the PRACT bit is counter-intuitive and will generally lead to validation failures when reading the written blocks. Fix this by always setting the PRACT bit that generates matching PI data on the fly. Fixes: 6e02318eaea5 ("nvme: add support for the Write Zeroes command") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Martin K. 
Petersen --- drivers/nvme/host/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 17c05a4595f0..dfd9dec0c1f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -900,7 +900,10 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); - cmnd->write_zeroes.control = 0; + if (nvme_ns_has_pi(ns)) + cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT); + else + cmnd->write_zeroes.control = 0; return BLK_STS_OK; } From e64daad660a0c9ace3acdc57099fffe5ed83f977 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 16 Jul 2021 14:44:07 +0300 Subject: [PATCH 284/794] driver core: Prevent warning when removing a device link from unregistered consumer sysfs_remove_link() causes a warning if the parent directory does not exist. That can happen if the device link consumer has not been registered. So do not attempt sysfs_remove_link() in that case. Fixes: 287905e68dd29 ("driver core: Expose device link details in sysfs") Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # 5.9+ Reviewed-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20210716114408.17320-2-adrian.hunter@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index cadcade65825..f6360490a4a3 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -574,8 +574,10 @@ static void devlink_remove_symlinks(struct device *dev, return; } - snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); - sysfs_remove_link(&con->kobj, buf); + if (device_is_registered(con)) { + snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); + sysfs_remove_link(&con->kobj, buf); + } snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); kfree(buf); From cb7abd1db6e5f99a05f1a00b65be29029a6a152a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:35:44 +0200 Subject: [PATCH 285/794] staging: rtl8723bs: select CONFIG_CRYPTO_LIB_ARC4 The other rtlwifi drivers already have this, but r8723bs was converted to the generic implementation without adding the select: ERROR: modpost: "arc4_crypt" [drivers/staging/rtl8723bs/r8723bs.ko] undefined! ERROR: modpost: "arc4_setkey" [drivers/staging/rtl8723bs/r8723bs.ko] undefined! 
Fixes: 1b11e893eda0 ("staging: rtl8723bs: replace private arc4 encryption with in-kernel one") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210721153550.3624490-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8723bs/Kconfig b/drivers/staging/rtl8723bs/Kconfig index a88467334dac..7eae820eae3b 100644 --- a/drivers/staging/rtl8723bs/Kconfig +++ b/drivers/staging/rtl8723bs/Kconfig @@ -5,6 +5,7 @@ config RTL8723BS depends on m select WIRELESS_EXT select WEXT_PRIV + select CRYPTO_LIB_ARC4 help This option enables support for RTL8723BS SDIO drivers, such as the wifi found on the 1st gen Intel Compute Stick, the CHIP From 58acd10092268831e49de279446c314727101292 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 20 Jul 2021 16:07:01 -0400 Subject: [PATCH 286/794] sctp: update active_key for asoc when old key is being replaced syzbot reported a call trace: BUG: KASAN: use-after-free in sctp_auth_shkey_hold+0x22/0xa0 net/sctp/auth.c:112 Call Trace: sctp_auth_shkey_hold+0x22/0xa0 net/sctp/auth.c:112 sctp_set_owner_w net/sctp/socket.c:131 [inline] sctp_sendmsg_to_asoc+0x152e/0x2180 net/sctp/socket.c:1865 sctp_sendmsg+0x103b/0x1d30 net/sctp/socket.c:2027 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:821 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:723 This is a use-after-free issue caused by not updating asoc->shkey after it was replaced in the key list asoc->endpoint_shared_keys, and the old key was freed. This patch fixes it by also updating active_key for asoc when the old key is being replaced with a new one. Note that this issue doesn't exist in sctp_auth_del_key_id(), as it's not allowed to delete the active_key from the asoc. Fixes: 1b1e0bc99474 ("sctp: add refcnt support for sh_key") Reported-by: syzbot+b774577370208727d12b@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- net/sctp/auth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 6f8319b828b0..fe74c5f95630 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -860,6 +860,8 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, if (replace) { list_del_init(&shkey->key_list); sctp_auth_shkey_release(shkey); + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); } list_add(&cur_key->key_list, sh_keys); From 9bfce73c8921c92a9565562e6e7d458d37b7ce80 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 20 Jul 2021 23:35:28 +0300 Subject: [PATCH 287/794] udp: check encap socket in __udp_lib_err Commit d26796ae5894 ("udp: check udp sock encap_type in __udp_lib_err") added checks for encapsulated sockets but it broke cases when there is no implementation of encap_err_lookup for encapsulation, i.e. ESP in UDP encapsulation. Fix it by calling encap_err_lookup only if socket implements this method otherwise treat it as legal socket. Fixes: d26796ae5894 ("udp: check udp sock encap_type in __udp_lib_err") Signed-off-by: Vadim Fedorenko Reviewed-by: Xin Long Signed-off-by: David S. 
Miller --- net/ipv4/udp.c | 25 +++++++++++++++++++------ net/ipv6/udp.c | 25 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 62cd4cd52e84..1a742b710e54 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -645,10 +645,12 @@ static struct sock *__udp4_lib_err_encap(struct net *net, const struct iphdr *iph, struct udphdr *uh, struct udp_table *udptable, + struct sock *sk, struct sk_buff *skb, u32 info) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; - struct sock *sk; + struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); @@ -659,18 +661,28 @@ static struct sock *__udp4_lib_err_encap(struct net *net, /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, iph->ihl << 2); + if (sk) { + up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (lookup && lookup(sk, skb)) + sk = NULL; + + goto out; + } + sk = __udp4_lib_lookup(net, iph->daddr, uh->source, iph->saddr, uh->dest, skb->dev->ifindex, 0, udptable, NULL); if (sk) { - int (*lookup)(struct sock *sk, struct sk_buff *skb); - struct udp_sock *up = udp_sk(sk); + up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } +out: if (!sk) sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info)); @@ -707,15 +719,16 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ - sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udp_encap_needed_key)) { - sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb, + sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, info); if (!sk) return 0; - } + } else + sk = 
ERR_PTR(-ENOENT); if (IS_ERR(sk)) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0cc7ba531b34..c5e15e94bb00 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -502,12 +502,14 @@ static struct sock *__udp6_lib_err_encap(struct net *net, const struct ipv6hdr *hdr, int offset, struct udphdr *uh, struct udp_table *udptable, + struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, __be32 info) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; - struct sock *sk; + struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); @@ -518,18 +520,28 @@ static struct sock *__udp6_lib_err_encap(struct net *net, /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, offset); + if (sk) { + up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (lookup && lookup(sk, skb)) + sk = NULL; + + goto out; + } + sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, &hdr->saddr, uh->dest, inet6_iif(skb), 0, udptable, skb); if (sk) { - int (*lookup)(struct sock *sk, struct sk_buff *skb); - struct udp_sock *up = udp_sk(sk); + up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } +out: if (!sk) { sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, offset, info)); @@ -558,16 +570,17 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ - sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, - udptable, skb, + udptable, sk, skb, opt, type, code, info); if (!sk) return 0; - } + } else + sk = ERR_PTR(-ENOENT); if (IS_ERR(sk)) { 
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), From 6e0b6528d783b2b87bd9e1bea97cf4dac87540d7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Jul 2021 13:21:08 -0500 Subject: [PATCH 288/794] drm/i915: Correct the docs for intel_engine_cmd_parser In 93b713304188 ("drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser""), the parameters to intel_engine_cmd_parser() were altered without updating the docs, causing Fi.CI.DOCS to start failing. Fixes: c9d9fdbc108a ("drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"") Signed-off-by: Jason Ekstrand Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210720182108.2761496-1-jason@jlekstrand.net Signed-off-by: Rodrigo Vivi [Added 'Fixes:' tag and corrected the hash for the ancestor] (cherry picked from commit 15eb083bdb561bb4862cd04cd0523e55483e877e) Signed-off-by: Rodrigo Vivi [Updated Fixes tag to match fixes branch] --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 00ec618d0159..a3b4d99d64b9 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1416,9 +1416,7 @@ static unsigned long *alloc_whitelist(u32 batch_length) * @batch_offset: byte offset in the batch at which execution starts * @batch_length: length of the commands in batch_obj * @shadow: validated copy of the batch buffer in question - * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist() - * @shadow_map: mapping to @shadow vma - * @batch_map: mapping to @batch vma + * @trampoline: true if we need to trampoline into privileged execution * * Parses the specified batch buffer looking for privilege violations as * described in the overview. 
From ece1278a9b81bdfc088f087f8372a072b7010956 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 20 Jul 2021 23:35:29 +0300 Subject: [PATCH 289/794] selftests: net: add ESP-in-UDP PMTU test The case of ESP in UDP encapsulation was not covered before. Add cases of local changes of MTU and difference on routed path. Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- tools/testing/selftests/net/nettest.c | 55 ++++++- tools/testing/selftests/net/pmtu.sh | 212 +++++++++++++++++++++++++- 2 files changed, 260 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 6365c7fd1262..bd6288302094 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -11,9 +11,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -27,6 +29,10 @@ #include #include +#include +#include +#include + #ifndef IPV6_UNICAST_IF #define IPV6_UNICAST_IF 76 #endif @@ -114,6 +120,9 @@ struct sock_args { struct in_addr in; struct in6_addr in6; } expected_raddr; + + /* ESP in UDP encap test */ + int use_xfrm; }; static int server_mode; @@ -1346,6 +1355,41 @@ static int bind_socket(int sd, struct sock_args *args) return 0; } +static int config_xfrm_policy(int sd, struct sock_args *args) +{ + struct xfrm_userpolicy_info policy = {}; + int type = UDP_ENCAP_ESPINUDP; + int xfrm_af = IP_XFRM_POLICY; + int level = SOL_IP; + + if (args->type != SOCK_DGRAM) { + log_error("Invalid socket type. 
Only DGRAM could be used for XFRM\n"); + return 1; + } + + policy.action = XFRM_POLICY_ALLOW; + policy.sel.family = args->version; + if (args->version == AF_INET6) { + xfrm_af = IPV6_XFRM_POLICY; + level = SOL_IPV6; + } + + policy.dir = XFRM_POLICY_OUT; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + policy.dir = XFRM_POLICY_IN; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) { + log_err_errno("Failed to set xfrm encap"); + return 1; + } + + return 0; +} + static int lsock_init(struct sock_args *args) { long flags; @@ -1389,6 +1433,11 @@ static int lsock_init(struct sock_args *args) if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0) log_err_errno("Failed to set close-on-exec flag"); + if (args->use_xfrm && config_xfrm_policy(sd, args)) { + log_err_errno("Failed to set xfrm policy"); + goto err; + } + out: return sd; @@ -1772,7 +1821,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" +#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" static void print_usage(char *prog) { @@ -1795,6 +1844,7 @@ static void print_usage(char *prog) " -D|R datagram (D) / raw (R) socket (default stream)\n" " -l addr local address to bind to in server mode\n" " -c addr local address to bind to in client mode\n" + " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" " -I dev bind socket to given device name - server mode\n" @@ -1966,6 +2016,9 @@ int main(int argc, char *argv[]) case 'q': quiet = 1; break; + case 'x': + args.use_xfrm = 1; + break; default: print_usage(argv[0]); return 1; diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 64cd2e23c568..543ad7513a8e 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ 
b/tools/testing/selftests/net/pmtu.sh @@ -118,6 +118,16 @@ # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU # changes alone won't affect PMTU # +# - pmtu_vti4_udp_exception +# Same as pmtu_vti4_exception, but using ESP-in-UDP +# +# - pmtu_vti4_udp_routed_exception +# Set up vti tunnel on top of veth connected through routing namespace and +# add xfrm states and policies with ESP-in-UDP encapsulation. Check that +# route exception is not created if link layer MTU is not exceeded, then +# lower MTU on second part of routed environment and check that exception +# is created with the expected PMTU. +# # - pmtu_vti6_exception # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is @@ -125,6 +135,13 @@ # decrease and increase MTU of tunnel, checking that route exception PMTU # changes accordingly # +# - pmtu_vti6_udp_exception +# Same as pmtu_vti6_exception, but using ESP-in-UDP +# +# - pmtu_vti6_udp_routed_exception +# Same as pmtu_vti4_udp_routed_exception, but with IPv6 and vti6 +# endpoints +# # - pmtu_vti4_default_mtu # Set up vti4 tunnel on top of veth, in two namespaces with matching # endpoints. 
Check that MTU assigned to vti interface is the MTU of the @@ -224,6 +241,10 @@ tests=" pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 pmtu_vti6_exception vti6: PMTU exceptions 0 pmtu_vti4_exception vti4: PMTU exceptions 0 + pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0 + pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0 pmtu_vti4_default_mtu vti4: default MTU assignment 0 pmtu_vti6_default_mtu vti6: default MTU assignment 0 pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0 @@ -246,7 +267,6 @@ ns_b="ip netns exec ${NS_B}" ns_c="ip netns exec ${NS_C}" ns_r1="ip netns exec ${NS_R1}" ns_r2="ip netns exec ${NS_R2}" - # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). 
@@ -279,7 +299,6 @@ routes=" A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 B default ${prefix6}:${b_r1}::2 " - USE_NH="no" # ns family nh id destination gateway nexthops=" @@ -326,6 +345,7 @@ dummy6_mask="64" err_buf= tcpdump_pids= +nettest_pids= err() { err_buf="${err_buf}${1} @@ -548,6 +568,14 @@ setup_vti6() { setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} } +setup_vti4routed() { + setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} +} + +setup_vti6routed() { + setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} +} + setup_vxlan_or_geneve() { type="${1}" a_addr="${2}" @@ -619,18 +647,36 @@ setup_xfrm() { proto=${1} veth_a_addr="${2}" veth_b_addr="${3}" + encap=${4} - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 
'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel } +setup_nettest_xfrm() { + which nettest >/dev/null + if [ $? -ne 0 ]; then + echo "'nettest' command not found; skipping tests" + return 1 + fi + + [ ${1} -eq 6 ] && proto="-6" || proto="" + port=${2} + + run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" + + run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" 
+} + setup_xfrm4() { setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} } @@ -639,6 +685,26 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_xfrm4udp() { + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udp() { + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + +setup_xfrm4udprouted() { + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udprouted() { + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + setup_routing_old() { for i in ${routes}; do [ "${ns}" = "" ] && ns="${i}" && continue @@ -823,6 +889,11 @@ cleanup() { done tcpdump_pids= + for pid in ${nettest_pids}; do + kill ${pid} + done + nettest_pids= + for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do ip netns del ${n} 2> /dev/null done @@ -1432,6 +1503,135 @@ test_pmtu_vti6_exception() { return ${fail} } +test_pmtu_vti4_udp_exception() { + setup namespaces veth vti4 xfrm4udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_a ${veth_mtu} + mtu "${ns_b}" veth_b ${veth_mtu} + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now 
exceed link layer MTU by one byte, check that exception is created + # with the right PMTU value + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" +} + +test_pmtu_vti6_udp_exception() { + setup namespaces veth vti6 xfrm6udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + fail=0 + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_a 4000 + mtu "${ns_b}" veth_b 4000 + mtu "${ns_a}" vti6_a 5000 + mtu "${ns_b}" vti6_b 5000 + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 + + # Decrease tunnel MTU, check for PMTU decrease in route exception + mtu "${ns_a}" vti6_a 3000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 + + # Increase tunnel MTU, check for PMTU increase in route exception + mtu "${ns_a}" vti6_a 9000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 + + return ${fail} +} + +test_pmtu_vti4_udp_routed_exception() { + setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + 
mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" +} + +test_pmtu_vti6_udp_routed_exception() { + setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 40)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 48)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + # mtu "${ns_a}" vti6_a ${vti_mtu} + # mtu "${ns_b}" vti6_b ${vti_mtu} + + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr} + + # Check that exception was not created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception 
is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" + +} + test_pmtu_vti4_default_mtu() { setup namespaces veth vti4 || return $ksft_skip From f5a11c69b69923a4367d24365ad4dff6d4f3fc42 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 21 Jul 2021 15:55:43 +0000 Subject: [PATCH 290/794] Revert "x86/hyperv: fix logical processor creation" This reverts commit 450605c28d571eddca39a65fdbc1338add44c6d9. Signed-off-by: Wei Liu --- arch/x86/kernel/cpu/mshyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index cc8f1773deca..c890d67a64ad 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -237,7 +237,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus) for_each_present_cpu(i) { if (i == 0) continue; - ret = hv_call_add_logical_proc(numa_cpu_node(i), i, i); + ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i)); BUG_ON(ret); } From bb55362bd6976631b662ca712779b6532d8de0a6 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 20 Jul 2021 19:34:39 -0700 Subject: [PATCH 291/794] ibmvnic: Remove the proper scrq flush Commit 65d6470d139a ("ibmvnic: clean pending indirect buffs during reset") intended to remove the call to ibmvnic_tx_scrq_flush() when the ->resetting flag is true and was tested that way. But during the final rebase to net-next, the hunk got applied to a block few lines below (which happened to have the same diff context) and the wrong call to ibmvnic_tx_scrq_flush() got removed. Fix that by removing the correct ibmvnic_tx_scrq_flush() and restoring the one that was incorrectly removed. 
Fixes: 65d6470d139a ("ibmvnic: clean pending indirect buffs during reset") Reported-by: Dany Madden Signed-off-by: Sukadev Bhattiprolu Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index ed77191d19f4..a775c69e4fd7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1731,7 +1731,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_send_failed++; tx_dropped++; ret = NETDEV_TX_OK; - ibmvnic_tx_scrq_flush(adapter, tx_scrq); goto out; } @@ -1753,6 +1752,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb_any(skb); tx_send_failed++; tx_dropped++; + ibmvnic_tx_scrq_flush(adapter, tx_scrq); ret = NETDEV_TX_OK; goto out; } From 161dcc02428858fe338b7493158ed6f5fc2a8f26 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:19:32 +0200 Subject: [PATCH 292/794] net: ixp46x: fix ptp build failure The rework of the ixp46x cpu detection left the network driver in a half broken state: drivers/net/ethernet/xscale/ptp_ixp46x.c: In function 'ptp_ixp_init': drivers/net/ethernet/xscale/ptp_ixp46x.c:290:51: error: 'IXP4XX_TIMESYNC_BASE_VIRT' undeclared (first use in this function) 290 | (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT; | ^~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/xscale/ptp_ixp46x.c:290:51: note: each undeclared identifier is reported only once for each function it appears in drivers/net/ethernet/xscale/ptp_ixp46x.c: At top level: drivers/net/ethernet/xscale/ptp_ixp46x.c:323:1: error: data definition has no type or storage class [-Werror] 323 | module_init(ptp_ixp_init); I have patches to complete the transition for a future release, but for the moment, add the missing include statements to get it to build again. 
Fixes: 09aa9aabdcc4 ("soc: ixp4xx: move cpu detection to linux/soc/ixp4xx/cpu.h") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/ptp_ixp46x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c index 99d4d9439d05..a6fb88fd42f7 100644 --- a/drivers/net/ethernet/xscale/ptp_ixp46x.c +++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include "ixp46x_ts.h" From 1e7107c5ef44431bc1ebbd4c353f1d7c22e5f2ec Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 16 Jun 2021 08:51:57 -0400 Subject: [PATCH 293/794] cgroup1: fix leaked context root causing sporadic NULL deref in LTP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Richard reported sporadic (roughly one in 10 or so) null dereferences and other strange behaviour for a set of automated LTP tests. Things like: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 1516 Comm: umount Not tainted 5.10.0-yocto-standard #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014 RIP: 0010:kernfs_sop_show_path+0x1b/0x60 ...or these others: RIP: 0010:do_mkdirat+0x6a/0xf0 RIP: 0010:d_alloc_parallel+0x98/0x510 RIP: 0010:do_readlinkat+0x86/0x120 There were other less common instances of some kind of a general scribble but the common theme was mount and cgroup and a dubious dentry triggering the NULL dereference. I was only able to reproduce it under qemu by replicating Richard's setup as closely as possible - I never did get it to happen on bare metal, even while keeping everything else the same. 
In commit 71d883c37e8d ("cgroup_do_mount(): massage calling conventions") we see this as a part of the overall change: -------------- struct cgroup_subsys *ss; - struct dentry *dentry; [...] - dentry = cgroup_do_mount(&cgroup_fs_type, fc->sb_flags, root, - CGROUP_SUPER_MAGIC, ns); [...] - if (percpu_ref_is_dying(&root->cgrp.self.refcnt)) { - struct super_block *sb = dentry->d_sb; - dput(dentry); + ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); + if (!ret && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + struct super_block *sb = fc->root->d_sb; + dput(fc->root); deactivate_locked_super(sb); msleep(10); return restart_syscall(); } -------------- In changing from the local "*dentry" variable to using fc->root, we now export/leave that dentry pointer in the file context after doing the dput() in the unlikely "is_dying" case. With LTP doing a crazy amount of back to back mount/unmount [testcases/bin/cgroup_regression_5_1.sh] the unlikely becomes slightly likely and then bad things happen. A fix would be to not leave the stale reference in fc->root as follows: --------------                 dput(fc->root); + fc->root = NULL;                 deactivate_locked_super(sb); -------------- ...but then we are just open-coding a duplicate of fc_drop_locked() so we simply use that instead. 
Cc: Al Viro Cc: Tejun Heo Cc: Zefan Li Cc: Johannes Weiner Cc: stable@vger.kernel.org # v5.1+ Reported-by: Richard Purdie Fixes: 71d883c37e8d ("cgroup_do_mount(): massage calling conventions") Signed-off-by: Paul Gortmaker Signed-off-by: Tejun Heo --- fs/internal.h | 1 - include/linux/fs_context.h | 1 + kernel/cgroup/cgroup-v1.c | 4 +--- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 3ce8edbaa3ca..82e8eb32ff3d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -61,7 +61,6 @@ extern void __init chrdev_init(void); */ extern const struct fs_context_operations legacy_fs_context_ops; extern int parse_monolithic_mount_data(struct fs_context *, void *); -extern void fc_drop_locked(struct fs_context *); extern void vfs_clean_context(struct fs_context *fc); extern int finish_clean_context(struct fs_context *fc); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index e2bc16300c82..6b54982fc5f3 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -141,6 +141,7 @@ extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param); +extern void fc_drop_locked(struct fs_context *fc); /* * sget() wrappers to be called from the ->get_tree() op. 
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 8d6bf56ed77a..de2c432dee20 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1221,9 +1221,7 @@ int cgroup1_get_tree(struct fs_context *fc) ret = cgroup_do_get_tree(fc); if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { - struct super_block *sb = fc->root->d_sb; - dput(fc->root); - deactivate_locked_super(sb); + fc_drop_locked(fc); ret = 1; } From b42b0bddcbc87b4c66f6497f66fc72d52b712aa7 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 14 Jul 2021 17:19:33 +0800 Subject: [PATCH 294/794] workqueue: fix UAF in pwq_unbound_release_workfn() I got a UAF report when doing fuzz test: [ 152.880091][ T8030] ================================================================== [ 152.881240][ T8030] BUG: KASAN: use-after-free in pwq_unbound_release_workfn+0x50/0x190 [ 152.882442][ T8030] Read of size 4 at addr ffff88810d31bd00 by task kworker/3:2/8030 [ 152.883578][ T8030] [ 152.883932][ T8030] CPU: 3 PID: 8030 Comm: kworker/3:2 Not tainted 5.13.0+ #249 [ 152.885014][ T8030] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 [ 152.886442][ T8030] Workqueue: events pwq_unbound_release_workfn [ 152.887358][ T8030] Call Trace: [ 152.887837][ T8030] dump_stack_lvl+0x75/0x9b [ 152.888525][ T8030] ? pwq_unbound_release_workfn+0x50/0x190 [ 152.889371][ T8030] print_address_description.constprop.10+0x48/0x70 [ 152.890326][ T8030] ? pwq_unbound_release_workfn+0x50/0x190 [ 152.891163][ T8030] ? pwq_unbound_release_workfn+0x50/0x190 [ 152.891999][ T8030] kasan_report.cold.15+0x82/0xdb [ 152.892740][ T8030] ? pwq_unbound_release_workfn+0x50/0x190 [ 152.893594][ T8030] __asan_load4+0x69/0x90 [ 152.894243][ T8030] pwq_unbound_release_workfn+0x50/0x190 [ 152.895057][ T8030] process_one_work+0x47b/0x890 [ 152.895778][ T8030] worker_thread+0x5c/0x790 [ 152.896439][ T8030] ? 
process_one_work+0x890/0x890 [ 152.897163][ T8030] kthread+0x223/0x250 [ 152.897747][ T8030] ? set_kthread_struct+0xb0/0xb0 [ 152.898471][ T8030] ret_from_fork+0x1f/0x30 [ 152.899114][ T8030] [ 152.899446][ T8030] Allocated by task 8884: [ 152.900084][ T8030] kasan_save_stack+0x21/0x50 [ 152.900769][ T8030] __kasan_kmalloc+0x88/0xb0 [ 152.901416][ T8030] __kmalloc+0x29c/0x460 [ 152.902014][ T8030] alloc_workqueue+0x111/0x8e0 [ 152.902690][ T8030] __btrfs_alloc_workqueue+0x11e/0x2a0 [ 152.903459][ T8030] btrfs_alloc_workqueue+0x6d/0x1d0 [ 152.904198][ T8030] scrub_workers_get+0x1e8/0x490 [ 152.904929][ T8030] btrfs_scrub_dev+0x1b9/0x9c0 [ 152.905599][ T8030] btrfs_ioctl+0x122c/0x4e50 [ 152.906247][ T8030] __x64_sys_ioctl+0x137/0x190 [ 152.906916][ T8030] do_syscall_64+0x34/0xb0 [ 152.907535][ T8030] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 152.908365][ T8030] [ 152.908688][ T8030] Freed by task 8884: [ 152.909243][ T8030] kasan_save_stack+0x21/0x50 [ 152.909893][ T8030] kasan_set_track+0x20/0x30 [ 152.910541][ T8030] kasan_set_free_info+0x24/0x40 [ 152.911265][ T8030] __kasan_slab_free+0xf7/0x140 [ 152.911964][ T8030] kfree+0x9e/0x3d0 [ 152.912501][ T8030] alloc_workqueue+0x7d7/0x8e0 [ 152.913182][ T8030] __btrfs_alloc_workqueue+0x11e/0x2a0 [ 152.913949][ T8030] btrfs_alloc_workqueue+0x6d/0x1d0 [ 152.914703][ T8030] scrub_workers_get+0x1e8/0x490 [ 152.915402][ T8030] btrfs_scrub_dev+0x1b9/0x9c0 [ 152.916077][ T8030] btrfs_ioctl+0x122c/0x4e50 [ 152.916729][ T8030] __x64_sys_ioctl+0x137/0x190 [ 152.917414][ T8030] do_syscall_64+0x34/0xb0 [ 152.918034][ T8030] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 152.918872][ T8030] [ 152.919203][ T8030] The buggy address belongs to the object at ffff88810d31bc00 [ 152.919203][ T8030] which belongs to the cache kmalloc-512 of size 512 [ 152.921155][ T8030] The buggy address is located 256 bytes inside of [ 152.921155][ T8030] 512-byte region [ffff88810d31bc00, ffff88810d31be00) [ 152.922993][ T8030] The buggy address belongs to 
the page: [ 152.923800][ T8030] page:ffffea000434c600 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10d318 [ 152.925249][ T8030] head:ffffea000434c600 order:2 compound_mapcount:0 compound_pincount:0 [ 152.926399][ T8030] flags: 0x57ff00000010200(slab|head|node=1|zone=2|lastcpupid=0x7ff) [ 152.927515][ T8030] raw: 057ff00000010200 dead000000000100 dead000000000122 ffff888009c42c80 [ 152.928716][ T8030] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 [ 152.929890][ T8030] page dumped because: kasan: bad access detected [ 152.930759][ T8030] [ 152.931076][ T8030] Memory state around the buggy address: [ 152.931851][ T8030] ffff88810d31bc00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 152.932967][ T8030] ffff88810d31bc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 152.934068][ T8030] >ffff88810d31bd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 152.935189][ T8030] ^ [ 152.935763][ T8030] ffff88810d31bd80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 152.936847][ T8030] ffff88810d31be00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 152.937940][ T8030] ================================================================== If apply_wqattrs_prepare() fails in alloc_workqueue(), it will call put_pwq() which invoke a work queue to call pwq_unbound_release_workfn() and use the 'wq'. The 'wq' allocated in alloc_workqueue() will be freed in error path when apply_wqattrs_prepare() fails. So it will lead a UAF. CPU0 CPU1 alloc_workqueue() alloc_and_link_pwqs() apply_wqattrs_prepare() fails apply_wqattrs_cleanup() schedule_work(&pwq->unbound_release_work) kfree(wq) worker_thread() pwq_unbound_release_workfn() <- trigger uaf here If apply_wqattrs_prepare() fails, the new pwq are not linked, it doesn't hold any reference to the 'wq', 'wq' is invalid to access in the worker, so add check pwq if linked to fix this. 
Fixes: 2d5f0764b526 ("workqueue: split apply_workqueue_attrs() into 3 stages") Cc: stable@vger.kernel.org # v4.2+ Reported-by: Hulk Robot Suggested-by: Lai Jiangshan Signed-off-by: Yang Yingliang Reviewed-by: Lai Jiangshan Tested-by: Pavel Skripkin Signed-off-by: Tejun Heo --- kernel/workqueue.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 50142fc08902..f148eacda55a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3676,15 +3676,21 @@ static void pwq_unbound_release_workfn(struct work_struct *work) unbound_release_work); struct workqueue_struct *wq = pwq->wq; struct worker_pool *pool = pwq->pool; - bool is_last; + bool is_last = false; - if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) - return; + /* + * when @pwq is not linked, it doesn't hold any reference to the + * @wq, and @wq is invalid to access. + */ + if (!list_empty(&pwq->pwqs_node)) { + if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) + return; - mutex_lock(&wq->mutex); - list_del_rcu(&pwq->pwqs_node); - is_last = list_empty(&wq->pwqs); - mutex_unlock(&wq->mutex); + mutex_lock(&wq->mutex); + list_del_rcu(&pwq->pwqs_node); + is_last = list_empty(&wq->pwqs); + mutex_unlock(&wq->mutex); + } mutex_lock(&wq_pool_mutex); put_unbound_pool(pool); From d6a206e60124a9759dd7f6dfb86b0e1d3b1df82e Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 21 Jul 2021 17:17:45 +0100 Subject: [PATCH 295/794] USB: serial: cp210x: add ID for CEL EM3588 USB ZigBee stick Add the USB serial device ID for the CEL ZigBee EM3588 radio stick. 
Signed-off-by: John Keeping Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index af286240807e..3c80bfbf3bec 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -155,6 +155,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */ { USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ + { USB_DEVICE(0x10C4, 0x8A5B) }, /* CEL EM3588 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ From ec3102dc6b36c692104c4a0546d4119de59a3bc1 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Thu, 24 Jun 2021 11:05:42 -0400 Subject: [PATCH 296/794] drm/amd/display: Fix comparison error in dcn21 DML [why] A comparison error made it possible to not iterate through all the specified prefetch modes. 
[how] Correct "<" to "<=" Reviewed-by: Dmytro Laktyushkin Reviewed-by: Yongqiang Sun Acked-by: Rodrigo Siqueira Signed-off-by: Victor Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index c26e742e8137..d25a7d38d21f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -4889,7 +4889,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0] - || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); + || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode)); if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0]; From 32f1d0cfc3444fb44ff1dba10d28e479690bdd3e Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 23 Jun 2021 15:48:02 -0400 Subject: [PATCH 297/794] drm/amd/display: implement workaround for riommu related hang [Why] During S4/S5/reboot, riommu invalidation requests sometimes arrive too early, and DCN may be unable to respond to the invalidation request, resulting in a pstate hang. [How] VBIOS will force allow pstate for riommu invalidation and driver will clear it after powering down display pipes.
Acked-by: Rodrigo Siqueira Signed-off-by: Eric Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h | 4 +++- .../gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 18 ++++++++++++++++++ .../gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h | 1 + .../gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 2 +- .../drm/amd/display/dc/dcn31/dcn31_resource.c | 3 +++ .../amd/display/dc/inc/hw_sequencer_private.h | 1 + 6 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h index df6539e4c730..0464a8f3db3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h @@ -636,6 +636,7 @@ struct dce_hwseq_registers { uint32_t ODM_MEM_PWR_CTRL3; uint32_t DMU_MEM_PWR_CNTL; uint32_t MMHUBBUB_MEM_PWR_CNTL; + uint32_t DCHUBBUB_ARB_HOSTVM_CNTL; }; /* set field name */ #define HWS_SF(blk_name, reg_name, field_name, post_fix)\ @@ -1110,7 +1111,8 @@ struct dce_hwseq_registers { type DOMAIN_POWER_FORCEON;\ type DOMAIN_POWER_GATE;\ type DOMAIN_PGFSM_PWR_STATUS;\ - type HPO_HDMISTREAMCLK_G_GATE_DIS; + type HPO_HDMISTREAMCLK_G_GATE_DIS;\ + type DISABLE_HOSTVM_FORCE_ALLOW_PSTATE; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 836864a5a5dc..6ac6faf0c533 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -47,6 +47,7 @@ #include "dce/dmub_outbox.h" #include "dc_link_dp.h" #include "inc/link_dpcd.h" +#include "dcn10/dcn10_hw_sequencer.h" #define DC_LOGGER_INIT(logger) @@ -594,3 +595,20 @@ bool dcn31_is_abm_supported(struct dc *dc, } return false; } + +static void apply_riommu_invalidation_wa(struct dc *dc) +{ + struct dce_hwseq *hws = dc->hwseq; + + if (!hws->wa.early_riommu_invalidation) + return; + + 
REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, 0); +} + +void dcn31_init_pipes(struct dc *dc, struct dc_state *context) +{ + dcn10_init_pipes(dc, context); + apply_riommu_invalidation_wa(dc); + +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h index ff72f0fdd5be..40dfebe78fdd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h @@ -52,5 +52,6 @@ void dcn31_reset_hw_ctx_wrap( struct dc_state *context); bool dcn31_is_abm_supported(struct dc *dc, struct dc_state *context, struct dc_stream_state *stream); +void dcn31_init_pipes(struct dc *dc, struct dc_state *context); #endif /* __DC_HWSS_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index e3048f8827d2..de74f62f96cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -104,7 +104,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { }; static const struct hwseq_private_funcs dcn31_private_funcs = { - .init_pipes = dcn10_init_pipes, + .init_pipes = dcn31_init_pipes, .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index c67bc9544f5d..3fe0aac4aaa6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -741,6 +741,7 @@ static const struct dccg_mask dccg_mask = { #define HWSEQ_DCN31_REG_LIST()\ SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ SR(DIO_MEM_PWR_CTRL), \ SR(ODM_MEM_PWR_CTRL3), \ SR(DMU_MEM_PWR_CNTL), \ @@ -801,6 +802,7 @@ static const struct dce_hwseq_registers hwseq_reg = { #define 
HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ @@ -1299,6 +1301,7 @@ static struct dce_hwseq *dcn31_hwseq_create( hws->regs = &hwseq_reg; hws->shifts = &hwseq_shift; hws->masks = &hwseq_mask; + hws->wa.early_riommu_invalidation = true; } return hws; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h index f7f7e4fff0c2..082549f75978 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h @@ -41,6 +41,7 @@ struct dce_hwseq_wa { bool DEGVIDCN10_254; bool DEGVIDCN21; bool disallow_self_refresh_during_multi_plane_transition; + bool early_riommu_invalidation; }; struct hwseq_wa_state { From d7940911fc0754d99b208f0e3098762d39f403a0 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 7 Jul 2021 13:19:14 -0400 Subject: [PATCH 298/794] drm/amd/display: Fix max vstartup calculation for modes with borders [Why] Vertical and horizontal borders in timings are treated as increasing the active area - vblank and hblank actually shrink. Our input into DML does not include these borders so it incorrectly assumes it has more time than available for vstartup and tmdl calculations for some modes with borders. An example of such a timing would be 640x480@72Hz: h_total: 832 h_border_left: 8 h_addressable: 640 h_border_right: 8 h_front_porch: 16 h_sync_width: 40 v_total: 520 v_border_top: 8 v_addressable: 480 v_border_bottom: 8 v_front_porch: 1 v_sync_width: 3 pix_clk_100hz: 315000 [How] Include borders as part of destination vactive/hactive. 
This changes DCN20+ so it has wide impact, but the destination vactive and hactive are only really used for vstartup calculation anyway. Most modes do not have vertical or horizontal borders. Reviewed-by: Dmytro Laktyushkin Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 1b05a37b674d..98d21fb374b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2093,8 +2093,10 @@ int dcn20_populate_dml_pipes_from_context( - timing->v_border_bottom; pipes[pipe_cnt].pipe.dest.htotal = timing->h_total; pipes[pipe_cnt].pipe.dest.vtotal = v_total; - pipes[pipe_cnt].pipe.dest.hactive = timing->h_addressable; - pipes[pipe_cnt].pipe.dest.vactive = timing->v_addressable; + pipes[pipe_cnt].pipe.dest.hactive = + timing->h_addressable + timing->h_border_left + timing->h_border_right; + pipes[pipe_cnt].pipe.dest.vactive = + timing->v_addressable + timing->v_border_top + timing->v_border_bottom; pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE; pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0; if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) From f30f55158728b4423cf1caf666dd2e2c1943dc19 Mon Sep 17 00:00:00 2001 From: Bindu Ramamurthy Date: Thu, 27 May 2021 10:11:32 -0400 Subject: [PATCH 299/794] drm/amd/display: Populate socclk entries for dcn3.02/3.03 [Why] Initialize socclk entries in bandwidth params for dcn302, dcn303. [How] Fetch the socclk values from smu for the DPM levels and for the DPM levels where smu returns 0, previous level values are reported.
Reviewed-by: Roman Li Acked-by: Rodrigo Siqueira Signed-off-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 4 ++++ drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 7 +++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 513676a6f52b..af7004b770ae 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -190,6 +190,10 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base) &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz, &num_levels); + /* SOCCLK */ + dcn3_init_single_clock(clk_mgr, PPCLK_SOCCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz, + &num_levels); // DPREFCLK ??? /* DISPCLK */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index 16a75ba0ca82..d65c097333a4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1399,10 +1399,13 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + /* FCLK, PHYCLK_D18, DSCCLK */ 
dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[0].socclk_mhz; dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz; } /* re-init DML with updated bb */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 34b89464ae02..f8b84722a389 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -1327,10 +1327,13 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; /* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + /* FCLK, PHYCLK_D18, DSCCLK */ dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[0].socclk_mhz; dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz; } /* re-init DML with updated bb */ From b0364fa4fc045685f827ea34c0149c953d0240bd Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 7 Jul 2021 16:38:57 -0400 Subject: [PATCH 300/794] drm/amd/display: Query VCO frequency from register for DCN3.1 [Why] Hardcoding the VCO frequency isn't correct since we don't own or control the value. In the case where the hardcode is also missing we can't lightup display. [How] Query from the CLK register instead. 
Update the DFS frequency to be able to compute the VCO frequency. Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 43 ++++++++++++++- .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h | 54 ------------------- 2 files changed, 42 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 7b7d884d58be..d15c628a2ab0 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -48,6 +48,21 @@ #include "dc_dmub_srv.h" +#include "yellow_carp_offset.h" + +#define regCLK1_CLK_PLL_REQ 0x0237 +#define regCLK1_CLK_PLL_REQ_BASE_IDX 0 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define REG(reg_name) \ + (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) + #define TO_CLK_MGR_DCN31(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn31, base) @@ -229,7 +244,32 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) { - return 0; + /* get FbMult value */ + struct fixed31_32 pll_req; + unsigned int fbmult_frac_val = 0; + unsigned int fbmult_int_val = 0; + + /* + * Register value of fbmult is in 8.16 format, we are converting to 31.32 + * to leverage the fix point operations available in driver + */ + + REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/ + REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */ + + pll_req = 
dc_fixpt_from_int(fbmult_int_val); + + /* + * since fractional part is only 16 bit in register definition but is 32 bit + * in our fix point definiton, need to shift left by 16 to obtain correct value + */ + pll_req.value |= fbmult_frac_val << 16; + + /* multiply by REFCLK period */ + pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz); + + /* integer part is now VCO frequency in kHz */ + return dc_fixpt_floor(pll_req); } static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base) @@ -592,6 +632,7 @@ void dcn31_clk_mgr_construct( clk_mgr->base.dprefclk_ss_percentage = 0; clk_mgr->base.dprefclk_ss_divider = 1000; clk_mgr->base.ss_on_dprefclk = false; + clk_mgr->base.dfs_ref_freq_khz = 48000; clk_mgr->smu_wm_set.wm_set = (struct dcn31_watermarks *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h index cc21cf75eafd..f8f100535526 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h @@ -27,60 +27,6 @@ #define __DCN31_CLK_MGR_H__ #include "clk_mgr_internal.h" -//CLK1_CLK_PLL_REQ -#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L -//CLK1_CLK0_DFS_CNTL -#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT 0x0 -#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK 0x0000007FL -/*DPREF clock related*/ -#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define 
CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL - -//CLK3_0_CLK3_CLK_PLL_REQ -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L - -#define mmCLK0_CLK3_DFS_CNTL 0x16C60 -#define mmCLK00_CLK0_CLK3_DFS_CNTL 0x16C60 -#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E60 -#define mmCLK02_CLK0_CLK3_DFS_CNTL 0x17060 -#define mmCLK03_CLK0_CLK3_DFS_CNTL 0x17260 - -#define mmCLK0_CLK_PLL_REQ 0x16C10 -#define mmCLK00_CLK0_CLK_PLL_REQ 0x16C10 -#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E10 -#define mmCLK02_CLK0_CLK_PLL_REQ 0x17010 -#define mmCLK03_CLK0_CLK_PLL_REQ 0x17210 - -#define mmCLK1_CLK_PLL_REQ 0x1B00D -#define mmCLK10_CLK1_CLK_PLL_REQ 0x1B00D -#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D -#define mmCLK12_CLK1_CLK_PLL_REQ 0x1B40D -#define mmCLK13_CLK1_CLK_PLL_REQ 0x1B60D - -#define mmCLK2_CLK_PLL_REQ 0x17E0D - -/*AMCLK*/ -#define mmCLK11_CLK1_CLK0_DFS_CNTL 0x1B23F -#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D -#endif - struct dcn31_watermarks; struct dcn31_smu_watermark_set { From a8e380fd8d71493623c94511f75e81786cffa223 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 8 Jul 2021 12:59:59 -0400 Subject: [PATCH 301/794] drm/amd/display: Update bounding box for DCN3.1 [Why & How] We're missing a default value for dram_channel_width_bytes in the DCN3.1 SOC bounding box and we don't currently have the interface in place to query the actual value from VBIOS. Put in a hardcoded default until we have the interface in place. 
Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 3fe0aac4aaa6..38c010afade1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -220,6 +220,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { .sr_exit_z8_time_us = 402.0, .sr_enter_plus_exit_z8_time_us = 520.0, .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, From 2d73eabe2984a435737498ab39bb1500a9ffe9a9 Mon Sep 17 00:00:00 2001 From: Camille Cho Date: Thu, 8 Jul 2021 18:28:37 +0800 Subject: [PATCH 302/794] drm/amd/display: Only set default brightness for OLED [Why] We used to unconditionally set backlight path as AUX for panels capable of backlight adjustment via DPCD in set default brightness. [How] This should be limited to OLED panel only since we control backlight via PWM path for SDR mode in LCD HDR panel. 
Reviewed-by: Krunoslav Kovac Acked-by: Rodrigo Siqueira Signed-off-by: Camille Cho Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 6da226bf11d5..12066f5a53fc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4914,9 +4914,7 @@ bool dc_link_set_default_brightness_aux(struct dc_link *link) { uint32_t default_backlight; - if (link && - (link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 || - link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) { + if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!dc_link_read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; // if < 5 nits or > 5000, it might be wrong readback From 6580b28e0c734cea2f829e97827005c311293cb7 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Mon, 14 Jun 2021 20:21:42 -0400 Subject: [PATCH 303/794] drm/amd/display: Remove MALL function from DCN3.1 [why] DCN31 doesn't have MALL in DMUB so to avoid sending unknown commands to DMUB just remove the function pointer. 
[how] Remove apply_idle_power_optimizations from function pointers structure for DCN31 Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Mikita Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index de74f62f96cd..aaf2dbd095fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, From 0f984c942cd1703b26dc01351dc47b0c93bc32f3 Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Fri, 9 Jul 2021 13:05:11 -0400 Subject: [PATCH 304/794] drm/amd/display: Line Buffer changes DCN 3x increased Line buffer size for DCHUB latency hiding, from 4 lines of 4K resolution lines to 5 lines of 4K resolution lines. All Line Buffer can be used as extended memory for P State change latency hiding. The maximum number of lines is increased to 32 lines. Finally, LB_MEMORY_CONFIG_1 (LB memory piece 1) and LB_MEMORY_CONFIG_2 (LB memory piece 2) are not affected, no change in size, only the 3-piece configuration is affected, i.e., when all 3 pieces are used in both LB_MEMORY_CONFIG_0 and LB_MEMORY_CONFIG_3 (for 4:2:0) modes.
Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Nevenko Stupar Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c | 7 ++++++- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c | 16 ---------------- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h | 3 +-- .../gpu/drm/amd/display/dc/inc/hw/transform.h | 3 +++ 4 files changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index 673b93f4fea5..cb9767ddf93d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -217,6 +217,8 @@ static void dpp1_dscl_set_lb( const struct line_buffer_params *lb_params, enum lb_memory_config mem_size_config) { + uint32_t max_partitions = 63; /* Currently hardcoded on all ASICs before DCN 3.2 */ + /* LB */ if (dpp->base.caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) { /* DSCL caps: pixel data processed in fixed format */ @@ -239,9 +241,12 @@ static void dpp1_dscl_set_lb( LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */ } + if (dpp->base.caps->max_lb_partitions == 31) + max_partitions = 31; + REG_SET_2(LB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, - LB_MAX_PARTITIONS, 63); + LB_MAX_PARTITIONS, max_partitions); } static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index 2140b75540cf..23a52d47e61c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -383,13 +383,6 @@ bool dpp3_get_optimal_number_of_taps( int min_taps_y, min_taps_c; enum lb_memory_config lb_config; - /* Some ASICs does not support FP16 scaling, so we reject modes require this*/ - if (scl_data->viewport.width != scl_data->h_active && - 
scl_data->viewport.height != scl_data->v_active && - dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT && - scl_data->format == PIXEL_FORMAT_FP16) - return false; - if (scl_data->viewport.width > scl_data->h_active && dpp->ctx->dc->debug.max_downscale_src_width != 0 && scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width) @@ -1440,15 +1433,6 @@ bool dpp3_construct( dpp->tf_shift = tf_shift; dpp->tf_mask = tf_mask; - dpp->lb_pixel_depth_supported = - LB_PIXEL_DEPTH_18BPP | - LB_PIXEL_DEPTH_24BPP | - LB_PIXEL_DEPTH_30BPP | - LB_PIXEL_DEPTH_36BPP; - - dpp->lb_bits_per_entry = LB_BITS_PER_ENTRY; - dpp->lb_memory_size = LB_TOTAL_NUMBER_OF_ENTRIES; /*0x1404*/ - return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h index 3fa86cd090a0..ac644ae6b9f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h @@ -154,6 +154,7 @@ SRI(COLOR_KEYER_BLUE, CNVC_CFG, id), \ SRI(CURSOR_CONTROL, CURSOR0_, id),\ SRI(OBUF_MEM_PWR_CTRL, DSCL, id),\ + SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \ SRI(DSCL_MEM_PWR_CTRL, DSCL, id) #define DPP_REG_LIST_DCN30(id)\ @@ -163,8 +164,6 @@ SRI(CM_SHAPER_LUT_DATA, CM, id),\ SRI(CM_MEM_PWR_CTRL2, CM, id), \ SRI(CM_MEM_PWR_STATUS2, CM, id), \ - SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \ - SRI(DSCL_MEM_PWR_CTRL, DSCL, id), \ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_B, CM, id),\ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_G, CM, id),\ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_R, CM, id),\ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 2a0db2b03047..9ac9d5e8df8b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -289,6 +289,9 @@ struct dpp_caps { /* DSCL processing pixel data in fixed or float format */ enum dscl_data_processing_format dscl_data_proc_format; + /* max LB 
partitions */ + unsigned int max_lb_partitions; + /* Calculates the number of partitions in the line buffer. * The implementation of this function is overloaded for * different versions of DSCL LB. From ce350c6e786ef4bc3a4ddb58f8bdf2e48219fdaa Mon Sep 17 00:00:00 2001 From: Bindu Ramamurthy Date: Fri, 9 Jul 2021 10:35:33 -0400 Subject: [PATCH 305/794] drm/amd/display: Populate dtbclk entries for dcn3.02/3.03 [Why] Populate dtbclk values from bwparams for dcn302, dcn303. [How] dtbclk values are fetched from bandwidthparams for all DPM levels and for DPM levels where smu returns 0, previous level values are reported. Reviewed-by: Roman Li Acked-by: Rodrigo Siqueira Signed-off-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 6 +++++- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index d65c097333a4..7d3ff5d44402 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1398,7 +1398,11 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz; + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) + dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz; + else + dcn3_02_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) dcn3_02_soc.clock_limits[i].socclk_mhz = 
dcn3_02_soc.clock_limits[i-1].socclk_mhz; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index f8b84722a389..833ab13fa834 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -1326,7 +1326,11 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz; + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) + dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz; + else + dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz; else From 93b6bd307a54ea62bfcf89748c80bd25b7bd3205 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 30 Jun 2021 18:22:51 -0400 Subject: [PATCH 306/794] drm/amd/display: change zstate allow msg condition [Why] PMFW message which was previously thought to only control Z9 controls both Z9 and Z10. Also HW design team requested that Z9 must only be supported on eDP due to content protection interop.
[How] Change zstate support condition to match updated policy Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Eric Yang Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 16 +++---- drivers/gpu/drm/amd/display/dc/dc.h | 10 ++--- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 44 ++++++++++++++----- 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index d15c628a2ab0..4a4894e9d9c9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -139,10 +139,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, * also if safe to lower is false, we just go in the higher state */ if (safe_to_lower) { - if (new_clocks->z9_support == DCN_Z9_SUPPORT_ALLOW && - new_clocks->z9_support != clk_mgr_base->clks.z9_support) { + if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_ALLOW && + new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn31_smu_set_Z9_support(clk_mgr, true); - clk_mgr_base->clks.z9_support = new_clocks->z9_support; + clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) { @@ -163,10 +163,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, } } } else { - if (new_clocks->z9_support == DCN_Z9_SUPPORT_DISALLOW && - new_clocks->z9_support != clk_mgr_base->clks.z9_support) { + if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW && + new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn31_smu_set_Z9_support(clk_mgr, false); - clk_mgr_base->clks.z9_support = new_clocks->z9_support; + clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { @@ -286,7 +286,7 @@ static void 
dcn31_init_clocks(struct clk_mgr *clk_mgr) clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; - clk_mgr->clks.z9_support = DCN_Z9_SUPPORT_UNKNOWN; + clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; } static bool dcn31_are_clock_states_equal(struct dc_clocks *a, @@ -300,7 +300,7 @@ static bool dcn31_are_clock_states_equal(struct dc_clocks *a, return false; else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz) return false; - else if (a->z9_support != b->z9_support) + else if (a->zstate_support != b->zstate_support) return false; else if (a->dtbclk_en != b->dtbclk_en) return false; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 45640f1c26c4..8dcea8ff5c5a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -354,10 +354,10 @@ enum dcn_pwr_state { }; #if defined(CONFIG_DRM_AMD_DC_DCN) -enum dcn_z9_support_state { - DCN_Z9_SUPPORT_UNKNOWN, - DCN_Z9_SUPPORT_ALLOW, - DCN_Z9_SUPPORT_DISALLOW, +enum dcn_zstate_support_state { + DCN_ZSTATE_SUPPORT_UNKNOWN, + DCN_ZSTATE_SUPPORT_ALLOW, + DCN_ZSTATE_SUPPORT_DISALLOW, }; #endif /* @@ -378,7 +378,7 @@ struct dc_clocks { int dramclk_khz; bool p_state_change_support; #if defined(CONFIG_DRM_AMD_DC_DCN) - enum dcn_z9_support_state z9_support; + enum dcn_zstate_support_state zstate_support; bool dtbclk_en; #endif enum dcn_pwr_state pwr_state; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 98d21fb374b1..b173fa3653b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3081,6 +3081,37 @@ static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) return false; } +static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struct dc_state *context) +{ + int 
plane_count; + int i; + + plane_count = 0; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + /* + * Zstate is allowed in following scenarios: + * 1. Single eDP with PSR enabled + * 2. 0 planes (No memory requests) + * 3. Single eDP without PSR but > 5ms stutter period + */ + if (plane_count == 0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { + struct dc_link *link = context->streams[0]->sink->link; + + if ((link->link_index == 0 && link->psr_settings.psr_feature_enabled) + || context->bw_ctx.dml.vba.StutterPeriod > 5000.0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else + return DCN_ZSTATE_SUPPORT_DISALLOW; + } else + return DCN_ZSTATE_SUPPORT_DISALLOW; +} + void dcn20_calculate_dlg_params( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -3088,7 +3119,6 @@ void dcn20_calculate_dlg_params( int vlevel) { int i, pipe_idx; - int plane_count; /* Writeback MCIF_WB arbitration parameters */ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); @@ -3104,17 +3134,7 @@ void dcn20_calculate_dlg_params( != dm_dram_clock_change_unsupported; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ? 
- DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW; - - plane_count = 0; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].plane_state) - plane_count++; - } - - if (plane_count == 0) - context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW; + context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); From 6588b101ed0a71a60fa7df0a18ed7db07026d109 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 15 Jul 2021 14:54:49 +0800 Subject: [PATCH 307/794] drm/amd/pm: Support board calibration on aldebaran Add support for board power calibration on Aldebaran. Board calibration is done after DC offset calibration. Signed-off-by: Lijo Lazar Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h | 3 +- drivers/gpu/drm/amd/pm/inc/smu_types.h | 3 +- .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 46 +++++++++++++++---- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h index 610266088ff1..35fa0d8e92dd 100644 --- a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h @@ -101,7 +101,8 @@ #define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow 0x41 #define PPSMC_MSG_GfxDriverResetRecovery 0x42 -#define PPSMC_Message_Count 0x43 +#define PPSMC_MSG_BoardPowerCalibration 0x43 +#define PPSMC_Message_Count 0x44 //PPSMC Reset Types #define PPSMC_RESET_TYPE_WARM_RESET 0x00 diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 89a16dcd0fff..1d3765b873df 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -225,7 +225,8 @@ __SMU_DUMMY_MAP(DisableDeterminism), \ __SMU_DUMMY_MAP(SetUclkDpmMode), \ __SMU_DUMMY_MAP(LightSBR), \ - __SMU_DUMMY_MAP(GfxDriverResetRecovery), + 
__SMU_DUMMY_MAP(GfxDriverResetRecovery), \ + __SMU_DUMMY_MAP(BoardPowerCalibration), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 9316a726195c..cb5485cf243f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -134,6 +134,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT MSG_MAP(DisableDeterminism, PPSMC_MSG_DisableDeterminism, 0), MSG_MAP(SetUclkDpmMode, PPSMC_MSG_SetUclkDpmMode, 0), MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), + MSG_MAP(BoardPowerCalibration, PPSMC_MSG_BoardPowerCalibration, 0), }; static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = { @@ -440,6 +441,39 @@ static int aldebaran_setup_pptable(struct smu_context *smu) return ret; } +static bool aldebaran_is_primary(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (adev->smuio.funcs && adev->smuio.funcs->get_die_id) + return adev->smuio.funcs->get_die_id(adev) == 0; + + return true; +} + +static int aldebaran_run_board_btc(struct smu_context *smu) +{ + u32 smu_version; + int ret; + + if (!aldebaran_is_primary(smu)) + return 0; + + ret = smu_cmn_get_smc_version(smu, NULL, &smu_version); + if (ret) { + dev_err(smu->adev->dev, "Failed to get smu version!\n"); + return ret; + } + if (smu_version <= 0x00441d00) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_BoardPowerCalibration, NULL); + if (ret) + dev_err(smu->adev->dev, "Board power calibration failed!\n"); + + return ret; +} + static int aldebaran_run_btc(struct smu_context *smu) { int ret; @@ -447,6 +481,8 @@ static int aldebaran_run_btc(struct smu_context *smu) ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); if (ret) dev_err(smu->adev->dev, "RunDcBtc failed!\n"); + else + ret = aldebaran_run_board_btc(smu); 
return ret; } @@ -524,16 +560,6 @@ static int aldebaran_freqs_in_same_level(int32_t frequency1, return (abs(frequency1 - frequency2) <= EPSILON); } -static bool aldebaran_is_primary(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - - if (adev->smuio.funcs && adev->smuio.funcs->get_die_id) - return adev->smuio.funcs->get_die_id(adev) == 0; - - return true; -} - static int aldebaran_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value) From ab7a11bd36ca6cd4d4dab2846eaacafaa5963cc1 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 2 Jun 2021 10:32:41 +0800 Subject: [PATCH 308/794] drm/amdgpu: update yellow carp external rev_id handling 0x1681 has a different external revision id. Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index cf73a6923203..f589b8334be6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1236,7 +1236,10 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; - adev->external_rev_id = adev->rev_id + 0x01; + if (adev->pdev->device == 0x1681) + adev->external_rev_id = adev->rev_id + 0x19; + else + adev->external_rev_id = adev->rev_id + 0x01; break; default: /* FIXME: not supported yet */ From 27f5355f5d9706dfc1c2542253689f421008c969 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 4 Nov 2020 13:04:06 +0800 Subject: [PATCH 309/794] drm/amdgpu: add yellow carp pci id (v2) Add Yellow Carp PCI id support. 
v2: add another DID Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index abb928894eac..361b86b71b56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1190,6 +1190,10 @@ static const struct pci_device_id pciidlist[] = { /* Van Gogh */ {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, + /* Yellow Carp */ + {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + /* Navy_Flounder */ {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, From 6be50f5d83adc9541de3d5be26e968182b5ac150 Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Wed, 21 Jul 2021 12:25:24 +0800 Subject: [PATCH 310/794] drm/amd/display: Fix ASSR regression on embedded panels [Why] Regression found in some embedded panels traces back to the earliest upstreamed ASSR patch. The changed code flow is causing problems with some panels.
[How] - Change ASSR enabling code while preserving original code flow as much as possible - Simplify the code on guarding with internal display flag Bug: https://bugzilla.kernel.org/show_bug.cgi?id=213779 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1620 Reviewed-by: Alex Deucher Signed-off-by: Stylon Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 12066f5a53fc..9fb8c46dc606 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1820,8 +1820,7 @@ bool perform_link_training_with_retries( */ panel_mode = DP_PANEL_MODE_DEFAULT; } - } else - panel_mode = DP_PANEL_MODE_DEFAULT; + } } #endif @@ -4650,7 +4649,10 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link) } } - if (link->dpcd_caps.panel_mode_edp) { + if (link->dpcd_caps.panel_mode_edp && + (link->connector_signal == SIGNAL_TYPE_EDP || + (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && + link->is_internal_display))) { return DP_PANEL_MODE_EDP; } From 02dc2ee7c7476dd831df63d2b10cc0a162a531f1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 21 Jul 2021 14:45:54 -0400 Subject: [PATCH 311/794] sctp: do not update transport pathmtu if SPP_PMTUD_ENABLE is not set Currently, in sctp_packet_config(), sctp_transport_pmtu_check() is called to update transport pathmtu with dst's mtu when dst's mtu has been changed by non sctp stack like xfrm. However, this should only happen when SPP_PMTUD_ENABLE is set, no matter where dst's mtu changed. This patch is to fix by checking SPP_PMTUD_ENABLE flag before calling sctp_transport_pmtu_check(). Thanks Jacek for reporting and looking into this issue. v1->v2: - add the missing "{" to fix the build error. 
Fixes: 69fec325a643 ('Revert "sctp: remove sctp_transport_pmtu_check"') Reported-by: Jacek Szafraniec Tested-by: Jacek Szafraniec Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 9032ce60d50e..4dfb5ea82b05 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -104,8 +104,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); } else if (!sctp_transport_pl_enabled(tp) && - !sctp_transport_pmtu_check(tp)) { - if (asoc->param_flags & SPP_PMTUD_ENABLE) + asoc->param_flags & SPP_PMTUD_ENABLE) { + if (!sctp_transport_pmtu_check(tp)) sctp_assoc_sync_pmtu(asoc); } From d80cded9cc25f841d5250d2e94a7b42be1e81c97 Mon Sep 17 00:00:00 2001 From: Veerabadhran Gopalakrishnan Date: Mon, 19 Jul 2021 19:06:23 +0530 Subject: [PATCH 312/794] drm/amdgpu - Corrected the video codecs array name for yellow carp Signed-off-by: Veerabadhran Gopalakrishnan Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index f589b8334be6..94d029dbf30d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -173,8 +173,8 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { }; static const struct amdgpu_video_codecs yc_video_codecs_decode = { - .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array), - .codec_array = bg_video_codecs_decode_array, + .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array), + .codec_array = yc_video_codecs_decode_array, }; static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, From d0e4dae74470fb709fc0ab61862c317938f4cc4d Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Sun, 
27 Jun 2021 21:51:17 +0800 Subject: [PATCH 313/794] riscv: Fix 32-bit RISC-V boot failure Commit dd2d082b5760 ("riscv: Cleanup setup_bootmem()") adjusted the calling sequence in setup_bootmem(), which invalidates the fix commit de043da0b9e7 ("RISC-V: Fix usage of memblock_enforce_memory_limit") did for 32-bit RISC-V unfortunately. So now 32-bit RISC-V does not boot again when testing booting kernel on QEMU 'virt' with '-m 2G', which was exactly what the original commit de043da0b9e7 ("RISC-V: Fix usage of memblock_enforce_memory_limit") tried to fix. Fixes: dd2d082b5760 ("riscv: Cleanup setup_bootmem()") Signed-off-by: Bin Meng Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 4faf8bd157ea..0fc72603c699 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -123,7 +123,7 @@ void __init setup_bootmem(void) { phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); - phys_addr_t dram_end = memblock_end_of_DRAM(); + phys_addr_t dram_end; phys_addr_t max_mapped_addr = __pa(~(ulong)0); #ifdef CONFIG_XIP_KERNEL @@ -146,6 +146,8 @@ void __init setup_bootmem(void) #endif memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); + dram_end = memblock_end_of_DRAM(); + /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE From 213ad73d06073b197a02476db3a4998e219ddb06 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 21 Jul 2021 10:27:38 -0700 Subject: [PATCH 314/794] tcp: disable TFO blackhole logic by default Multiple complaints have been raised from the TFO users on the internet stating that the TFO blackhole logic is too aggressive and gets falsely triggered too often. (e.g. 
https://blog.apnic.net/2021/07/05/tcp-fast-open-not-so-fast/) Considering that most middleboxes no longer drop TFO packets, we decide to disable the blackhole logic by setting /proc/sys/net/ipv4/tcp_fastopen_blackhole_timeout_sec to 0 by default. Fixes: cf1ef3f0719b4 ("net/tcp_fastopen: Disable active side TFO in certain scenarios") Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv4/tcp_fastopen.c | 9 ++++++++- net/ipv4/tcp_ipv4.c | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b3fa522e4cd9..316c7dfa9693 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -826,7 +826,7 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER initial value when the blackhole issue goes away. 0 to disable the blackhole detection. - By default, it is set to 1hr. + By default, it is set to 0 (feature is disabled). tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs The list consists of a primary key and an optional backup key.
The diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index b32af76e2132..25fa4c01a17f 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -507,6 +507,9 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); + if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) + return; + /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies); @@ -526,10 +529,14 @@ void tcp_fastopen_active_disable(struct sock *sk) bool tcp_fastopen_active_should_disable(struct sock *sk) { unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; - int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); unsigned long timeout; + int tfo_da_times; int multiplier; + if (!tfo_bh_timeout) + return false; + + tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); if (!tfo_da_times) return false; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b9dc2d6197be..a692626c19e4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2965,7 +2965,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_nr = 44; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); - net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; + net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); /* Reno is always built in */ From e40cba9490bab1414d45c2d62defc0ad4f6e4136 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 21 Jul 2021 15:37:59 +0300 Subject: [PATCH 315/794] net: dsa: sja1105: make VID 4095 a bridge VLAN too This simple series of commands: ip link add br0 type bridge vlan_filtering 1 ip link set swp0 master br0 fails on sja1105 with the following error: [ 33.439103] sja1105 spi0.1: vlan-lookup-table needs to have at least the default untagged VLAN [ 
33.447710] sja1105 spi0.1: Invalid config, cannot upload Warning: sja1105: Failed to change VLAN Ethertype. For context, sja1105 has 3 operating modes: - SJA1105_VLAN_UNAWARE: the dsa_8021q_vlans are committed to hardware - SJA1105_VLAN_FILTERING_FULL: the bridge_vlans are committed to hardware - SJA1105_VLAN_FILTERING_BEST_EFFORT: both the dsa_8021q_vlans and the bridge_vlans are committed to hardware Swapping out a VLAN list and another in happens in sja1105_build_vlan_table(), which performs a delta update procedure. That function is called from a few places, notably from sja1105_vlan_filtering() which is called from the SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING handler. The above set of 2 commands fails when run on a kernel pre-commit 8841f6e63f2c ("net: dsa: sja1105: make devlink property best_effort_vlan_filtering true by default"). So the priv->vlan_state transition that takes place is between VLAN-unaware and full VLAN filtering. So the dsa_8021q_vlans are swapped out and the bridge_vlans are swapped in. So why does it fail? Well, the bridge driver, through nbp_vlan_init(), first sets up the SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING attribute, and only then proceeds to call nbp_vlan_add for the default_pvid. So when we swap out the dsa_8021q_vlans and swap in the bridge_vlans in the SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING handler, there are no bridge VLANs (yet). So we have wiped the VLAN table clean, and the low-level static config checker complains of an invalid configuration. We _will_ add the bridge VLANs using the dynamic config interface, albeit later, when nbp_vlan_add() calls us. So it is natural that it fails. So why did it ever work? 
Surprisingly, it looks like I only tested this configuration with 2 things set up in a particular way: - a network manager that brings all ports up - a kernel with CONFIG_VLAN_8021Q=y It is widely known that commit ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)") installs VID 0 to every net device that comes up. DSA treats these VLANs as bridge VLANs, and therefore, in my testing, the list of bridge_vlans was never empty. However, if CONFIG_VLAN_8021Q is not enabled, or the port is not up when it joins a VLAN-aware bridge, the bridge_vlans list will be temporarily empty, and the sja1105_static_config_reload() call from sja1105_vlan_filtering() will fail. To fix this, the simplest thing is to keep VID 4095, the one used for CPU-injected control packets since commit ed040abca4c1 ("net: dsa: sja1105: use 4095 as the private VLAN for untagged traffic"), in the list of bridge VLANs too, not just the list of tag_8021q VLANs. This ensures that the list of bridge VLANs will never be empty. Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method") Reported-by: Radu Pirea (NXP OSS) Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index ced8c9cb29c2..e2dc997580a8 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -397,6 +397,12 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) if (dsa_is_cpu_port(ds, port)) v->pvid = true; list_add(&v->list, &priv->dsa_8021q_vlans); + + v = kmemdup(v, sizeof(*v), GFP_KERNEL); + if (!v) + return -ENOMEM; + + list_add(&v->list, &priv->bridge_vlans); } ((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid; From 291d0a2c1fa6ff437c8f1156646fdd2525714c80 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 21 Jul 2021 19:17:21 +0100 Subject: [PATCH 316/794] ravb: Fix a typo in comment Fix the typo RX->TX in comment, as the code following the comment processes TX and not RX. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 69c50f81e1cb..805397088850 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -920,7 +920,7 @@ static int ravb_poll(struct napi_struct *napi, int budget) if (ravb_rx(ndev, &quota, q)) goto out; - /* Processing RX Descriptor Ring */ + /* Processing TX Descriptor Ring */ spin_lock_irqsave(&priv->lock, flags); /* Clear TX interrupt */ ravb_write(ndev, ~(mask | TIS_RESERVED), TIS); From 9f061b9acbb0bdf5317b301c1608ca55be522c92 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 21 Jul 2021 19:21:26 +0100 Subject: [PATCH 317/794] ravb: Remove extra TAB Align the member description comments for struct ravb_desc by removing the extra TAB. 
Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 86a1eb0634e8..80e62ca2e3d3 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -864,7 +864,7 @@ enum GECMR_BIT { /* The Ethernet AVB descriptor definitions. */ struct ravb_desc { - __le16 ds; /* Descriptor size */ + __le16 ds; /* Descriptor size */ u8 cc; /* Content control MSBs (reserved) */ u8 die_dt; /* Descriptor interrupt enable and type */ __le32 dptr; /* Descriptor pointer */ From c79e89ecaa246c880292ba68cbe08c9c30db77e3 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Tue, 29 Jun 2021 15:40:18 +0200 Subject: [PATCH 318/794] RISC-V: load initrd wherever it fits into memory Requiring that initrd is loaded below RAM start + 256 MiB led to failure to boot SUSE Linux with GRUB on QEMU, cf. https://lists.gnu.org/archive/html/grub-devel/2021-06/msg00037.html Remove the constraint. 
Reported-by: Andreas Schwab Signed-off-by: Heinrich Schuchardt Reviewed-by: Atish Patra Acked-by: Ard Biesheuvel Fixes: d7071743db31 ("RISC-V: Add EFI stub support.") Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/efi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 6d98cd999680..7b3483ba2e84 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -27,10 +27,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) -/* Load initrd at enough distance from DRAM start */ +/* Load initrd anywhere in system RAM */ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { - return image_addr + SZ_256M; + return ULONG_MAX; } #define alloc_screen_info(x...) (&screen_info) From 21cf377a9c40658777ecbd6242be449a19a84e44 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Wed, 21 Jul 2021 23:56:41 +0200 Subject: [PATCH 319/794] net: dsa: ensure linearized SKBs in case of tail taggers The function skb_put() that is used by tail taggers to make room for the DSA tag must only be called for linearized SKBS. However in case that the slave device inherited features like NETIF_F_HW_SG or NETIF_F_FRAGLIST the SKB passed to the slaves transmit function may not be linearized. Avoid those SKBs by clearing the NETIF_F_HW_SG and NETIF_F_FRAGLIST flags for tail taggers. Furthermore since the tagging protocol can be changed at runtime move the code for setting up the slaves features into dsa_slave_setup_tagger(). Suggested-by: Vladimir Oltean Signed-off-by: Lino Sanfilippo Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ffbba1e71551..532085da8d8f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1808,6 +1808,7 @@ void dsa_slave_setup_tagger(struct net_device *slave) struct dsa_slave_priv *p = netdev_priv(slave); const struct dsa_port *cpu_dp = dp->cpu_dp; struct net_device *master = cpu_dp->master; + const struct dsa_switch *ds = dp->ds; slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; @@ -1819,6 +1820,14 @@ void dsa_slave_setup_tagger(struct net_device *slave) slave->needed_tailroom += master->needed_tailroom; p->xmit = cpu_dp->tag_ops->xmit; + + slave->features = master->vlan_features | NETIF_F_HW_TC; + if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + slave->hw_features |= NETIF_F_HW_TC; + slave->features |= NETIF_F_LLTX; + if (slave->needed_tailroom) + slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); } static struct lock_class_key dsa_slave_netdev_xmit_lock_key; @@ -1881,11 +1890,6 @@ int dsa_slave_create(struct dsa_port *port) if (slave_dev == NULL) return -ENOMEM; - slave_dev->features = master->vlan_features | NETIF_F_HW_TC; - if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) - slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - slave_dev->hw_features |= NETIF_F_HW_TC; - slave_dev->features |= NETIF_F_LLTX; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; if (!is_zero_ether_addr(port->mac)) ether_addr_copy(slave_dev->dev_addr, port->mac); From 37120f23ac8998c250573ea3247ff77426551f69 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Wed, 21 Jul 2021 23:56:42 +0200 Subject: [PATCH 320/794] net: dsa: tag_ksz: dont let the hardware process the layer 4 checksum If the checksum calculation is offloaded to the network device (e.g due to NETIF_F_HW_CSUM inherited from the DSA master device), the 
calculated layer 4 checksum is incorrect. This is since the DSA tag which is placed after the layer 4 data is considered as being part of the data and thus erroneously included in the checksum calculation. To avoid this, always calculate the layer 4 checksum in software. Signed-off-by: Lino Sanfilippo Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_ksz.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 53565f48934c..a201ccf2435d 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -53,6 +53,9 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) u8 *tag; u8 *addr; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + /* Tag encoding */ tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); addr = skb_mac_header(skb); @@ -114,6 +117,9 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, u8 *addr; u16 val; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + /* Tag encoding */ tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN); addr = skb_mac_header(skb); @@ -164,6 +170,9 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, u8 *addr; u8 *tag; + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) + return NULL; + /* Tag encoding */ tag = skb_put(skb, KSZ_INGRESS_TAG_LEN); addr = skb_mac_header(skb); From b0084afde27fe8a504377dee65f55bc6aa776937 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Thu, 22 Jul 2021 02:56:05 +0300 Subject: [PATCH 321/794] ALSA: usb-audio: Add registration quirk for JBL Quantum headsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These devices have two interfaces, but only the second interface contains the capture endpoint, thus a quirk is required to delay the registration until the second interface appears. 
Tested-by: Jakub Fišer Signed-off-by: Alexander Tsoy Cc: Link: https://lore.kernel.org/r/20210721235605.53741-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 8b8bee3c3dd6..e7accd87e063 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1897,6 +1897,9 @@ static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16d8, 2), /* Kingston HyperX AMP */ REG_QUIRK_ENTRY(0x0951, 0x16ed, 2), /* Kingston HyperX Cloud Alpha S */ REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */ + REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */ + REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */ + REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */ { 0 } /* terminator */ }; From 98c5b13f3a878066741a907a9d0f1f388556ed5c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 21 Jul 2021 15:33:36 -0700 Subject: [PATCH 322/794] net: sparx5: fix unmet dependencies warning WARNING: unmet direct dependencies detected for PHY_SPARX5_SERDES Depends on [n]: (ARCH_SPARX5 || COMPILE_TEST [=n]) && OF [=y] && HAS_IOMEM [=y] Selected by [y]: - SPARX5_SWITCH [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_MICROCHIP [=y] && NET_SWITCHDEV [=y] && HAS_IOMEM [=y] && OF [=y] Signed-off-by: Randy Dunlap Cc: Lars Povlsen Cc: Steen Hegelund Cc: UNGLinuxDriver@microchip.com Cc: "David S. Miller" Cc: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/sparx5/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index ac403d43c74c..7bdbb2d09a14 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -3,6 +3,7 @@ config SPARX5_SWITCH depends on NET_SWITCHDEV depends on HAS_IOMEM depends on OF + depends on ARCH_SPARX5 || COMPILE_TEST select PHYLINK select PHY_SPARX5_SERDES select RESET_CONTROLLER From 9d85a6f44bd5585761947f40f7821c9cd78a1bbe Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Thu, 22 Jul 2021 11:23:43 +0800 Subject: [PATCH 323/794] net: sched: cls_api: Fix the wrong parameter The 4th parameter in tc_chain_notify() should be flags rather than seq. Let's change it back correctly. Fixes: 32a4f5ecd738 ("net: sched: introduce chain object to uapi") Signed-off-by: Yajun Deng Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d73b5c5514a9..e3e79e9bd706 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2904,7 +2904,7 @@ replay: break; case RTM_GETCHAIN: err = tc_chain_notify(chain, skb, n->nlmsg_seq, - n->nlmsg_seq, n->nlmsg_type, true); + n->nlmsg_flags, n->nlmsg_type, true); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send chain notify message"); break; From 456a9dace42ecfcec7ce6e17c18d1985d628dcd0 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:29 -0700 Subject: [PATCH 324/794] interconnect: Zero initial BW after sync-state The initial BW values may be used by providers to enforce floors. Zero these values after sync-state so that providers know when to stop enforcing them. 
Fixes: b1d681d8d324 ("interconnect: Add sync state support") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-2-mdtipton@codeaurora.org Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 8a1e70e00876..945121e18b5c 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1106,6 +1106,8 @@ void icc_sync_state(struct device *dev) dev_dbg(p->dev, "interconnect provider is in synced state\n"); list_for_each_entry(n, &p->nodes, node_list) { if (n->init_avg || n->init_peak) { + n->init_avg = 0; + n->init_peak = 0; aggregate_requests(n); p->set(n, n); } From 73606ba9242f8e32023699b500b7922b4cf2993c Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:30 -0700 Subject: [PATCH 325/794] interconnect: Always call pre_aggregate before aggregate The pre_aggregate callback isn't called in all cases before calling aggregate. Add the missing calls so providers can rely on consistent framework behavior. 
Fixes: d3703b3e255f ("interconnect: Aggregate before setting initial bandwidth") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-3-mdtipton@codeaurora.org Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 945121e18b5c..1b2c564eaa99 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -973,9 +973,14 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) } node->avg_bw = node->init_avg; node->peak_bw = node->init_peak; + + if (provider->pre_aggregate) + provider->pre_aggregate(node); + if (provider->aggregate) provider->aggregate(node, 0, node->init_avg, node->init_peak, &node->avg_bw, &node->peak_bw); + provider->set(node, node); node->avg_bw = 0; node->peak_bw = 0; From 7bbcb919e32d776ca8ddce08abb391ab92eef6a9 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 20 Jul 2021 15:45:23 +0200 Subject: [PATCH 326/794] drm/panel: raspberrypi-touchscreen: Prevent double-free The mipi_dsi_device allocated by mipi_dsi_device_register_full() is already free'd on release. 
Fixes: 2f733d6194bd ("drm/panel: Add support for the Raspberry Pi 7" Touchscreen.") Signed-off-by: Maxime Ripard Reviewed-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210720134525.563936-9-maxime@cerno.tech --- drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index 2229f1af2ca8..46029c5610c8 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -447,7 +447,6 @@ static int rpi_touchscreen_remove(struct i2c_client *i2c) drm_panel_remove(&ts->base); mipi_dsi_device_unregister(ts->dsi); - kfree(ts->dsi); return 0; } From 69de4421bb4c103ef42a32bafc596e23918c106f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Jul 2021 10:23:57 -0500 Subject: [PATCH 327/794] drm/ttm: Initialize debugfs from ttm_global_init() We create a bunch of debugfs entries as a side-effect of ttm_global_init() and then never clean them up. This isn't usually a problem because we free the whole debugfs directory on module unload. However, if the global reference count ever goes to zero and then ttm_global_init() is called again, we'll re-create those debugfs entries and debugfs will complain in dmesg that we're creating entries that already exist. This patch fixes this problem by changing the lifetime of the whole TTM debugfs directory to match that of the TTM global state. 
Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-6-jason@jlekstrand.net --- drivers/gpu/drm/ttm/ttm_device.c | 12 ++++++++++++ drivers/gpu/drm/ttm/ttm_module.c | 16 ---------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 519deea8e39b..74e3b460132b 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -44,6 +44,8 @@ static unsigned ttm_glob_use_count; struct ttm_global ttm_glob; EXPORT_SYMBOL(ttm_glob); +struct dentry *ttm_debugfs_root; + static void ttm_global_release(void) { struct ttm_global *glob = &ttm_glob; @@ -53,6 +55,7 @@ static void ttm_global_release(void) goto out; ttm_pool_mgr_fini(); + debugfs_remove(ttm_debugfs_root); __free_page(glob->dummy_read_page); memset(glob, 0, sizeof(*glob)); @@ -73,6 +76,13 @@ static int ttm_global_init(void) si_meminfo(&si); + ttm_debugfs_root = debugfs_create_dir("ttm", NULL); + if (IS_ERR(ttm_debugfs_root)) { + ret = PTR_ERR(ttm_debugfs_root); + ttm_debugfs_root = NULL; + goto out; + } + /* Limit the number of pages in the pool to about 50% of the total * system memory. 
*/ @@ -100,6 +110,8 @@ static int ttm_global_init(void) debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root, &glob->bo_count); out: + if (ret && ttm_debugfs_root) + debugfs_remove(ttm_debugfs_root); if (ret) --ttm_glob_use_count; mutex_unlock(&ttm_global_mutex); diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index 997c458f68a9..7fcdef278c74 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -72,22 +72,6 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp) return tmp; } -struct dentry *ttm_debugfs_root; - -static int __init ttm_init(void) -{ - ttm_debugfs_root = debugfs_create_dir("ttm", NULL); - return 0; -} - -static void __exit ttm_exit(void) -{ - debugfs_remove(ttm_debugfs_root); -} - -module_init(ttm_init); -module_exit(ttm_exit); - MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse"); MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)"); MODULE_LICENSE("GPL and additional rights"); From 1d5ccab95f06675a269f4cb223a1e3f6d1ebef42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 21 Jul 2021 11:53:21 +0200 Subject: [PATCH 328/794] spi: spi-mux: Add module info needed for autoloading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the spi device table udev can autoload the spi-mux module in the presence of an spi-mux device. 
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20210721095321.2165453-1-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-mux.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c index 37dfc6e82804..9708b7827ff7 100644 --- a/drivers/spi/spi-mux.c +++ b/drivers/spi/spi-mux.c @@ -167,10 +167,17 @@ err_put_ctlr: return ret; } +static const struct spi_device_id spi_mux_id[] = { + { "spi-mux" }, + { } +}; +MODULE_DEVICE_TABLE(spi, spi_mux_id); + static const struct of_device_id spi_mux_of_match[] = { { .compatible = "spi-mux" }, { } }; +MODULE_DEVICE_TABLE(of, spi_mux_of_match); static struct spi_driver spi_mux_driver = { .probe = spi_mux_probe, @@ -178,6 +185,7 @@ static struct spi_driver spi_mux_driver = { .name = "spi-mux", .of_match_table = spi_mux_of_match, }, + .id_table = spi_mux_id, }; module_spi_driver(spi_mux_driver); From 7aaa0f311e2df2704fa8ddb8ed681a3b5841d0bf Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 22 Jul 2021 15:15:51 +0300 Subject: [PATCH 329/794] dpaa2-switch: seed the buffer pool after allocating the swp Any interaction with the buffer pool (seeding a buffer, acquiring one) is made through a software portal (SWP, a DPIO object). There are circumstances where the dpaa2-switch driver probes on a DPSW before any DPIO devices have been probed. In this case, seeding of the buffer pool will lead to a panic since no SWPs are initialized. To fix this, seed the buffer pool after making sure that the software portals have been probed and are ready to be used. Fixes: 0b1b71370458 ("staging: dpaa2-switch: handle Rx path on control interface") Signed-off-by: Ioana Ciornei Signed-off-by: David S. 
Miller --- .../net/ethernet/freescale/dpaa2/dpaa2-switch.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index f3d12d0714fb..68b78642c045 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2770,32 +2770,32 @@ static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw) if (err) return err; - err = dpaa2_switch_seed_bp(ethsw); - if (err) - goto err_free_dpbp; - err = dpaa2_switch_alloc_rings(ethsw); if (err) - goto err_drain_dpbp; + goto err_free_dpbp; err = dpaa2_switch_setup_dpio(ethsw); if (err) goto err_destroy_rings; + err = dpaa2_switch_seed_bp(ethsw); + if (err) + goto err_deregister_dpio; + err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle); if (err) { dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err); - goto err_deregister_dpio; + goto err_drain_dpbp; } return 0; +err_drain_dpbp: + dpaa2_switch_drain_bp(ethsw); err_deregister_dpio: dpaa2_switch_free_dpio(ethsw); err_destroy_rings: dpaa2_switch_destroy_rings(ethsw); -err_drain_dpbp: - dpaa2_switch_drain_bp(ethsw); err_free_dpbp: dpaa2_switch_free_dpbp(ethsw); From 9acc8103ab594f72250788cb45a43427f36d685d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 6 Jul 2021 15:41:15 +0100 Subject: [PATCH 330/794] btrfs: fix unpersisted i_size on fsync after expanding truncate If we have an inode that does not have the full sync flag set, was changed in the current transaction, then it is logged while logging some other inode (like its parent directory for example), its i_size is increased by a truncate operation, the log is synced through an fsync of some other inode and then finally we explicitly call fsync on our inode, the new i_size is not persisted. 
The following example shows how to trigger it, with comments explaining how and why the issue happens: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ touch /mnt/foo $ xfs_io -f -c "pwrite -S 0xab 0 1M" /mnt/bar $ sync # Fsync bar, this will be a noop since the file has not yet been # modified in the current transaction. The goal here is to clear # BTRFS_INODE_NEEDS_FULL_SYNC from the inode's runtime flags. $ xfs_io -c "fsync" /mnt/bar # Now rename both files, without changing their parent directory. $ mv /mnt/bar /mnt/bar2 $ mv /mnt/foo /mnt/foo2 # Increase the size of bar2 with a truncate operation. $ xfs_io -c "truncate 2M" /mnt/bar2 # Now fsync foo2, this results in logging its parent inode (the root # directory), and logging the parent results in logging the inode of # file bar2 (its inode item and the new name). The inode of file bar2 # is logged with an i_size of 0 bytes since it's logged in # LOG_INODE_EXISTS mode, meaning we are only logging its names (and # xattrs if it had any) and the i_size of the inode will not be changed # when the log is replayed. $ xfs_io -c "fsync" /mnt/foo2 # Now explicitly fsync bar2. This resulted in doing nothing, not # logging the inode with the new i_size of 2M and the hole from file # offset 1M to 2M. Because the inode did not have the flag # BTRFS_INODE_NEEDS_FULL_SYNC set, when it was logged through the # fsync of file foo2, its last_log_commit field was updated, # resulting in this explicit fsync of file bar2 not doing anything. $ xfs_io -c "fsync" /mnt/bar2 # File bar2 content and size before a power failure. $ od -A d -t x1 /mnt/bar2 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 1048576 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 2097152 # Mount the filesystem to replay the log. $ mount /dev/sdc /mnt # Read the file again, should have the same content and size as before # the power failure happened, but it doesn't, i_size is still at 1M. 
$ od -A d -t x1 /mnt/bar2 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 1048576 This started to happen after commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from btrfs_inode_in_log()"), since btrfs_inode_in_log() no longer checks if the inode's list of modified extents is not empty. However, checking that list is not the right way to address this case and the check was added long time ago in commit 125c4cf9f37c98 ("Btrfs: set inode's logged_trans/last_log_commit after ranged fsync") for a different purpose, to address consecutive ranged fsyncs. The reason that checking for the list emptiness makes this test pass is because during an expanding truncate we create an extent map to represent a hole from the old i_size to the new i_size, and add that extent map to the list of modified extents in the inode. However if we are low on available memory and we can not allocate a new extent map, then we don't treat it as an error and just set the full sync flag on the inode, so that the next fsync does not rely on the list of modified extents - so checking for the emptiness of the list to decide if the inode needs to be logged is not reliable, and results in not logging the inode if it was not possible to allocate the extent map for the hole. Fix this by ensuring that if we are only logging that an inode exists (inode item, names/references and xattrs), we don't update the inode's last_log_commit even if it does not have the full sync runtime flag set. A test case for fstests follows soon. 
CC: stable@vger.kernel.org # 5.13+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index dc6eb088d73e..9fd0348be7f5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5526,16 +5526,29 @@ log_extents: spin_lock(&inode->lock); inode->logged_trans = trans->transid; /* - * Don't update last_log_commit if we logged that an inode exists - * after it was loaded to memory (full_sync bit set). - * This is to prevent data loss when we do a write to the inode, - * then the inode gets evicted after all delalloc was flushed, - * then we log it exists (due to a rename for example) and then - * fsync it. This last fsync would do nothing (not logging the - * extents previously written). + * Don't update last_log_commit if we logged that an inode exists. + * We do this for two reasons: + * + * 1) We might have had buffered writes to this inode that were + * flushed and had their ordered extents completed in this + * transaction, but we did not previously log the inode with + * LOG_INODE_ALL. Later the inode was evicted and after that + * it was loaded again and this LOG_INODE_EXISTS log operation + * happened. We must make sure that if an explicit fsync against + * the inode is performed later, it logs the new extents, an + * updated inode item, etc, and syncs the log. The same logic + * applies to direct IO writes instead of buffered writes. + * + * 2) When we log the inode with LOG_INODE_EXISTS, its inode item + * is logged with an i_size of 0 or whatever value was logged + * before. 
If later the i_size of the inode is increased by a + * truncate operation, the log is synced through an fsync of + * some other inode and then finally an explicit fsync against + * this inode is made, we must make sure this fsync logs the + * inode with the new i_size, the hole between old i_size and + * the new i_size, and syncs the log. */ - if (inode_only != LOG_INODE_EXISTS || - !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags)) + if (inode_only != LOG_INODE_EXISTS) inode->last_log_commit = inode->last_sub_trans; spin_unlock(&inode->lock); } From 16a200f66ede3f9afa2e51d90ade017aaa18d213 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sun, 4 Jul 2021 19:14:39 +0800 Subject: [PATCH 331/794] btrfs: check for missing device in btrfs_trim_fs A fstrim on a degraded raid1 can trigger the following null pointer dereference: BTRFS info (device loop0): allowing degraded mounts BTRFS info (device loop0): disk space caching is enabled BTRFS info (device loop0): has skinny extents BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing BTRFS warning (device loop0): devid 2 uuid 97ac16f7-e14d-4db1-95bc-3d489b424adb is missing BTRFS info (device loop0): enabling ssd optimizations BUG: kernel NULL pointer dereference, address: 0000000000000620 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 0 PID: 4574 Comm: fstrim Not tainted 5.13.0-rc7+ #31 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 RIP: 0010:btrfs_trim_fs+0x199/0x4a0 [btrfs] RSP: 0018:ffff959541797d28 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff946f84eca508 RCX: a7a67937adff8608 RDX: ffff946e8122d000 RSI: 0000000000000000 RDI: ffffffffc02fdbf0 RBP: ffff946ea4615000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: ffff946e8122d960 R12: 0000000000000000 R13: ffff959541797db8 R14: ffff946e8122d000 R15: ffff959541797db8 FS: 00007f55917a5080(0000) GS:ffff946f9bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 CR2: 0000000000000620 CR3: 000000002d2c8001 CR4: 00000000000706f0 Call Trace: btrfs_ioctl_fitrim+0x167/0x260 [btrfs] btrfs_ioctl+0x1c00/0x2fe0 [btrfs] ? selinux_file_ioctl+0x140/0x240 ? syscall_trace_enter.constprop.0+0x188/0x240 ? __x64_sys_ioctl+0x83/0xb0 __x64_sys_ioctl+0x83/0xb0 Reproducer: $ mkfs.btrfs -fq -d raid1 -m raid1 /dev/loop0 /dev/loop1 $ mount /dev/loop0 /btrfs $ umount /btrfs $ btrfs dev scan --forget $ mount -o degraded /dev/loop0 /btrfs $ fstrim /btrfs The reason is we call btrfs_trim_free_extents() for the missing device, which uses device->bdev (NULL for missing device) to find if the device supports discard. Fix is to check if the device is missing before calling btrfs_trim_free_extents(). CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d296483d148f..268ce58d4569 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6019,6 +6019,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) + continue; + ret = btrfs_trim_free_extents(device, &group_trimmed); if (ret) { dev_failed++; From 8949b9a114019b03fbd0d03d65b8647cba4feef3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 21 Jul 2021 17:31:48 +0100 Subject: [PATCH 332/794] btrfs: fix lock inversion problem when doing qgroup extent tracing At btrfs_qgroup_trace_extent_post() we call btrfs_find_all_roots() with a NULL value as the transaction handle argument, which makes that function take the commit_root_sem semaphore, which is necessary when we don't hold a transaction handle or any other mechanism to prevent a 
transaction commit from wiping out commit roots. However btrfs_qgroup_trace_extent_post() can be called in a context where we are holding a write lock on an extent buffer from a subvolume tree, namely from btrfs_truncate_inode_items(), called either during truncate or unlink operations. In this case we end up with a lock inversion problem because the commit_root_sem is a higher level lock, always supposed to be acquired before locking any extent buffer. Lockdep detects this lock inversion problem since we switched the extent buffer locks from custom locks to semaphores, and when running btrfs/158 from fstests, it reported the following trace: [ 9057.626435] ====================================================== [ 9057.627541] WARNING: possible circular locking dependency detected [ 9057.628334] 5.14.0-rc2-btrfs-next-93 #1 Not tainted [ 9057.628961] ------------------------------------------------------ [ 9057.629867] kworker/u16:4/30781 is trying to acquire lock: [ 9057.630824] ffff8e2590f58760 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.632542] but task is already holding lock: [ 9057.633551] ffff8e25582d4b70 (&fs_info->commit_root_sem){++++}-{3:3}, at: iterate_extent_inodes+0x10b/0x280 [btrfs] [ 9057.635255] which lock already depends on the new lock. 
[ 9057.636292] the existing dependency chain (in reverse order) is: [ 9057.637240] -> #1 (&fs_info->commit_root_sem){++++}-{3:3}: [ 9057.638138] down_read+0x46/0x140 [ 9057.638648] btrfs_find_all_roots+0x41/0x80 [btrfs] [ 9057.639398] btrfs_qgroup_trace_extent_post+0x37/0x70 [btrfs] [ 9057.640283] btrfs_add_delayed_data_ref+0x418/0x490 [btrfs] [ 9057.641114] btrfs_free_extent+0x35/0xb0 [btrfs] [ 9057.641819] btrfs_truncate_inode_items+0x424/0xf70 [btrfs] [ 9057.642643] btrfs_evict_inode+0x454/0x4f0 [btrfs] [ 9057.643418] evict+0xcf/0x1d0 [ 9057.643895] do_unlinkat+0x1e9/0x300 [ 9057.644525] do_syscall_64+0x3b/0xc0 [ 9057.645110] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 9057.645835] -> #0 (btrfs-tree-00){++++}-{3:3}: [ 9057.646600] __lock_acquire+0x130e/0x2210 [ 9057.647248] lock_acquire+0xd7/0x310 [ 9057.647773] down_read_nested+0x4b/0x140 [ 9057.648350] __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.649175] btrfs_read_lock_root_node+0x31/0x40 [btrfs] [ 9057.650010] btrfs_search_slot+0x537/0xc00 [btrfs] [ 9057.650849] scrub_print_warning_inode+0x89/0x370 [btrfs] [ 9057.651733] iterate_extent_inodes+0x1e3/0x280 [btrfs] [ 9057.652501] scrub_print_warning+0x15d/0x2f0 [btrfs] [ 9057.653264] scrub_handle_errored_block.isra.0+0x135f/0x1640 [btrfs] [ 9057.654295] scrub_bio_end_io_worker+0x101/0x2e0 [btrfs] [ 9057.655111] btrfs_work_helper+0xf8/0x400 [btrfs] [ 9057.655831] process_one_work+0x247/0x5a0 [ 9057.656425] worker_thread+0x55/0x3c0 [ 9057.656993] kthread+0x155/0x180 [ 9057.657494] ret_from_fork+0x22/0x30 [ 9057.658030] other info that might help us debug this: [ 9057.659064] Possible unsafe locking scenario: [ 9057.659824] CPU0 CPU1 [ 9057.660402] ---- ---- [ 9057.660988] lock(&fs_info->commit_root_sem); [ 9057.661581] lock(btrfs-tree-00); [ 9057.662348] lock(&fs_info->commit_root_sem); [ 9057.663254] lock(btrfs-tree-00); [ 9057.663690] *** DEADLOCK *** [ 9057.664437] 4 locks held by kworker/u16:4/30781: [ 9057.665023] #0: ffff8e25922a1148 
((wq_completion)btrfs-scrub){+.+.}-{0:0}, at: process_one_work+0x1c7/0x5a0 [ 9057.666260] #1: ffffabb3451ffe70 ((work_completion)(&work->normal_work)){+.+.}-{0:0}, at: process_one_work+0x1c7/0x5a0 [ 9057.667639] #2: ffff8e25922da198 (&ret->mutex){+.+.}-{3:3}, at: scrub_handle_errored_block.isra.0+0x5d2/0x1640 [btrfs] [ 9057.669017] #3: ffff8e25582d4b70 (&fs_info->commit_root_sem){++++}-{3:3}, at: iterate_extent_inodes+0x10b/0x280 [btrfs] [ 9057.670408] stack backtrace: [ 9057.670976] CPU: 7 PID: 30781 Comm: kworker/u16:4 Not tainted 5.14.0-rc2-btrfs-next-93 #1 [ 9057.672030] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 9057.673492] Workqueue: btrfs-scrub btrfs_work_helper [btrfs] [ 9057.674258] Call Trace: [ 9057.674588] dump_stack_lvl+0x57/0x72 [ 9057.675083] check_noncircular+0xf3/0x110 [ 9057.675611] __lock_acquire+0x130e/0x2210 [ 9057.676132] lock_acquire+0xd7/0x310 [ 9057.676605] ? __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.677313] ? lock_is_held_type+0xe8/0x140 [ 9057.677849] down_read_nested+0x4b/0x140 [ 9057.678349] ? __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.679068] __btrfs_tree_read_lock+0x24/0x110 [btrfs] [ 9057.679760] btrfs_read_lock_root_node+0x31/0x40 [btrfs] [ 9057.680458] btrfs_search_slot+0x537/0xc00 [btrfs] [ 9057.681083] ? _raw_spin_unlock+0x29/0x40 [ 9057.681594] ? btrfs_find_all_roots_safe+0x11f/0x140 [btrfs] [ 9057.682336] scrub_print_warning_inode+0x89/0x370 [btrfs] [ 9057.683058] ? btrfs_find_all_roots_safe+0x11f/0x140 [btrfs] [ 9057.683834] ? scrub_write_block_to_dev_replace+0xb0/0xb0 [btrfs] [ 9057.684632] iterate_extent_inodes+0x1e3/0x280 [btrfs] [ 9057.685316] scrub_print_warning+0x15d/0x2f0 [btrfs] [ 9057.685977] ? ___ratelimit+0xa4/0x110 [ 9057.686460] scrub_handle_errored_block.isra.0+0x135f/0x1640 [btrfs] [ 9057.687316] scrub_bio_end_io_worker+0x101/0x2e0 [btrfs] [ 9057.688021] btrfs_work_helper+0xf8/0x400 [btrfs] [ 9057.688649] ? 
lock_is_held_type+0xe8/0x140 [ 9057.689180] process_one_work+0x247/0x5a0 [ 9057.689696] worker_thread+0x55/0x3c0 [ 9057.690175] ? process_one_work+0x5a0/0x5a0 [ 9057.690731] kthread+0x155/0x180 [ 9057.691158] ? set_kthread_struct+0x40/0x40 [ 9057.691697] ret_from_fork+0x22/0x30 Fix this by making btrfs_find_all_roots() never attempt to lock the commit_root_sem when it is called from btrfs_qgroup_trace_extent_post(). We can't just pass a non-NULL transaction handle to btrfs_find_all_roots() from btrfs_qgroup_trace_extent_post(), because that would make backref lookup not use commit roots and acquire read locks on extent buffers, and therefore could deadlock when btrfs_qgroup_trace_extent_post() is called from the btrfs_truncate_inode_items() code path which has acquired a write lock on an extent buffer of the subvolume btree. CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 6 +++--- fs/btrfs/backref.h | 3 ++- fs/btrfs/delayed-ref.c | 4 ++-- fs/btrfs/qgroup.c | 38 +++++++++++++++++++++++++++-------- fs/btrfs/qgroup.h | 2 +- fs/btrfs/tests/qgroup-tests.c | 20 +++++++++--------- 6 files changed, 48 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7a8a2fc19533..78b202d198b8 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1488,15 +1488,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans, int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 time_seq, struct ulist **roots, - bool ignore_offset) + bool ignore_offset, bool skip_commit_root_sem) { int ret; - if (!trans) + if (!trans && !skip_commit_root_sem) down_read(&fs_info->commit_root_sem); ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr, time_seq, roots, ignore_offset); - if (!trans) + if (!trans && !skip_commit_root_sem) up_read(&fs_info->commit_root_sem); return ret; } diff --git 
a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 17abde7f794c..ff5f07f9940b 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -47,7 +47,8 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, const u64 *extent_item_pos, bool ignore_offset); int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **roots, bool ignore_offset); + u64 time_seq, struct ulist **roots, bool ignore_offset, + bool skip_commit_root_sem); char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, u32 name_len, unsigned long name_off, struct extent_buffer *eb_in, u64 parent, diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 06bc842ecdb3..ca848b183474 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -974,7 +974,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); if (qrecord_inserted) - btrfs_qgroup_trace_extent_post(fs_info, record); + btrfs_qgroup_trace_extent_post(trans, record); return 0; } @@ -1069,7 +1069,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, if (qrecord_inserted) - return btrfs_qgroup_trace_extent_post(fs_info, record); + return btrfs_qgroup_trace_extent_post(trans, record); return 0; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 07ec06d4e972..0fa121171ca1 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1704,17 +1704,39 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, +int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, struct btrfs_qgroup_extent_record *qrecord) { struct ulist *old_root; u64 bytenr = qrecord->bytenr; int ret; - ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false); + /* + * We are always called in a context where we are already holding a + * transaction handle. 
Often we are called when adding a data delayed + * reference from btrfs_truncate_inode_items() (truncating or unlinking), + * in which case we will be holding a write lock on extent buffer from a + * subvolume tree. In this case we can't allow btrfs_find_all_roots() to + * acquire fs_info->commit_root_sem, because that is a higher level lock + * that must be acquired before locking any extent buffers. + * + * So we want btrfs_find_all_roots() to not acquire the commit_root_sem + * but we can't pass it a non-NULL transaction handle, because otherwise + * it would not use commit roots and would lock extent buffers, causing + * a deadlock if it ends up trying to read lock the same extent buffer + * that was previously write locked at btrfs_truncate_inode_items(). + * + * So pass a NULL transaction handle to btrfs_find_all_roots() and + * explicitly tell it to not acquire the commit_root_sem - if we are + * holding a transaction handle we don't need its protection. + */ + ASSERT(trans != NULL); + + ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root, + false, true); if (ret < 0) { - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - btrfs_warn(fs_info, + trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + btrfs_warn(trans->fs_info, "error accounting new delayed refs extent (err code: %d), quota inconsistent", ret); return 0; @@ -1758,7 +1780,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, kfree(record); return 0; } - return btrfs_qgroup_trace_extent_post(fs_info, record); + return btrfs_qgroup_trace_extent_post(trans, record); } int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, @@ -2629,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) /* Search commit root to find old_roots */ ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, - &record->old_roots, false); + &record->old_roots, false, false); if (ret < 0) goto cleanup; } @@ 
-2645,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) * current root. It's safe inside commit_transaction(). */ ret = btrfs_find_all_roots(trans, fs_info, - record->bytenr, BTRFS_SEQ_LAST, &new_roots, false); + record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false); if (ret < 0) goto cleanup; if (qgroup_to_skip) { @@ -3179,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, num_bytes = found.offset; ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, - &roots, false); + &roots, false, false); if (ret < 0) goto out; /* For rescan, just pass old_roots as NULL */ diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 7283e4f549af..880e9df0dac1 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -298,7 +298,7 @@ int btrfs_qgroup_trace_extent_nolock( * using current root, then we can move all expensive backref walk out of * transaction committing, but not now as qgroup accounting will be wrong again. */ -int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, +int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, struct btrfs_qgroup_extent_record *qrecord); /* diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index f3137285a9e2..98b5aaba46f1 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -224,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, * quota. 
*/ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -237,7 +237,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -261,7 +261,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, new_roots = NULL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -273,7 +273,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return -EINVAL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -325,7 +325,7 @@ static int test_multiple_refs(struct btrfs_root *root, } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -338,7 +338,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -360,7 +360,7 @@ static int test_multiple_refs(struct btrfs_root *root, } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -373,7 +373,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -401,7 +401,7 @@ static int test_multiple_refs(struct btrfs_root 
*root, } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -414,7 +414,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); From c7c3a6dcb1efd52949acc1e640be9aad1206a13a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Jul 2021 09:53:59 +0200 Subject: [PATCH 333/794] btrfs: store a block_device in struct btrfs_ordered_extent Store the block device instead of the gendisk in the btrfs_ordered_extent structure instead of acquiring a reference to it later. Note: this is from series removing bdgrab/bdput, btrfs is one of the last users. Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- fs/btrfs/ordered-data.c | 2 -- fs/btrfs/ordered-data.h | 3 +-- fs/btrfs/zoned.c | 12 ++++-------- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8f60314c36c5..0117d867ecf8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2992,7 +2992,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - if (ordered_extent->disk) + if (ordered_extent->bdev) btrfs_rewrite_logical_zoned(ordered_extent); btrfs_free_io_failure_record(inode, start, end); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 6eb41b7c0c84..5c0f8481e25e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -190,8 +190,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset entry->truncated_len = (u64)-1; entry->qgroup_rsv = ret; entry->physical = (u64)-1; - entry->disk = NULL; - entry->partno = (u8)-1; ASSERT(type == BTRFS_ORDERED_REGULAR || type == BTRFS_ORDERED_NOCOW || diff --git 
a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 566472004edd..b2d88aba8420 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -145,8 +145,7 @@ struct btrfs_ordered_extent { * command in a workqueue context */ u64 physical; - struct gendisk *disk; - u8 partno; + struct block_device *bdev; }; /* diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 297c0b1c0634..907c2cc45c9c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1349,8 +1349,7 @@ void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset, return; ordered->physical = physical; - ordered->disk = bio->bi_bdev->bd_disk; - ordered->partno = bio->bi_bdev->bd_partno; + ordered->bdev = bio->bi_bdev; btrfs_put_ordered_extent(ordered); } @@ -1362,18 +1361,16 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) struct extent_map_tree *em_tree; struct extent_map *em; struct btrfs_ordered_sum *sum; - struct block_device *bdev; u64 orig_logical = ordered->disk_bytenr; u64 *logical = NULL; int nr, stripe_len; /* Zoned devices should not have partitions. 
So, we can assume it is 0 */ - ASSERT(ordered->partno == 0); - bdev = bdgrab(ordered->disk->part0); - if (WARN_ON(!bdev)) + ASSERT(!bdev_is_partition(ordered->bdev)); + if (WARN_ON(!ordered->bdev)) return; - if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev, + if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev, ordered->physical, &logical, &nr, &stripe_len))) goto out; @@ -1402,7 +1399,6 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) out: kfree(logical); - bdput(bdev); } bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info, From 8311ee2164c5cd1b63a601ea366f540eae89f10e Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 20 Jul 2021 18:01:16 +0800 Subject: [PATCH 334/794] spi: meson-spicc: fix memory leak in meson_spicc_remove In meson_spicc_probe, the error handling code needs to clean up master by calling spi_master_put, but the remove function does not have this function call. This will lead to memory leak of spicc->master. 
Reported-by: Dongliang Mu Fixes: 454fa271bc4e("spi: Add Meson SPICC driver") Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20210720100116.1438974-1-mudongliangabcd@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-meson-spicc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index b2c4621db34d..c208efeadd18 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -785,6 +785,8 @@ static int meson_spicc_remove(struct platform_device *pdev) clk_disable_unprepare(spicc->core); clk_disable_unprepare(spicc->pclk); + spi_master_put(spicc->master); + return 0; } From e09f2ab8eecc6dcbd7013a1303cbe56b00dc9fb0 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Thu, 22 Jul 2021 15:48:45 +0200 Subject: [PATCH 335/794] spi: update modalias_show after of_device_uevent_modalias support Commit 3ce6c9e2617e ("spi: add of_device_uevent_modalias support") is incomplete, as it didn't update the modalias_show function to generate the of: modalias string if available. Fixes: 3ce6c9e2617e ("spi: add of_device_uevent_modalias support") Signed-off-by: Andreas Schwab Link: https://lore.kernel.org/r/mvmwnpi4fya.fsf@suse.de Signed-off-by: Mark Brown --- drivers/spi/spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 35928d0843d9..397dd2959bfd 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -58,6 +58,10 @@ modalias_show(struct device *dev, struct device_attribute *a, char *buf) const struct spi_device *spi = to_spi_device(dev); int len; + len = of_device_modalias(dev, buf, PAGE_SIZE); + if (len != -ENODEV) + return len; + len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1); if (len != -ENODEV) return len; From 67f0d6d9883c13174669f88adac4f0ee656cc16a Mon Sep 17 00:00:00 2001 From: Haoran Luo Date: Wed, 21 Jul 2021 14:12:07 +0000 Subject: [PATCH 336/794] tracing: Fix bug in rb_per_cpu_empty() that might cause deadloop. 
The "rb_per_cpu_empty()" function misinterprets the condition (as not-empty) when "head_page" and "commit_page" of "struct ring_buffer_per_cpu" point to the same buffer page, whose "buffer_data_page" is empty and whose "read" field is non-zero. An error scenario can be constructed as follows (kernel perspective): 1. All pages in the buffer have been accessed by reader(s) so that all of them will have a non-zero "read" field. 2. Read and clear all buffer pages so that "rb_num_of_entries()" will return 0, indicating there is no more data to read. It is also required that the "read_page", "commit_page" and "tail_page" point to the same page, while "head_page" is the page following them. 3. Invoke "ring_buffer_lock_reserve()" with a large enough "length" so that it shoots past the end of the current tail buffer page. Now the "head_page", "commit_page" and "tail_page" point to the same page. 4. Discard the current event with "ring_buffer_discard_commit()", so that "head_page", "commit_page" and "tail_page" point to a page whose buffer data page is now empty. Once the error scenario has been constructed, "tracing_read_pipe" will be trapped inside an infinite loop: "trace_empty()" returns 0 since "rb_per_cpu_empty()" returns 0 when it hits the CPU containing the ring buffer constructed this way. Then "trace_find_next_entry_inc()" always returns NULL since "rb_num_of_entries()" reports there are no more entries to read. Finally "trace_seq_to_user()" returns "-EBUSY", sending "tracing_read_pipe" back to the start of the "waitagain" loop. I've also written a proof-of-concept script to construct the scenario and trigger the bug automatically; you can use it to trace and validate my reasoning above: https://github.com/aegistudio/RingBufferDetonator.git Tests have been carried out on linux kernel 5.14-rc2 (2734d6c1b1a089fb593ef6a23d4b70903526fe0c), my fixed version of the kernel (for testing whether my update fixes the bug) and some older kernels (to determine the range of affected kernels).
Test result is also attached to the proof-of-concept repository. Link: https://lore.kernel.org/linux-trace-devel/YPaNxsIlb2yjSi5Y@aegistudio/ Link: https://lore.kernel.org/linux-trace-devel/YPgrN85WL9VyrZ55@aegistudio Cc: stable@vger.kernel.org Fixes: bf41a158cacba ("ring-buffer: make reentrant") Suggested-by: Linus Torvalds Signed-off-by: Haoran Luo Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d1463eac11a3..e592d1df6f88 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3880,10 +3880,30 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) if (unlikely(!head)) return true; - return reader->read == rb_page_commit(reader) && - (commit == reader || - (commit == head && - head->read == rb_page_commit(commit))); + /* Reader should exhaust content in reader page */ + if (reader->read != rb_page_commit(reader)) + return false; + + /* + * If writers are committing on the reader page, knowing all + * committed content has been read, the ring buffer is empty. + */ + if (commit == reader) + return true; + + /* + * If writers are committing on a page other than reader page + * and head page, there should always be content to read. + */ + if (commit != head) + return false; + + /* + * Writers are committing on the head page, we just need + * to care about there're committed data, and the reader will + * swap reader page with head page when it is to read data. + */ + return rb_page_commit(commit) == 0; } /** From 2485bd7557a7edb4520b4072af464f0a08c8efe0 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 22 Jul 2021 14:53:32 +1000 Subject: [PATCH 337/794] cifs: only write 64kb at a time when fallocating a small region of a file We only allow sending single credit writes through the SMB2_write() synchronous api so split this into smaller chunks. 
Fixes: 966a3cb7c7db ("cifs: improve fallocate emulation") Signed-off-by: Ronnie Sahlberg Reported-by: Namjae Jeon Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index ba3c58e1f725..5cefb5972396 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3617,7 +3617,7 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, char *buf) { struct cifs_io_parms io_parms = {0}; - int nbytes; + int rc, nbytes; struct kvec iov[2]; io_parms.netfid = cfile->fid.netfid; @@ -3625,13 +3625,25 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, io_parms.tcon = tcon; io_parms.persistent_fid = cfile->fid.persistent_fid; io_parms.volatile_fid = cfile->fid.volatile_fid; - io_parms.offset = off; - io_parms.length = len; - /* iov[0] is reserved for smb header */ - iov[1].iov_base = buf; - iov[1].iov_len = io_parms.length; - return SMB2_write(xid, &io_parms, &nbytes, iov, 1); + while (len) { + io_parms.offset = off; + io_parms.length = len; + if (io_parms.length > SMB2_MAX_BUFFER_SIZE) + io_parms.length = SMB2_MAX_BUFFER_SIZE; + /* iov[0] is reserved for smb header */ + iov[1].iov_base = buf; + iov[1].iov_len = io_parms.length; + rc = SMB2_write(xid, &io_parms, &nbytes, iov, 1); + if (rc) + break; + if (nbytes > len) + return -EINVAL; + buf += nbytes; + off += nbytes; + len -= nbytes; + } + return rc; } static int smb3_simple_fallocate_range(unsigned int xid, From b62366181a5e9473e9c10e98f400049491c55876 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 16 Jul 2021 03:26:41 -0300 Subject: [PATCH 338/794] cifs: support share failover when remounting When remouting a DFS share, force a new DFS referral of the path and if the currently cached targets do not match any of the new targets or there was no cached targets, then mark it for reconnect. 
For example: $ mount //dom/dfs/link /mnt -o username=foo,password=bar $ ls /mnt oldfile.txt change target share of 'link' in server settings $ mount /mnt -o remount,username=foo,password=bar $ ls /mnt newfile.txt Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 4 +- fs/cifs/dfs_cache.c | 229 ++++++++++++++++++++++++++++++++++++------- fs/cifs/dfs_cache.h | 3 + fs/cifs/fs_context.c | 7 ++ 4 files changed, 203 insertions(+), 40 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1b04d6ec14dd..3781eee9360a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -220,7 +220,7 @@ cifs_reconnect(struct TCP_Server_Info *server) #ifdef CONFIG_CIFS_DFS_UPCALL struct super_block *sb = NULL; struct cifs_sb_info *cifs_sb = NULL; - struct dfs_cache_tgt_list tgt_list = {0}; + struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list); struct dfs_cache_tgt_iterator *tgt_it = NULL; #endif @@ -3130,7 +3130,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_ { int rc; char *npath = NULL; - struct dfs_cache_tgt_list tgt_list = {0}; + struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list); struct dfs_cache_tgt_iterator *tgt_it = NULL; struct smb3_fs_context tmp_ctx = {NULL}; diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 7c1769714609..283745592844 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -19,6 +19,7 @@ #include "cifs_debug.h" #include "cifs_unicode.h" #include "smb2glob.h" +#include "dns_resolve.h" #include "dfs_cache.h" @@ -911,6 +912,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl) err_free_it: list_for_each_entry_safe(it, nit, head, it_list) { + list_del(&it->it_list); kfree(it->it_name); kfree(it); } @@ -1293,6 +1295,194 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, return 0; } +static bool target_share_equal(struct TCP_Server_Info *server, const char 
*s1, const char *s2) +{ + char unc[sizeof("\\\\") + SERVER_NAME_LENGTH] = {0}; + const char *host; + size_t hostlen; + char *ip = NULL; + struct sockaddr sa; + bool match; + int rc; + + if (strcasecmp(s1, s2)) + return false; + + /* + * Resolve share's hostname and check if server address matches. Otherwise just ignore it + * as we could not have upcall to resolve hostname or failed to convert ip address. + */ + match = true; + extract_unc_hostname(s1, &host, &hostlen); + scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host); + + rc = dns_resolve_server_name_to_ip(unc, &ip, NULL); + if (rc < 0) { + cifs_dbg(FYI, "%s: could not resolve %.*s. assuming server address matches.\n", + __func__, (int)hostlen, host); + return true; + } + + if (!cifs_convert_address(&sa, ip, strlen(ip))) { + cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n", + __func__, ip); + } else { + mutex_lock(&server->srv_mutex); + match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa); + mutex_unlock(&server->srv_mutex); + } + + kfree(ip); + return match; +} + +/* + * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new + * target shares in @refs. + */ +static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl, + const struct dfs_info3_param *refs, int numrefs) +{ + struct dfs_cache_tgt_iterator *it; + int i; + + for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) { + for (i = 0; i < numrefs; i++) { + if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it), + refs[i].node_name)) + return; + } + } + + cifs_dbg(FYI, "%s: no cached or matched targets. 
mark dfs share for reconnect.\n", __func__); + for (i = 0; i < tcon->ses->chan_count; i++) { + spin_lock(&GlobalMid_Lock); + if (tcon->ses->chans[i].server->tcpStatus != CifsExiting) + tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect; + spin_unlock(&GlobalMid_Lock); + } +} + +/* Refresh dfs referral of tcon and mark it for reconnect if needed */ +static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh) +{ + const char *path = tcon->dfs_path + 1; + struct cifs_ses *ses; + struct cache_entry *ce; + struct dfs_info3_param *refs = NULL; + int numrefs = 0; + bool needs_refresh = false; + struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); + int rc = 0; + unsigned int xid; + + ses = find_ipc_from_server_path(sessions, path); + if (IS_ERR(ses)) { + cifs_dbg(FYI, "%s: could not find ipc session\n", __func__); + return PTR_ERR(ses); + } + + down_read(&htable_rw_lock); + ce = lookup_cache_entry(path); + needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce); + if (!IS_ERR(ce)) { + rc = get_targets(ce, &tl); + if (rc) + cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc); + } + up_read(&htable_rw_lock); + + if (!needs_refresh) { + rc = 0; + goto out; + } + + xid = get_xid(); + rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); + free_xid(xid); + + /* Create or update a cache entry with the new referral */ + if (!rc) { + dump_refs(refs, numrefs); + + down_write(&htable_rw_lock); + ce = lookup_cache_entry(path); + if (IS_ERR(ce)) + add_cache_entry_locked(refs, numrefs); + else if (force_refresh || cache_entry_expired(ce)) + update_cache_entry_locked(ce, refs, numrefs); + up_write(&htable_rw_lock); + + mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs); + } + +out: + dfs_cache_free_tgts(&tl); + free_dfs_info_array(refs, numrefs); + return rc; +} + +/** + * dfs_cache_remount_fs - remount a DFS share + * + * Reconfigure dfs mount by forcing a new DFS referral and if the currently cached 
targets do not + * match any of the new targets, mark it for reconnect. + * + * @cifs_sb: cifs superblock. + * + * Return zero if remounted, otherwise non-zero. + */ +int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) +{ + struct cifs_tcon *tcon; + struct mount_group *mg; + struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL}; + int rc; + + if (!cifs_sb || !cifs_sb->master_tlink) + return -EINVAL; + + tcon = cifs_sb_master_tcon(cifs_sb); + if (!tcon->dfs_path) { + cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__); + return 0; + } + + if (uuid_is_null(&cifs_sb->dfs_mount_id)) { + cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__); + return -EINVAL; + } + + mutex_lock(&mount_group_list_lock); + mg = find_mount_group_locked(&cifs_sb->dfs_mount_id); + if (IS_ERR(mg)) { + mutex_unlock(&mount_group_list_lock); + cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__); + return PTR_ERR(mg); + } + kref_get(&mg->refcount); + mutex_unlock(&mount_group_list_lock); + + spin_lock(&mg->lock); + memcpy(&sessions, mg->sessions, mg->num_sessions * sizeof(mg->sessions[0])); + spin_unlock(&mg->lock); + + /* + * After reconnecting to a different server, unique ids won't match anymore, so we disable + * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). + */ + cifs_autodisable_serverino(cifs_sb); + /* + * Force the use of prefix path to support failover on DFS paths that resolve to targets + * that have different prefix paths. + */ + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + rc = refresh_tcon(sessions, tcon, true); + + kref_put(&mg->refcount, mount_group_release); + return rc; +} + /* * Refresh all active dfs mounts regardless of whether they are in cache or not. 
* (cache can be cleared) @@ -1303,7 +1493,6 @@ static void refresh_mounts(struct cifs_ses **sessions) struct cifs_ses *ses; struct cifs_tcon *tcon, *ntcon; struct list_head tcons; - unsigned int xid; INIT_LIST_HEAD(&tcons); @@ -1321,44 +1510,8 @@ static void refresh_mounts(struct cifs_ses **sessions) spin_unlock(&cifs_tcp_ses_lock); list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) { - const char *path = tcon->dfs_path + 1; - struct cache_entry *ce; - struct dfs_info3_param *refs = NULL; - int numrefs = 0; - bool needs_refresh = false; - int rc = 0; - list_del_init(&tcon->ulist); - - ses = find_ipc_from_server_path(sessions, path); - if (IS_ERR(ses)) - goto next_tcon; - - down_read(&htable_rw_lock); - ce = lookup_cache_entry(path); - needs_refresh = IS_ERR(ce) || cache_entry_expired(ce); - up_read(&htable_rw_lock); - - if (!needs_refresh) - goto next_tcon; - - xid = get_xid(); - rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - free_xid(xid); - - /* Create or update a cache entry with the new referral */ - if (!rc) { - down_write(&htable_rw_lock); - ce = lookup_cache_entry(path); - if (IS_ERR(ce)) - add_cache_entry_locked(refs, numrefs); - else if (cache_entry_expired(ce)) - update_cache_entry_locked(ce, refs, numrefs); - up_write(&htable_rw_lock); - } - -next_tcon: - free_dfs_info_array(refs, numrefs); + refresh_tcon(sessions, tcon, false); cifs_put_tcon(tcon); } } diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index b29d3ae64829..52070d1df189 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -13,6 +13,8 @@ #include #include "cifsglob.h" +#define DFS_CACHE_TGT_LIST_INIT(var) { .tl_numtgts = 0, .tl_list = LIST_HEAD_INIT((var).tl_list), } + struct dfs_cache_tgt_list { int tl_numtgts; struct list_head tl_list; @@ -44,6 +46,7 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id); void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct 
cifs_ses *ses); char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap); +int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb); static inline struct dfs_cache_tgt_iterator * dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl, diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 553adfbcc22a..9a59d7ff9a11 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -13,6 +13,9 @@ #include #include #include +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif */ #include @@ -779,6 +782,10 @@ static int smb3_reconfigure(struct fs_context *fc) smb3_cleanup_fs_context_contents(cifs_sb->ctx); rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); smb3_update_mnt_flags(cifs_sb); +#ifdef CONFIG_CIFS_DFS_UPCALL + if (!rc) + rc = dfs_cache_remount_fs(cifs_sb); +#endif return rc; } From 21a64910997e0c1d268bebf9b1217ba5804d592d Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 22 Jul 2021 13:50:41 -0500 Subject: [PATCH 339/794] CIFS: Clarify SMB1 code for POSIX Create Coverity also complains about the way we calculate the offset (starting from the address of a 4 byte array within the header structure rather than from the beginning of the struct plus 4 bytes) for SMB1 CIFSPOSIXCreate. This changeset doesn't change the address but makes it slightly clearer. 
Addresses-Coverity: 711518 ("Out of bounds write") Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f72e3b3dca69..d4144c182604 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1081,7 +1081,8 @@ PsxCreat: param_offset = offsetof(struct smb_com_transaction2_spi_req, InformationLevel) - 4; offset = param_offset + params; - pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset); + /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ + pdata = (OPEN_PSX_REQ *)((char *)(pSMB) + offset + 4); pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); pdata->Permissions = cpu_to_le64(mode); pdata->PosixOpenFlags = cpu_to_le32(posix_flags); From 7b09d4e0be94968b7c6c117e34ca90cea9c6d986 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 22 Jul 2021 14:35:15 -0500 Subject: [PATCH 340/794] CIFS: Clarify SMB1 code for POSIX delete file Coverity also complains about the way we calculate the offset (starting from the address of a 4 byte array within the header structure rather than from the beginning of the struct plus 4 bytes) for SMB1 CIFSPOSIXDelFile. This changeset doesn't change the address but makes it slightly clearer. Addresses-Coverity: 711519 ("Out of bounds write") Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index d4144c182604..65d1a65bfc37 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -873,8 +873,11 @@ PsxDelete: InformationLevel) - 4; offset = param_offset + params; - /* Setup pointer to Request Data (inode type) */ - pRqD = (struct unlink_psx_rq *)(((char *)&pSMB->hdr.Protocol) + offset); + /* Setup pointer to Request Data (inode type). 
+ * Note that SMB offsets are from the beginning of SMB which is 4 bytes + * in, after RFC1001 field + */ + pRqD = (struct unlink_psx_rq *)((char *)(pSMB) + offset + 4); pRqD->type = cpu_to_le16(type); pSMB->ParameterOffset = cpu_to_le16(param_offset); pSMB->DataOffset = cpu_to_le16(offset); From 0cc936f74bcacb039b7533aeac0a887dfc896bf6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 22 Jul 2021 17:08:07 -0600 Subject: [PATCH 341/794] io_uring: fix early fdput() of file A previous commit shuffled some code around, and inadvertently used struct file after fdput() had been called on it. As we can't touch the file post fdput() dropping our reference, move the fdput() to after that has been done. Cc: Pavel Begunkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/io-uring/YPnqM0fY3nM5RdRI@zeniv-ca.linux.org.uk/ Fixes: f2a48dd09b8e ("io_uring: refactor io_sq_offload_create()") Reported-by: Al Viro Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index fe3d948658ad..f2fe4eca150b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7991,9 +7991,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, f = fdget(p->wq_fd); if (!f.file) return -ENXIO; - fdput(f); - if (f.file->f_op != &io_uring_fops) + if (f.file->f_op != &io_uring_fops) { + fdput(f); return -EINVAL; + } + fdput(f); } if (ctx->flags & IORING_SETUP_SQPOLL) { struct task_struct *tsk; From 488968a8945c119859d91bb6a8dc13bf50002f15 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 23 Jul 2021 11:21:24 +1000 Subject: [PATCH 342/794] cifs: fix fallocate when trying to allocate a hole. Remove the conditional checking for out_data_len and skipping the fallocate if it is 0. This is wrong and will actually change any legitimate fallocate where the entire region is unallocated into a no-op.
Additionally, before allocating the range, if FALLOC_FL_KEEP_SIZE is set then we need to clamp the length of the fallocate region as to not extend the size of the file. Fixes: 966a3cb7c7db ("cifs: improve fallocate emulation") Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 5cefb5972396..23d6f4d71649 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3667,11 +3667,6 @@ static int smb3_simple_fallocate_range(unsigned int xid, (char **)&out_data, &out_data_len); if (rc) goto out; - /* - * It is already all allocated - */ - if (out_data_len == 0) - goto out; buf = kzalloc(1024 * 1024, GFP_KERNEL); if (buf == NULL) { @@ -3794,6 +3789,24 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, goto out; } + if (keep_size == true) { + /* + * We can not preallocate pages beyond the end of the file + * in SMB2 + */ + if (off >= i_size_read(inode)) { + rc = 0; + goto out; + } + /* + * For fallocates that are partially beyond the end of file, + * clamp len so we only fallocate up to the end of file. + */ + if (off + len > i_size_read(inode)) { + len = i_size_read(inode) - off; + } + } + if ((keep_size == true) || (i_size_read(inode) >= off + len)) { /* * At this point, we are trying to fallocate an internal From 29f6a20c21b5bdc7eb623a712bbf7b99612ee746 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 2 Jul 2021 21:49:14 +0200 Subject: [PATCH 343/794] arm64: dts: ls1028: sl28: fix networking for variant 2 The PHY configuration for the variant 2 is still missing the flag for in-band signalling between PHY and MAC. Both sides - MAC and PHY - have to match the setting. For now, Linux only supports setting the MAC side and thus it has to match the setting the bootloader is configuring. Enable in-band signalling to make ethernet work. 
Fixes: ab43f0307449 ("arm64: dts: ls1028a: sl28: add support for variant 2") Signed-off-by: Michael Walle Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts index dd764b720fb0..f6a79c8080d1 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts @@ -54,6 +54,7 @@ &mscc_felix_port0 { label = "swp0"; + managed = "in-band-status"; phy-handle = <&phy0>; phy-mode = "sgmii"; status = "okay"; @@ -61,6 +62,7 @@ &mscc_felix_port1 { label = "swp1"; + managed = "in-band-status"; phy-handle = <&phy1>; phy-mode = "sgmii"; status = "okay"; From 828db68f4ff1ab6982a36a56522b585160dc8c8e Mon Sep 17 00:00:00 2001 From: Oleksandr Suvorov Date: Tue, 13 Jul 2021 23:21:07 +0300 Subject: [PATCH 344/794] ARM: dts: colibri-imx6ull: limit SDIO clock to 25MHz NXP and AzureWave don't recommend using SDIO bus mode 3.3V@50MHz due to noise affecting the wireless throughput. Colibri iMX6ULL uses only 3.3V signaling for Wi-Fi module AW-CM276NF. Limit the SDIO Clock on Colibri iMX6ULL to 25MHz. 
Fixes: c2e4987e0e02 ("ARM: dts: imx6ull: add Toradex Colibri iMX6ULL support") Signed-off-by: Oleksandr Suvorov Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi index a0545431b3dc..9f1e38282bee 100644 --- a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi @@ -43,6 +43,7 @@ assigned-clock-rates = <0>, <198000000>; cap-power-off-card; keep-power-in-suspend; + max-frequency = <25000000>; mmc-pwrseq = <&wifi_pwrseq>; no-1-8-v; non-removable; From c09dc9e1cd3c205f66b2505d742e819735c0eb6f Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 29 Jun 2021 11:13:46 +0200 Subject: [PATCH 345/794] riscv: Fix memory_limit for 64-bit kernel As described in Documentation/riscv/vm-layout.rst, the end of the virtual address space for 64-bit kernel is occupied by the modules/BPF/ kernel mappings so this actually reduces the amount of memory we are able to map and then use in the linear mapping. So make sure this limit is correctly set. Signed-off-by: Alexandre Ghiti Fixes: 2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear mapping") Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ac48742fa6fc..e4356d65fdce 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -127,10 +127,17 @@ void __init mem_init(void) } /* - * The default maximal physical memory size is -PAGE_OFFSET, - * limit the memory size via mem. + * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel, + * whereas for 64-bit kernel, the end of the virtual address space is occupied + * by the modules/BPF/kernel mappings which reduces the available size of the + * linear mapping. + * Limit the memory size via mem. 
*/ +#ifdef CONFIG_64BIT +static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G; +#else static phys_addr_t memory_limit = -PAGE_OFFSET; +#endif static int __init early_mem(char *p) { From c99127c452484ac89c75af39c3c865174ce2de99 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 29 Jun 2021 11:13:47 +0200 Subject: [PATCH 346/794] riscv: Make sure the linear mapping does not use the kernel mapping For 64-bit kernel, the end of the address space is occupied by the kernel mapping and currently, the functions to populate the kernel page tables (i.e. create_p*d_mapping) do not override existing mapping so we must make sure the linear mapping does not map memory in the kernel mapping by clipping the memory above the memory limit. Signed-off-by: Alexandre Ghiti Fixes: c9811e379b21 ("riscv: Add mem kernel parameter support") Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index e4356d65fdce..644a34b0d77d 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -717,6 +717,8 @@ static void __init setup_vm_final(void) if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); + if (end >= __pa(PAGE_OFFSET) + memory_limit) + end = __pa(PAGE_OFFSET) + memory_limit; map_size = best_map_size(start, end - start); for (pa = start; pa < end; pa += map_size) { From db6b84a368b495cb7e41be9cb9e73d4d0537d027 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 29 Jun 2021 11:13:48 +0200 Subject: [PATCH 347/794] riscv: Make sure the kernel mapping does not overlap with IS_ERR_VALUE The check that is done in setup_bootmem currently only works for 32-bit kernel since the kernel mapping has been moved outside of the linear mapping for 64-bit kernel. So make sure that for 64-bit kernel, the kernel mapping does not overlap with the last 4K of the addressable memory. 
Signed-off-by: Alexandre Ghiti Fixes: 2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear mapping") Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 644a34b0d77d..a14bf3910eec 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -159,7 +159,7 @@ static void __init setup_bootmem(void) { phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); - phys_addr_t max_mapped_addr = __pa(~(ulong)0); + phys_addr_t __maybe_unused max_mapped_addr; phys_addr_t dram_end; #ifdef CONFIG_XIP_KERNEL @@ -183,14 +183,20 @@ static void __init setup_bootmem(void) dram_end = memblock_end_of_DRAM(); +#ifndef CONFIG_64BIT /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE * macro. Make sure that last 4k bytes are not usable by memblock - * if end of dram is equal to maximum addressable memory. + * if end of dram is equal to maximum addressable memory. For 64-bit + * kernel, this problem can't happen here as the end of the virtual + * address space is occupied by the kernel mapping then this check must + * be done in create_kernel_page_table. */ + max_mapped_addr = __pa(~(ulong)0); if (max_mapped_addr == (dram_end - 1)) memblock_set_current_limit(max_mapped_addr - 4096); +#endif min_low_pfn = PFN_UP(memblock_start_of_DRAM()); max_low_pfn = max_pfn = PFN_DOWN(dram_end); @@ -578,6 +584,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); BUG_ON((kernel_map.phys_addr % map_size) != 0); +#ifdef CONFIG_64BIT + /* + * The last 4K bytes of the addressable memory can not be mapped because + * of IS_ERR_VALUE macro. 
+ */ + BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); +#endif + pt_ops.alloc_pte = alloc_pte_early; pt_ops.get_pte_virt = get_pte_virt_early; #ifndef __PAGETABLE_PMD_FOLDED From e39cdacf2f664b09029e7c1eb354c91a20c367af Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Tue, 22 Jun 2021 07:11:31 +0000 Subject: [PATCH 348/794] pcmcia: i82092: fix a null pointer dereference bug During the driver loading process, the 'dev' field was not assigned, but the 'dev' field was referenced in the subsequent 'i82092aa_set_mem_map' function. Signed-off-by: Zheyu Ma CC: [linux@dominikbrodowski.net: shorten commit message, add Cc to stable] Signed-off-by: Dominik Brodowski --- drivers/pcmcia/i82092.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c index 85887d885b5f..192c9049d654 100644 --- a/drivers/pcmcia/i82092.c +++ b/drivers/pcmcia/i82092.c @@ -112,6 +112,7 @@ static int i82092aa_pci_probe(struct pci_dev *dev, for (i = 0; i < socket_count; i++) { sockets[i].card_state = 1; /* 1 = present but empty */ sockets[i].io_base = pci_resource_start(dev, 0); + sockets[i].dev = dev; sockets[i].socket.features |= SS_CAP_PCCARD; sockets[i].socket.map_size = 0x1000; sockets[i].socket.irq_mask = 0; From f62f3c20647ebd5fb6ecb8f0b477b9281c44c10a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 20 Jul 2021 20:43:09 +1000 Subject: [PATCH 349/794] KVM: PPC: Book3S: Fix H_RTAS rets buffer overflow The kvmppc_rtas_hcall() sets the host rtas_args.rets pointer based on the rtas_args.nargs that was provided by the guest. That guest nargs value is not range checked, so the guest can cause the host rets pointer to be pointed outside the args array. The individual rtas function handlers check the nargs and nrets values to ensure they are correct, but if they are not, the handlers store a -3 (0xfffffffd) failure indication in rets[0] which corrupts host memory. 
Fix this by testing up front whether the guest supplied nargs and nret would exceed the array size, and fail the hcall directly without storing a failure indication to rets[0]. Also expand on a comment about why we kill the guest and try not to return errors directly if we have a valid rets[0] pointer. Fixes: 8e591cb72047 ("KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls") Cc: stable@vger.kernel.org # v3.10+ Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_rtas.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index c5e677508d3b..0f847f1e5ddd 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -242,6 +242,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; + if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) { + /* + * Don't overflow our args array: ensure there is room for + * at least rets[0] (even if the call specifies 0 nret). + * + * Each handler must then check for the correct nargs and nret + * values, but they may always return failure in rets[0]. + */ + rc = -EINVAL; + goto fail; + } args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->arch.rtas_token_lock); @@ -269,9 +280,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) fail: /* * We only get here if the guest has called RTAS with a bogus - * args pointer. That means we can't get to the args, and so we - * can't fail the RTAS call. So fail right out to userspace, - * which should kill the guest. + * args pointer or nargs/nret values that would overflow the + * array. That means we can't get to the args, and so we can't + * fail the RTAS call. So fail right out to userspace, which + * should kill the guest. 
+ * + * SLOF should actually pass the hcall return value from the + * rtas handler call in r3, so enter_rtas could be modified to + * return a failure indication in r3 and we could return such + * errors to the guest rather than failing to host userspace. + * However old guests that don't test for failure could then + * continue silently after errors, so for now we won't do this. */ return rc; } From d9c57d3ed52a92536f5fa59dc5ccdd58b4875076 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 8 Jul 2021 21:26:22 +1000 Subject: [PATCH 350/794] KVM: PPC: Book3S HV Nested: Sanitise H_ENTER_NESTED TM state The H_ENTER_NESTED hypercall is handled by the L0, and it is a request by the L1 to switch the context of the vCPU over to that of its L2 guest, and return with an interrupt indication. The L1 is responsible for switching some registers to guest context, and the L0 switches others (including all the hypervisor privileged state). If the L2 MSR has TM active, then the L1 is responsible for recheckpointing the L2 TM state. Then the L1 exits to L0 via the H_ENTER_NESTED hcall, and the L0 saves the TM state as part of the exit, and then it recheckpoints the TM state as part of the nested entry and finally HRFIDs into the L2 with TM active MSR. Not efficient, but about the simplest approach for something that's horrendously complicated. Problems arise if the L1 exits to the L0 with a TM state which does not match the L2 TM state being requested. For example if the L1 is transactional but the L2 MSR is non-transactional, or vice versa. The L0's HRFID can take a TM Bad Thing interrupt and crash. Fix this by disallowing H_ENTER_NESTED in TM[T] state entirely, and then ensuring that if the L1 is suspended then the L2 must have TM active, and if the L1 is not suspended then the L2 must not have TM active. 
Fixes: 360cae313702 ("KVM: PPC: Book3S HV: Nested guest entry via hypercall") Cc: stable@vger.kernel.org # v4.20+ Reported-by: Alexey Kardashevskiy Acked-by: Michael Neuling Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_hv_nested.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 8543ad538b0c..898f942eb198 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -302,6 +302,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.l1_ptcr == 0) return H_NOT_AVAILABLE; + if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) + return H_BAD_MODE; + /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); regs_ptr = kvmppc_get_gpr(vcpu, 5); @@ -322,6 +325,23 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (l2_hv.vcpu_token >= NR_CPUS) return H_PARAMETER; + /* + * L1 must have set up a suspended state to enter the L2 in a + * transactional state, and only in that case. These have to be + * filtered out here to prevent causing a TM Bad Thing in the + * host HRFID. We could synthesize a TM Bad Thing back to the L1 + * here but there doesn't seem like much point. + */ + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) { + if (!MSR_TM_ACTIVE(l2_regs.msr)) + return H_BAD_MODE; + } else { + if (l2_regs.msr & MSR_TS_MASK) + return H_BAD_MODE; + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK)) + return H_BAD_MODE; + } + /* translate lpid */ l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true); if (!l2) From 20fb73911fec01f06592de1cdbca00b66602ebd7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 15 Jul 2021 14:23:21 +0100 Subject: [PATCH 351/794] ARM: imx: fix missing 3rd argument in macro imx_mmdc_perf_init The function imx_mmdc_perf_init recently had a 3rd argument added to it but the equivalent macro was not updated and is still the older 2 argument version. 
Fix this by adding in the missing 3rd argument mmdc_ipg_clk. Fixes: f07ec8536580 ("ARM: imx: add missing clk_disable_unprepare()") Signed-off-by: Colin Ian King Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mmdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 4a6f1359e1e9..af12668d0bf5 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -534,7 +534,7 @@ pmu_free: #else #define imx_mmdc_remove NULL -#define imx_mmdc_perf_init(pdev, mmdc_base) 0 +#define imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk) 0 #endif static int imx_mmdc_probe(struct platform_device *pdev) From 3d9e30a52047f2d464efdfd1d561ae1f707a0286 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 18 Jul 2021 23:43:02 +0200 Subject: [PATCH 352/794] ARM: dts: imx: Swap M53Menlo pinctrl_power_button/pinctrl_power_out pins The pinctrl_power_button/pinctrl_power_out each define single GPIO pinmux, except it is exactly the other one than the matching gpio-keys and gpio-poweroff DT nodes use for that functionality. Swap the two GPIOs to correct this error.
Fixes: 50d29fdb765d ("ARM: dts: imx53: Add power GPIOs on M53Menlo") Signed-off-by: Marek Vasut Cc: Shawn Guo Cc: Fabio Estevam Cc: NXP Linux Team Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx53-m53menlo.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts index f98691ae4415..d3082b9774e4 100644 --- a/arch/arm/boot/dts/imx53-m53menlo.dts +++ b/arch/arm/boot/dts/imx53-m53menlo.dts @@ -388,13 +388,13 @@ pinctrl_power_button: powerbutgrp { fsl,pins = < - MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 + MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 >; }; pinctrl_power_out: poweroutgrp { fsl,pins = < - MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 + MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 >; }; From ec61cd49bf566401306cfc4855bda8c08bbaa46c Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 28 Jun 2021 14:37:13 +0200 Subject: [PATCH 353/794] mac80211: Do not strip skb headroom on monitor frames When a monitor interface is present together with other interfaces, a received skb is copied and received on the monitor netdev. Before, the copied skb was allocated with exactly the amount of space needed for the radiotap header, resulting in an skb without any headroom at all being received on the monitor netdev. With the introduction of eBPF and XDP in the kernel, skbs may be processed by custom eBPF programs. However, since the skb cannot be reallocated in the eBPF program, no more data or headers can be pushed. The old code made sure the final headroom was zero regardless of the value of NET_SKB_PAD, so increasing that constant would have no effect. Now we allocate monitor skb copies with a headroom of NET_SKB_PAD bytes before the radiotap header. Monitor interfaces now behave in the same way as other netdev interfaces that honor the NET_SKB_PAD constant. 
Signed-off-by: Johan Almbladh Link: https://lore.kernel.org/r/20210628123713.2070753-1-johan.almbladh@anyfinetworks.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 771921c057e8..2563473b5cf1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -730,7 +730,8 @@ ieee80211_make_monitor_skb(struct ieee80211_local *local, * Need to make a copy and possibly remove radiotap header * and FCS from the original. */ - skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC); + skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD, + 0, GFP_ATOMIC); if (!skb) return NULL; From 1a7915501ca94a1f10288defe333cd5ade210b63 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 29 Jun 2021 13:28:53 +0200 Subject: [PATCH 354/794] mac80211: fix starting aggregation sessions on mesh interfaces The logic for starting aggregation sessions was recently moved from minstrel_ht to mac80211, into the subif tx handler just after the sta lookup. Unfortunately this didn't work for mesh interfaces, since the sta lookup is deferred until a much later point in time on those. Fix this by also calling the aggregation check right after the deferred sta lookup. 
Fixes: 08a46c642001 ("mac80211: move A-MPDU session check from minstrel_ht to mac80211") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210629112853.29785-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 57 ++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e96981144358..8509778ff31f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1147,6 +1147,29 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, return queued; } +static void +ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) +{ + struct rate_control_ref *ref = sdata->local->rate_ctrl; + u16 tid; + + if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) + return; + + if (!sta || !sta->sta.ht_cap.ht_supported || + !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || + skb->protocol == sdata->control_port_protocol) + return; + + tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + if (likely(sta->ampdu_mlme.tid_tx[tid])) + return; + + ieee80211_start_tx_ba_session(&sta->sta, tid, 0); +} + /* * initialises @tx * pass %NULL for the station if unknown, a valid pointer if known @@ -1160,6 +1183,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + bool aggr_check = false; int tid; memset(tx, 0, sizeof(*tx)); @@ -1188,8 +1212,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, } else if (tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } - if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) + if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) { tx->sta = sta_info_get(sdata, hdr->addr1); + aggr_check = true; + } } if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && @@ 
-1199,8 +1225,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_tx *tid_tx; tid = ieee80211_get_tid(hdr); - tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx && aggr_check) { + ieee80211_aggr_check(sdata, tx->sta, skb); + tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + } + if (tid_tx) { bool queued; @@ -4120,29 +4150,6 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) } EXPORT_SYMBOL(ieee80211_txq_schedule_start); -static void -ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, - struct sk_buff *skb) -{ - struct rate_control_ref *ref = sdata->local->rate_ctrl; - u16 tid; - - if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) - return; - - if (!sta || !sta->sta.ht_cap.ht_supported || - !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || - skb->protocol == sdata->control_port_protocol) - return; - - tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - if (likely(sta->ampdu_mlme.tid_tx[tid])) - return; - - ieee80211_start_tx_ba_session(&sta->sta, tid, 0); -} - void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags, From a5d3cbdb09ff1f52cbe040932e06c8b9915c6dad Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 2 Jul 2021 07:01:11 +0200 Subject: [PATCH 355/794] mac80211: fix enabling 4-address mode on a sta vif after assoc Notify the driver about the 4-address mode change and also send a nulldata packet to the AP to notify it about the change Fixes: 1ff4e8f2dec8 ("mac80211: notify the driver when a sta uses 4-address mode") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210702050111.47546-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 19 +++++++++++++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 4 ++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 
84cc7733ea66..4e6f11e63df3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -152,6 +152,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct vif_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; int ret; ret = ieee80211_if_change_type(sdata, type); @@ -162,7 +164,24 @@ static int ieee80211_change_iface(struct wiphy *wiphy, RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); ieee80211_check_fast_rx_iface(sdata); } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + if (params->use_4addr == ifmgd->use_4addr) + return 0; + sdata->u.mgd.use_4addr = params->use_4addr; + if (!ifmgd->associated) + return 0; + + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, ifmgd->bssid); + if (sta) + drv_sta_set_4addr(local, sdata, &sta->sta, + params->use_4addr); + mutex_unlock(&local->sta_mtx); + + if (params->use_4addr) + ieee80211_send_4addr_nullfunc(local, sdata); } if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 22549b95d1aa..30ce6d2ec7ce 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2201,6 +2201,8 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, bool powersave); +void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack, u16 tx_time); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a00f11a33699..c0ea3b1aa9e1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1095,8 +1095,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, ieee80211_tx_skb(sdata, skb); } -static void 
ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) +void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { struct sk_buff *skb; struct ieee80211_hdr *nullfunc; From 17109e9783799be2a063b2bd861a508194b0a487 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 6 Jul 2021 17:44:23 +0200 Subject: [PATCH 356/794] virt_wifi: fix error on connect When connecting without first doing a scan, the BSS list is empty and __cfg80211_connect_result() generates this warning: $ iw dev wlan0 connect -w VirtWifi [ 15.371989] ------------[ cut here ]------------ [ 15.372179] WARNING: CPU: 0 PID: 92 at net/wireless/sme.c:756 __cfg80211_connect_result+0x402/0x440 [ 15.372383] CPU: 0 PID: 92 Comm: kworker/u2:2 Not tainted 5.13.0-kvm #444 [ 15.372512] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-3.fc34 04/01/2014 [ 15.372597] Workqueue: cfg80211 cfg80211_event_work [ 15.372756] RIP: 0010:__cfg80211_connect_result+0x402/0x440 [ 15.372818] Code: 48 2b 04 25 28 00 00 00 75 59 48 8b 3b 48 8b 76 10 48 8d 65 e0 5b 41 5c 41 5d 41 5e 5d 49 8d 65 f0 41 5d e9 d0 d4 fd ff 0f 0b <0f> 0b e9 f6 fd ff ff e8 f2 4a b4 ff e9 ec fd ff ff 0f 0b e9 19 fd [ 15.372966] RSP: 0018:ffffc900005cbdc0 EFLAGS: 00010246 [ 15.373022] RAX: 0000000000000000 RBX: ffff8880028e2400 RCX: ffff8880028e2472 [ 15.373088] RDX: 0000000000000002 RSI: 00000000fffffe01 RDI: ffffffff815335ba [ 15.373149] RBP: ffffc900005cbe00 R08: 0000000000000008 R09: ffff888002bdf8b8 [ 15.373209] R10: ffff88803ec208f0 R11: ffffffffffffe9ae R12: ffff88801d687d98 [ 15.373280] R13: ffff88801b5fe000 R14: ffffc900005cbdc0 R15: dead000000000100 [ 15.373330] FS: 0000000000000000(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000 [ 15.373382] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 15.373425] CR2: 000056421c468958 CR3: 000000001b458001 CR4: 0000000000170eb0 [ 15.373478] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000 [ 15.373529] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 15.373580] Call Trace: [ 15.373611] ? cfg80211_process_wdev_events+0x10e/0x170 [ 15.373743] cfg80211_process_wdev_events+0x10e/0x170 [ 15.373783] cfg80211_process_rdev_events+0x21/0x40 [ 15.373846] cfg80211_event_work+0x20/0x30 [ 15.373892] process_one_work+0x1e9/0x340 [ 15.373956] worker_thread+0x4b/0x3f0 [ 15.374017] ? process_one_work+0x340/0x340 [ 15.374053] kthread+0x11f/0x140 [ 15.374089] ? set_kthread_struct+0x30/0x30 [ 15.374153] ret_from_fork+0x1f/0x30 [ 15.374187] ---[ end trace 321ef0cb7e9c0be1 ]--- wlan0 (phy #0): connected to 00:00:00:00:00:00 Add the fake bss just before the connect so that cfg80211_get_bss() finds the virtual network. As some code was duplicated, move it in a common function. Signed-off-by: Matteo Croce Link: https://lore.kernel.org/r/20210706154423.11065-1-mcroce@linux.microsoft.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virt_wifi.c | 52 ++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 1df959532c7d..514f2c1124b6 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -136,6 +136,29 @@ static struct ieee80211_supported_band band_5ghz = { /* Assigned at module init. Guaranteed locally-administered and unicast. 
*/ static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {}; +static void virt_wifi_inform_bss(struct wiphy *wiphy) +{ + u64 tsf = div_u64(ktime_get_boottime_ns(), 1000); + struct cfg80211_bss *informed_bss; + static const struct { + u8 tag; + u8 len; + u8 ssid[8]; + } __packed ssid = { + .tag = WLAN_EID_SSID, + .len = 8, + .ssid = "VirtWifi", + }; + + informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, + CFG80211_BSS_FTYPE_PRESP, + fake_router_bssid, tsf, + WLAN_CAPABILITY_ESS, 0, + (void *)&ssid, sizeof(ssid), + DBM_TO_MBM(-50), GFP_KERNEL); + cfg80211_put_bss(wiphy, informed_bss); +} + /* Called with the rtnl lock held. */ static int virt_wifi_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) @@ -156,28 +179,13 @@ static int virt_wifi_scan(struct wiphy *wiphy, /* Acquires and releases the rdev BSS lock. */ static void virt_wifi_scan_result(struct work_struct *work) { - struct { - u8 tag; - u8 len; - u8 ssid[8]; - } __packed ssid = { - .tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi", - }; - struct cfg80211_bss *informed_bss; struct virt_wifi_wiphy_priv *priv = container_of(work, struct virt_wifi_wiphy_priv, scan_result.work); struct wiphy *wiphy = priv_to_wiphy(priv); struct cfg80211_scan_info scan_info = { .aborted = false }; - u64 tsf = div_u64(ktime_get_boottime_ns(), 1000); - informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, - CFG80211_BSS_FTYPE_PRESP, - fake_router_bssid, tsf, - WLAN_CAPABILITY_ESS, 0, - (void *)&ssid, sizeof(ssid), - DBM_TO_MBM(-50), GFP_KERNEL); - cfg80211_put_bss(wiphy, informed_bss); + virt_wifi_inform_bss(wiphy); /* Schedules work which acquires and releases the rtnl lock. 
*/ cfg80211_scan_done(priv->scan_request, &scan_info); @@ -225,10 +233,12 @@ static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev, if (!could_schedule) return -EBUSY; - if (sme->bssid) + if (sme->bssid) { ether_addr_copy(priv->connect_requested_bss, sme->bssid); - else + } else { + virt_wifi_inform_bss(wiphy); eth_zero_addr(priv->connect_requested_bss); + } wiphy_debug(wiphy, "connect\n"); @@ -241,11 +251,13 @@ static void virt_wifi_connect_complete(struct work_struct *work) struct virt_wifi_netdev_priv *priv = container_of(work, struct virt_wifi_netdev_priv, connect.work); u8 *requested_bss = priv->connect_requested_bss; - bool has_addr = !is_zero_ether_addr(requested_bss); bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid); u16 status = WLAN_STATUS_SUCCESS; - if (!priv->is_up || (has_addr && !right_addr)) + if (is_zero_ether_addr(requested_bss)) + requested_bss = NULL; + + if (!priv->is_up || (requested_bss && !right_addr)) status = WLAN_STATUS_UNSPECIFIED_FAILURE; else priv->is_connected = true; From 0d059964504a1605d84938c0b5b38f6573121c4a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 12 Jul 2021 21:53:30 +0200 Subject: [PATCH 357/794] nl80211: limit band information in non-split data In non-split data, we shouldn't be adding S1G and 6 GHz data (or future bands) since we're really close to the 4k message size limit. Remove those bands, any modern userspace that can use S1G or 6 GHz should already be using split dumps, and if not then it needs to update. 
Link: https://lore.kernel.org/r/20210712215329.31444162a2c2.I5555312e4a074c84f8b4e7ad79dc4d1fbfc5126c@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 50eb405b0690..16c88beea48b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2351,7 +2351,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, goto nla_put_failure; for (band = state->band_start; - band < NUM_NL80211_BANDS; band++) { + band < (state->split ? + NUM_NL80211_BANDS : + NL80211_BAND_60GHZ + 1); + band++) { struct ieee80211_supported_band *sband; /* omit higher bands for ancient software */ From f9a5c358c8d26fed0cc45f2afc64633d4ba21dff Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Mon, 28 Jun 2021 21:23:34 +0800 Subject: [PATCH 358/794] cfg80211: Fix possible memory leak in function cfg80211_bss_update When we exceed the limit of BSS entries, this function will free the new entry, however, at this time, it is the last door to access the inputed ies, so these ies will be unreferenced objects and cause memory leak. Therefore we should free its ies before deallocating the new entry, beside of dropping it from hidden_list. Signed-off-by: Nguyen Dinh Phi Link: https://lore.kernel.org/r/20210628132334.851095-1-phind.uet@gmail.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index f03c7ac8e184..7897b1478c3c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1754,16 +1754,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, * be grouped with this beacon for updates ... 
*/ if (!cfg80211_combine_bsses(rdev, new)) { - kfree(new); + bss_ref_put(rdev, new); goto drop; } } if (rdev->bss_entries >= bss_entries_limit && !cfg80211_bss_expire_oldest(rdev)) { - if (!list_empty(&new->hidden_list)) - list_del(&new->hidden_list); - kfree(new); + bss_ref_put(rdev, new); goto drop; } From 923f98929182dfd04e9149be839160b63a3db145 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 13:11:34 +0300 Subject: [PATCH 359/794] arm64: dts: armada-3720-turris-mox: fixed indices for the SDHC controllers Since drivers/mmc/host/sdhci-xenon.c declares the PROBE_PREFER_ASYNCHRONOUS probe type, it is not guaranteed whether /dev/mmcblk0 will belong to sdhci0 or sdhci1. In turn, this will break booting by: root=/dev/mmcblk0p1 Fix the issue by adding aliases so that the old MMC controller indices are preserved. Fixes: 7320915c8861 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.14") Signed-off-by: Vladimir Oltean Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index ce2bcddf396f..f2d7d6f071bc 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -19,6 +19,8 @@ aliases { spi0 = &spi0; ethernet1 = &eth1; + mmc0 = &sdhci0; + mmc1 = &sdhci1; }; chosen { From b66541422824cf6cf20e9a35112e9cb5d82cdf62 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 13 Jul 2021 10:27:28 +0800 Subject: [PATCH 360/794] ext4: fix potential uninitialized access to retval in kmmpd if (!ext4_has_feature_mmp(sb)) then retval can be uninitialized before we jump to the wait_to_exit label.
Fixes: 61bb4a1c417e ("ext4: fix possible UAF when remounting r/o a mmp-protected file system") Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20210713022728.2533770-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index bc364c119af6..cebea4270817 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -138,7 +138,7 @@ static int kmmpd(void *data) unsigned mmp_check_interval; unsigned long last_update_time; unsigned long diff; - int retval; + int retval = 0; mmp_block = le64_to_cpu(es->s_mmp_block); mmp = (struct mmp_struct *)(bh->b_data); From 73dc707161a83c24a9e6804b2d60e6f4a4d6be74 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 14 Jul 2021 13:59:40 +0800 Subject: [PATCH 361/794] ext4: remove conflicting comment from __ext4_forget We do a bforget and return for no journal case, so let's remove this conflict comment. Reviewed-by: Jan Kara Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20210714055940.1553705-1-guoqing.jiang@linux.dev Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b96ecba91899..b60f0152ea57 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -244,9 +244,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, * "bh" may be NULL: a metadata block may have been freed from memory * but there may still be a record of it in the journal, and that record * still needs to be revoked. - * - * If the handle isn't valid we're not journaling, but we still need to - * call into ext4_journal_revoke() to put the buffer head. 
*/ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, int is_metadata, struct inode *inode, From 32c3973d808301e7a980f80fee8818fdf7c82b09 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 17 Jul 2021 10:10:29 +0200 Subject: [PATCH 362/794] netfilter: flowtable: avoid possible false sharing The flowtable follows the same timeout approach as conntrack, use the same idiom as in cc16921351d8 ("netfilter: conntrack: avoid same-timeout update") but also include the fix provided by e37542ba111f ("netfilter: conntrack: avoid possible false sharing"). Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 1e50908b1b7e..551976e4284c 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -331,7 +331,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add); void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload *flow) { - flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); + u32 timeout; + + timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); + if (READ_ONCE(flow->timeout) != timeout) + WRITE_ONCE(flow->timeout, timeout); if (likely(!nf_flowtable_hw_offload(flow_table))) return; From 32953df7a6eb56bd9b8f18a13034d55f9fc96cfa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 17 Jul 2021 10:20:08 +0200 Subject: [PATCH 363/794] netfilter: nft_last: avoid possible false sharing Use the idiom described in: https://github.com/google/ktsan/wiki/READ_ONCE-and-WRITE_ONCE#it-may-improve-performance Moreover, prevent a compiler optimization. 
Fixes: 836382dc2471 ("netfilter: nf_tables: add last expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_last.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 8088b99f2ee3..304e33cbed9b 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -48,24 +48,30 @@ static void nft_last_eval(const struct nft_expr *expr, { struct nft_last_priv *priv = nft_expr_priv(expr); - priv->last_jiffies = jiffies; - priv->last_set = 1; + if (READ_ONCE(priv->last_jiffies) != jiffies) + WRITE_ONCE(priv->last_jiffies, jiffies); + if (READ_ONCE(priv->last_set) == 0) + WRITE_ONCE(priv->last_set, 1); } static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_last_priv *priv = nft_expr_priv(expr); + unsigned long last_jiffies = READ_ONCE(priv->last_jiffies); + u32 last_set = READ_ONCE(priv->last_set); __be64 msecs; - if (time_before(jiffies, priv->last_jiffies)) - priv->last_set = 0; + if (time_before(jiffies, last_jiffies)) { + WRITE_ONCE(priv->last_set, 0); + last_set = 0; + } - if (priv->last_set) - msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies); + if (last_set) + msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies); else msecs = 0; - if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) || + if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) || nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD)) goto nla_put_failure; From 30a56a2b881821625f79837d4d968c679852444e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Jul 2021 18:36:00 +0200 Subject: [PATCH 364/794] netfilter: conntrack: adjust stop timestamp to real expiry value In case the entry is evicted via garbage collection there is delay between the timeout value and the eviction event. This adjusts the stop value based on how much time has passed. 
Fixes: b87a2f9199ea82 ("netfilter: conntrack: add gc worker to remove timed-out entries") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 83c52df85870..5c03e5106751 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -670,8 +670,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) return false; tstamp = nf_conn_tstamp_find(ct); - if (tstamp && tstamp->stop == 0) + if (tstamp) { + s32 timeout = ct->timeout - nfct_time_stamp; + tstamp->stop = ktime_get_real_ns(); + if (timeout < 0) + tstamp->stop -= jiffies_to_nsecs(-timeout); + } if (nf_conntrack_event_report(IPCT_DESTROY, ct, portid, report) < 0) { From a33f387ecd5aafae514095c2c4a8c24f7aea7e8b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Jul 2021 18:22:50 +0200 Subject: [PATCH 365/794] netfilter: nft_nat: allow to specify layer 4 protocol NAT only nft_nat reports a bogus EAFNOSUPPORT if no layer 3 information is specified. 
Fixes: d07db9884a5f ("netfilter: nf_tables: introduce nft_validate_register_load()") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 0840c635b752..be1595d6979d 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -201,7 +201,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, alen = sizeof_field(struct nf_nat_range, min_addr.ip6); break; default: - return -EAFNOSUPPORT; + if (tb[NFTA_NAT_REG_ADDR_MIN]) + return -EAFNOSUPPORT; + break; } priv->family = family; From ee7ab3f263f8131722cff3871b9618b1e7478f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 28 Jun 2021 17:12:29 +0200 Subject: [PATCH 366/794] arm64: dts: armada-3720-turris-mox: remove mrvl,i2c-fast-mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some SFP modules are not detected when i2c-fast-mode is enabled even when clock-frequency is already set to 100000. The I2C bus violates the timing specifications when run in fast mode. So disable fast mode on Turris Mox. Same change was already applied for uDPU (also Armada 3720 board with SFP) in commit fe3ec631a77d ("arm64: dts: uDPU: remove i2c-fast-mode"). 
Fixes: 7109d817db2e ("arm64: dts: marvell: add DTS for Turris Mox") Signed-off-by: Pali Rohár Reviewed-by: Marek Behún Acked-by: Russell King (Oracle) Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index f2d7d6f071bc..a05b1ab2dd12 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -121,6 +121,7 @@ pinctrl-names = "default"; pinctrl-0 = <&i2c1_pins>; clock-frequency = <100000>; + /delete-property/ mrvl,i2c-fast-mode; status = "okay"; rtc@6f { From 3b13911a2fd0dd0146c9777a254840c5466cf120 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 21 Jul 2021 19:10:08 -0400 Subject: [PATCH 367/794] tracing: Synthetic event field_pos is an index not a boolean Performing the following: ># echo 'wakeup_lat s32 pid; u64 delta; char wake_comm[]' > synthetic_events ># echo 'hist:keys=pid:__arg__1=common_timestamp.usecs' > events/sched/sched_waking/trigger ># echo 'hist:keys=next_pid:pid=next_pid,delta=common_timestamp.usecs-$__arg__1:onmatch(sched.sched_waking).trace(wakeup_lat,$pid,$delta,prev_comm)'\ > events/sched/sched_switch/trigger ># echo 1 > events/synthetic/enable Crashed the kernel: BUG: kernel NULL pointer dereference, address: 000000000000001b #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP CPU: 7 PID: 0 Comm: swapper/7 Not tainted 5.13.0-rc5-test+ #104 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:strlen+0x0/0x20 Code: f6 82 80 2b 0b bc 20 74 11 0f b6 50 01 48 83 c0 01 f6 82 80 2b 0b bc 20 75 ef c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 38 9 f8 c3 31 RSP: 0018:ffffaa75000d79d0 EFLAGS: 00010046 RAX: 
0000000000000002 RBX: ffff9cdb55575270 RCX: 0000000000000000 RDX: ffff9cdb58c7a320 RSI: ffffaa75000d7b40 RDI: 000000000000001b RBP: ffffaa75000d7b40 R08: ffff9cdb40a4f010 R09: ffffaa75000d7ab8 R10: ffff9cdb4398c700 R11: 0000000000000008 R12: ffff9cdb58c7a320 R13: ffff9cdb55575270 R14: ffff9cdb58c7a000 R15: 0000000000000018 FS: 0000000000000000(0000) GS:ffff9cdb5aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000001b CR3: 00000000c0612006 CR4: 00000000001706e0 Call Trace: trace_event_raw_event_synth+0x90/0x1d0 action_trace+0x5b/0x70 event_hist_trigger+0x4bd/0x4e0 ? cpumask_next_and+0x20/0x30 ? update_sd_lb_stats.constprop.0+0xf6/0x840 ? __lock_acquire.constprop.0+0x125/0x550 ? find_held_lock+0x32/0x90 ? sched_clock_cpu+0xe/0xd0 ? lock_release+0x155/0x440 ? update_load_avg+0x8c/0x6f0 ? enqueue_entity+0x18a/0x920 ? __rb_reserve_next+0xe5/0x460 ? ring_buffer_lock_reserve+0x12a/0x3f0 event_triggers_call+0x52/0xe0 trace_event_buffer_commit+0x1ae/0x240 trace_event_raw_event_sched_switch+0x114/0x170 __traceiter_sched_switch+0x39/0x50 __schedule+0x431/0xb00 schedule_idle+0x28/0x40 do_idle+0x198/0x2e0 cpu_startup_entry+0x19/0x20 secondary_startup_64_no_verify+0xc2/0xcb The reason is that the dynamic events array keeps track of the field position of the fields array, via the field_pos variable in the synth_field structure. Unfortunately, that field is a boolean for some reason, which means any field_pos greater than 1 will be a bug (in this case it was 2). 
Link: https://lkml.kernel.org/r/20210721191008.638bce34@oasis.local.home Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Ingo Molnar Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: bd82631d7ccdc ("tracing: Add support for dynamic strings to synthetic events") Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_synth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h index 6e146b959dcd..4007fe95cf42 100644 --- a/kernel/trace/trace_synth.h +++ b/kernel/trace/trace_synth.h @@ -14,10 +14,10 @@ struct synth_field { char *name; size_t size; unsigned int offset; + unsigned int field_pos; bool is_signed; bool is_string; bool is_dynamic; - bool field_pos; }; struct synth_event { From 1e3bac71c5053c99d438771fc9fa5082ae5d90aa Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 21 Jul 2021 11:00:53 -0400 Subject: [PATCH 368/794] tracing/histogram: Rename "cpu" to "common_cpu" Currently the histogram logic allows the user to write "cpu" in as an event field, and it will record the CPU that the event happened on. The problem with this is that there's a lot of events that have "cpu" as a real field, and using "cpu" as the CPU it ran on, makes it impossible to run histograms on the "cpu" field of events. For example, if I want to have a histogram on the count of the workqueue_queue_work event on its cpu field, running: ># echo 'hist:keys=cpu' > events/workqueue/workqueue_queue_work/trigger Gives a misleading and wrong result. Change the command to "common_cpu" as no event should have "common_*" fields as that's a reserved name for fields used by all events. And this makes sense here as common_cpu would be a field used by all events. 
Now we can even do: ># echo 'hist:keys=common_cpu,cpu if cpu < 100' > events/workqueue/workqueue_queue_work/trigger ># cat events/workqueue/workqueue_queue_work/hist # event histogram # # trigger info: hist:keys=common_cpu,cpu:vals=hitcount:sort=hitcount:size=2048 if cpu < 100 [active] # { common_cpu: 0, cpu: 2 } hitcount: 1 { common_cpu: 0, cpu: 4 } hitcount: 1 { common_cpu: 7, cpu: 7 } hitcount: 1 { common_cpu: 0, cpu: 7 } hitcount: 1 { common_cpu: 0, cpu: 1 } hitcount: 1 { common_cpu: 0, cpu: 6 } hitcount: 2 { common_cpu: 0, cpu: 5 } hitcount: 2 { common_cpu: 1, cpu: 1 } hitcount: 4 { common_cpu: 6, cpu: 6 } hitcount: 4 { common_cpu: 5, cpu: 5 } hitcount: 14 { common_cpu: 4, cpu: 4 } hitcount: 26 { common_cpu: 0, cpu: 0 } hitcount: 39 { common_cpu: 2, cpu: 2 } hitcount: 184 Now for backward compatibility, I added a trick. If "cpu" is used, and the field is not found, it will fall back to "common_cpu" and work as it did before. This way, it will still work for old programs that use "cpu" to get the actual CPU, but if the event has a "cpu" as a field, it will get that event's "cpu" field, which is probably what it wants anyway. I updated the tracefs/README to include documentation about both the common_timestamp and the common_cpu. This way, if that text is present in the README, then an application can know that common_cpu is supported over just plain "cpu". 
Link: https://lkml.kernel.org/r/20210721110053.26b4f641@oasis.local.home Cc: Namhyung Kim Cc: Ingo Molnar Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: 8b7622bf94a44 ("tracing: Add cpu field for hist triggers") Reviewed-by: Tom Zanussi Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/histogram.rst | 2 +- kernel/trace/trace.c | 4 ++++ kernel/trace/trace_events_hist.c | 22 ++++++++++++++++------ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index b71e09f745c3..f99be8062bc8 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -191,7 +191,7 @@ Documentation written by Tom Zanussi with the event, in nanoseconds. May be modified by .usecs to have timestamps interpreted as microseconds. - cpu int the cpu on which the event occurred. + common_cpu int the cpu on which the event occurred. ====================== ==== ======================================= Extended error information diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f8b80b5bab71..c59dd35a6da5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5609,6 +5609,10 @@ static const char readme_msg[] = "\t [:name=histname1]\n" "\t [:.]\n" "\t [if ]\n\n" + "\t Note, special fields can be used as well:\n" + "\t common_timestamp - to record current timestamp\n" + "\t common_cpu - to record the CPU the event happened on\n" + "\n" "\t When a matching event is hit, an entry is added to a hash\n" "\t table using the key(s) and value(s) named, and the value of a\n" "\t sum called 'hitcount' is incremented. 
Keys and values\n" diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 16a9dfc9fffc..34325f41ebc0 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1111,7 +1111,7 @@ static const char *hist_field_name(struct hist_field *field, field->flags & HIST_FIELD_FL_ALIAS) field_name = hist_field_name(field->operands[0], ++level); else if (field->flags & HIST_FIELD_FL_CPU) - field_name = "cpu"; + field_name = "common_cpu"; else if (field->flags & HIST_FIELD_FL_EXPR || field->flags & HIST_FIELD_FL_VAR_REF) { if (field->system) { @@ -1991,14 +1991,24 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, hist_data->enable_timestamps = true; if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) hist_data->attrs->ts_in_usecs = true; - } else if (strcmp(field_name, "cpu") == 0) + } else if (strcmp(field_name, "common_cpu") == 0) *flags |= HIST_FIELD_FL_CPU; else { field = trace_find_event_field(file->event_call, field_name); if (!field || !field->size) { - hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); - field = ERR_PTR(-EINVAL); - goto out; + /* + * For backward compatibility, if field_name + * was "cpu", then we treat this the same as + * common_cpu. 
+ */ + if (strcmp(field_name, "cpu") == 0) { + *flags |= HIST_FIELD_FL_CPU; + } else { + hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, + errpos(field_name)); + field = ERR_PTR(-EINVAL); + goto out; + } } } out: @@ -5085,7 +5095,7 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) seq_printf(m, "%s=", hist_field->var.name); if (hist_field->flags & HIST_FIELD_FL_CPU) - seq_puts(m, "cpu"); + seq_puts(m, "common_cpu"); else if (field_name) { if (hist_field->flags & HIST_FIELD_FL_VAR_REF || hist_field->flags & HIST_FIELD_FL_ALIAS) From 217e26bd87b2930856726b48a4e71c768b8c9bf5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:22:32 +0200 Subject: [PATCH 369/794] netfilter: nfnl_hook: fix unused variable warning The only user of this variable is in an #ifdef: net/netfilter/nfnetlink_hook.c: In function 'nfnl_hook_entries_head': net/netfilter/nfnetlink_hook.c:177:28: error: unused variable 'netdev' [-Werror=unused-variable] Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_hook.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 50b4e3c9347a..202f57d17bab 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -174,7 +174,9 @@ static const struct nf_hook_entries * nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev) { const struct nf_hook_entries *hook_head = NULL; +#ifdef CONFIG_NETFILTER_INGRESS struct net_device *netdev; +#endif switch (pf) { case NFPROTO_IPV4: From 9528c19507dc9bc3d6cd96f4611d7cb80c5afcde Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 21 Jul 2021 19:53:41 -0400 Subject: [PATCH 370/794] tracing: Clean up alloc_synth_event() alloc_synth_event() currently has the following code to initialize the event fields and dynamic_fields: for (i = 0, j = 0; i < n_fields; 
i++) { event->fields[i] = fields[i]; if (fields[i]->is_dynamic) { event->dynamic_fields[j] = fields[i]; event->dynamic_fields[j]->field_pos = i; event->dynamic_fields[j++] = fields[i]; event->n_dynamic_fields++; } } 1) It would make more sense to have all fields keep track of their field_pos. 2) event->dynamic_fields[j] is assigned twice for no reason. 3) We can move updating event->n_dynamic_fields outside the loop, and just assign it to j. This combination makes the code much cleaner. Link: https://lkml.kernel.org/r/20210721195341.29bb0f77@oasis.local.home Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 2ac75eb6aa86..9315fc03e303 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -893,15 +893,13 @@ static struct synth_event *alloc_synth_event(const char *name, int n_fields, dyn_event_init(&event->devent, &synth_event_ops); for (i = 0, j = 0; i < n_fields; i++) { + fields[i]->field_pos = i; event->fields[i] = fields[i]; - if (fields[i]->is_dynamic) { - event->dynamic_fields[j] = fields[i]; - event->dynamic_fields[j]->field_pos = i; + if (fields[i]->is_dynamic) event->dynamic_fields[j++] = fields[i]; - event->n_dynamic_fields++; - } } + event->n_dynamic_fields = j; event->n_fields = n_fields; out: return event; From 68e83498cb4fad31963b5c76a71e80b824bc316e Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 21 Jul 2021 13:47:26 +0200 Subject: [PATCH 371/794] ftrace: Avoid synchronize_rcu_tasks_rude() call when not necessary synchronize_rcu_tasks_rude() triggers IPIs and forces rescheduling on all CPUs. It is a costly operation and, when targeting nohz_full CPUs, very disrupting (hence the name). So avoid calling it when 'old_hash' doesn't need to be freed. 
Link: https://lkml.kernel.org/r/20210721114726.1545103-1-nsaenzju@redhat.com Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e6fb3e6e1ffc..4fbcf560dd03 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5985,7 +5985,8 @@ ftrace_graph_release(struct inode *inode, struct file *file) * infrastructure to do the synchronization, thus we must do it * ourselves. */ - synchronize_rcu_tasks_rude(); + if (old_hash != EMPTY_HASH) + synchronize_rcu_tasks_rude(); free_ftrace_hash(old_hash); } From 3b1a8f457fcf105924c72e99f1191834837c978d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Jul 2021 13:09:15 +0100 Subject: [PATCH 372/794] ftrace: Remove redundant initialization of variable ret The variable ret is being initialized with a value that is never read, it is being updated later on. The assignment is redundant and can be removed. 
Link: https://lkml.kernel.org/r/20210721120915.122278-1-colin.king@canonical.com Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4fbcf560dd03..7b180f61e6d3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7545,7 +7545,7 @@ int ftrace_is_dead(void) */ int register_ftrace_function(struct ftrace_ops *ops) { - int ret = -1; + int ret; ftrace_ops_init(ops); From 352384d5c84ebe40fa77098cc234fe173247d8ef Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 22 Jul 2021 21:52:18 -0400 Subject: [PATCH 373/794] tracepoints: Update static_call before tp_funcs when adding a tracepoint Because of the significant overhead that retpolines pose on indirect calls, the tracepoint code was updated to use the new "static_calls" that can modify the running code to directly call a function instead of using an indirect caller, and this function can be changed at runtime. In the tracepoint code that calls all the registered callbacks that are attached to a tracepoint, the following is done: it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); if (it_func_ptr) { __data = (it_func_ptr)->data; static_call(tp_func_##name)(__data, args); } If there's just a single callback, the static_call is updated to just call that callback directly. Once another handler is added, then the static caller is updated to call the iterator, that simply loops over all the funcs in the array and calls each of the callbacks like the old method using indirect calling. The issue was discovered with a race between updating the funcs array and updating the static_call. The funcs array was updated first and then the static_call was updated. This is not an issue as long as the first element in the old array is the same as the first element in the new array. 
But that assumption is incorrect, because callbacks also have a priority field, and if there's a callback added that has a higher priority than the callback on the old array, then it will become the first callback in the new array. This means that it is possible to call the old callback with the new callback data element, which can cause a kernel panic. static_call = callback1() funcs[] = {callback1,data1}; callback2 has higher priority than callback1 CPU 1 CPU 2 ----- ----- new_funcs = {callback2,data2}, {callback1,data1} rcu_assign_pointer(tp->funcs, new_funcs); /* * Now tp->funcs has the new array * but the static_call still calls callback1 */ it_func_ptr = tp->funcs [ new_funcs ] data = it_func_ptr->data [ data2 ] static_call(callback1, data); /* Now callback1 is called with * callback2's data */ [ KERNEL PANIC ] update_static_call(iterator); To prevent this from happening, always switch the static_call to the iterator before assigning the tp->funcs to the new array. The iterator will always properly match the callback with its data. To trigger this bug: In one terminal: while :; do hackbench 50; done In another terminal echo 1 > /sys/kernel/tracing/events/sched/sched_waking/enable while :; do echo 1 > /sys/kernel/tracing/set_event_pid; sleep 0.5 echo 0 > /sys/kernel/tracing/set_event_pid; sleep 0.5 done And it doesn't take long to crash. This is because the set_event_pid adds a callback to the sched_waking tracepoint with a high priority, which will be called before the sched_waking trace event callback is called. Note, the removal to a single callback updates the array first, before changing the static_call to single callback, which is the proper order as the first element in the array is the same as what the static_call is being changed to. 
Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Fixes: d25e37d89dd2f ("tracepoint: Optimize using static_call()") Reported-by: Stefan Metzmacher Tested-by: Stefan Metzmacher Signed-off-by: Steven Rostedt (VMware) --- kernel/tracepoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 976bf8ce8039..fc32821f8240 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -299,8 +299,8 @@ static int tracepoint_add_func(struct tracepoint *tp, * a pointer to it. This array is referenced by __DO_TRACE from * include/linux/tracepoint.h using rcu_dereference_sched(). */ - rcu_assign_pointer(tp->funcs, tp_funcs); tracepoint_update_call(tp, tp_funcs, false); + rcu_assign_pointer(tp->funcs, tp_funcs); static_key_enable(&tp->key); release_probes(old); From 65662a8dcdd01342b71ee44234bcfd0162e195af Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 29 Apr 2021 19:49:47 +0200 Subject: [PATCH 374/794] i40e: Fix logic of disabling queues Correct the message flow between driver and firmware when disabling queues. Previously in case of PF reset (due to required reinit after reconfig), an error like: "VSI seid 397 Tx ring 60 disable timeout" could show up occasionally. The error was not a real issue of hardware or firmware, it was caused by a wrong sequence of messages invoked by the driver. 
Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 58 ++++++++++++--------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 861e59a350bd..5297e6c59083 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4454,11 +4454,10 @@ int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, } /** - * i40e_vsi_control_tx - Start or stop a VSI's rings + * i40e_vsi_enable_tx - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_enable_tx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; @@ -4467,7 +4466,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, - false /*is xdp*/, enable); + false /*is xdp*/, true); if (ret) break; @@ -4476,7 +4475,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q + vsi->alloc_queue_pairs, - true /*is xdp*/, enable); + true /*is xdp*/, true); if (ret) break; } @@ -4574,32 +4573,25 @@ int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable) } /** - * i40e_vsi_control_rx - Start or stop a VSI's rings + * i40e_vsi_enable_rx - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_enable_rx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, 
pf_q++) { - ret = i40e_control_wait_rx_q(pf, pf_q, enable); + ret = i40e_control_wait_rx_q(pf, pf_q, true); if (ret) { dev_info(&pf->pdev->dev, - "VSI seid %d Rx ring %d %sable timeout\n", - vsi->seid, pf_q, (enable ? "en" : "dis")); + "VSI seid %d Rx ring %d enable timeout\n", + vsi->seid, pf_q); break; } } - /* Due to HW errata, on Rx disable only, the register can indicate done - * before it really is. Needs 50ms to be sure - */ - if (!enable) - mdelay(50); - return ret; } @@ -4612,29 +4604,47 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi) int ret = 0; /* do rx first for enable and last for disable */ - ret = i40e_vsi_control_rx(vsi, true); + ret = i40e_vsi_enable_rx(vsi); if (ret) return ret; - ret = i40e_vsi_control_tx(vsi, true); + ret = i40e_vsi_enable_tx(vsi); return ret; } +#define I40E_DISABLE_TX_GAP_MSEC 50 + /** * i40e_vsi_stop_rings - Stop a VSI's rings * @vsi: the VSI being configured **/ void i40e_vsi_stop_rings(struct i40e_vsi *vsi) { + struct i40e_pf *pf = vsi->back; + int pf_q, err, q_end; + /* When port TX is suspended, don't wait */ if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) return i40e_vsi_stop_rings_no_wait(vsi); - /* do rx first for enable and last for disable - * Ignore return value, we need to shutdown whatever we can - */ - i40e_vsi_control_tx(vsi, false); - i40e_vsi_control_rx(vsi, false); + q_end = vsi->base_queue + vsi->num_queue_pairs; + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false); + + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) { + err = i40e_control_wait_rx_q(pf, pf_q, false); + if (err) + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d dissable timeout\n", + vsi->seid, pf_q); + } + + msleep(I40E_DISABLE_TX_GAP_MSEC); + pf_q = vsi->base_queue; + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0); + + i40e_vsi_wait_queues_disabled(vsi); } /** From 71d6fdba4b2d82fdd883fec31dee77fbcf59773a Mon Sep 17 00:00:00 2001 
From: Arkadiusz Kubalewski Date: Fri, 21 May 2021 18:41:26 +0200 Subject: [PATCH 375/794] i40e: Fix firmware LLDP agent related warning Make warning meaningful for the user. Previously the trace: "Starting FW LLDP agent failed: error: I40E_ERR_ADMIN_QUEUE_ERROR, I40E_AQ_RC_EAGAIN" was produced when user tried to start Firmware LLDP agent, just after it was stopped with sequence: ethtool --set-priv-flags disable-fw-lldp on ethtool --set-priv-flags disable-fw-lldp off (without any delay between the commands) At that point the firmware is still processing stop command, the behavior is expected. Fixes: c1041d070437 ("i40e: Missing response checks in driver when starting/stopping FW LLDP") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 3e822bad4851..d9e26f9713a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -5294,6 +5294,10 @@ flags_complete: dev_warn(&pf->pdev->dev, "Device configuration forbids SW from starting the LLDP agent.\n"); return -EINVAL; + case I40E_AQ_RC_EAGAIN: + dev_warn(&pf->pdev->dev, + "Stop FW LLDP agent command is still being processed, please try again in a second.\n"); + return -EBUSY; default: dev_warn(&pf->pdev->dev, "Starting FW LLDP agent failed: error: %s, %s\n", From dc614c46178b0b89bde86ac54fc687a28580d2b7 Mon Sep 17 00:00:00 2001 From: Lukasz Cieplicki Date: Mon, 31 May 2021 16:55:49 +0000 Subject: [PATCH 376/794] i40e: Add additional info to PHY type error In case of PHY type error occurs, the message was too generic. Add additional info to PHY type error indicating that it can be wrong cable connected. 
Fixes: 124ed15bf126 ("i40e: Add dual speed module support") Signed-off-by: Lukasz Cieplicki Signed-off-by: Michal Maloszewski Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index d9e26f9713a5..2c9e4eeb7270 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -980,7 +980,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, default: /* if we got here and link is up something bad is afoot */ netdev_info(netdev, - "WARNING: Link is up but PHY type 0x%x is not recognized.\n", + "WARNING: Link is up but PHY type 0x%x is not recognized, or incorrect cable is in use\n", hw_link_info->phy_type); } From 89ec1f0886c127c7e41ac61a6b6d539f4fb2510b Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Wed, 2 Jun 2021 00:47:03 +0000 Subject: [PATCH 377/794] i40e: Fix queue-to-TC mapping on Tx In SW DCB mode the packets sent receive incorrect UP tags. They are constructed correctly and put into tx_ring, but UP is later remapped by HW on the basis of TCTUPR register contents according to Tx queue selected, and BW used is consistent with the new UP values. This is caused by Tx queue selection in kernel not taking into account DCB configuration. This patch fixes the issue by implementing the ndo_select_queue NDO callback. 
Fixes: fd0a05ce74ef ("i40e: transmit, receive, and NAPI") Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Jedrzej Jagielski Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 50 +++++++++++++++++++++ drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 + 3 files changed, 53 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5297e6c59083..278077208f37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13271,6 +13271,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_poll_controller = i40e_netpoll, #endif .ndo_setup_tc = __i40e_setup_tc, + .ndo_select_queue = i40e_lan_select_queue, .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 38eb8151ee9a..3f25bd8c4924 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3631,6 +3631,56 @@ dma_error: return -1; } +static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev, + const struct sk_buff *skb, + u16 num_tx_queues) +{ + u32 jhash_initval_salt = 0xd631614b; + u32 hash; + + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16)skb->protocol ^ skb->hash; + + hash = jhash_1word(hash, jhash_initval_salt); + + return (u16)(((u64)hash * num_tx_queues) >> 32); +} + +u16 i40e_lan_select_queue(struct net_device *netdev, + struct sk_buff *skb, + struct net_device __always_unused *sb_dev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_hw *hw; + u16 qoffset; + u16 qcount; + u8 tclass; + u16 hash; + u8 prio; + + /* is DCB enabled at all? 
*/ + if (vsi->tc_config.numtc == 1) + return i40e_swdcb_skb_tx_hash(netdev, skb, + netdev->real_num_tx_queues); + + prio = skb->priority; + hw = &vsi->back->hw; + tclass = hw->local_dcbx_config.etscfg.prioritytable[prio]; + /* sanity check */ + if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass)))) + tclass = 0; + + /* select a queue assigned for the given TC */ + qcount = vsi->tc_config.tc_info[tclass].qcount; + hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount); + + qoffset = vsi->tc_config.tc_info[tclass].qoffset; + return qoffset + hash; +} + /** * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring * @xdpf: data to transmit diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 86fed05b4f19..bfc2845c99d1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -451,6 +451,8 @@ static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring) bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev); +u16 i40e_lan_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev); void i40e_clean_tx_ring(struct i40e_ring *tx_ring); void i40e_clean_rx_ring(struct i40e_ring *rx_ring); int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring); From ea52faae1d17cd3048681d86d2e8641f44de484d Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Fri, 18 Jun 2021 08:49:49 +0000 Subject: [PATCH 378/794] i40e: Fix log TC creation failure when max num of queues is exceeded Fix missing failed message if driver does not have enough queues to complete TC command. Without this fix no message is displayed in dmesg. 
Fixes: a9ce82f744dc ("i40e: Enable 'channel' mode in mqprio for TC configs") Signed-off-by: Grzegorz Szczurek Signed-off-by: Jedrzej Jagielski Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 278077208f37..1d1f52756a93 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7290,6 +7290,8 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, } if (vsi->num_queue_pairs < (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) { + dev_err(&vsi->back->pdev->dev, + "Failed to create traffic channel, insufficient number of queues.\n"); return -EINVAL; } if (sum_max_rate > i40e_get_link_speed(vsi)) { From d72e91efcae12f2f24ced984d00d60517c677857 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Thu, 22 Jul 2021 18:15:51 +0530 Subject: [PATCH 379/794] octeontx2-af: Remove unnecessary devm_kfree Remove devm_kfree of memory where VLAN entry to RVU PF mapping info is saved. This will be freed anyway at driver exit. Having this could result in warning from devm_kfree() if the memory is not allocated due to errors in rvu_nix_block_init() before nix_setup_txvlan(). Fixes: 9a946def264d ("octeontx2-af: Modify nix_vtag_cfg mailbox to support TX VTAG entries") Signed-off-by: Sunil Goutham Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 0933699a0d2d..0d2cd5169018 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3842,7 +3842,6 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr, vlan = &nix_hw->txvlan; kfree(vlan->rsrc.bmap); mutex_destroy(&vlan->rsrc_lock); - devm_kfree(rvu->dev, vlan->entry2pfvf_map); mcast = &nix_hw->mcast; qmem_free(rvu->dev, mcast->mce_ctx); From f8dd60de194817c86bf812700980762bb5a8d9a4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 22 Jul 2021 12:05:41 -0400 Subject: [PATCH 380/794] tipc: fix implicit-connect for SYN+ For implicit-connect, when it's either SYN- or SYN+, an ACK should be sent back to the client immediately. It's not appropriate for the client to enter established state only after receiving data from the server. On client side, after the SYN is sent out, tipc_wait_for_connect() should be called to wait for the ACK if timeout is set. This patch also restricts __tipc_sendstream() to call __sendmsg() only when it's in TIPC_OPEN state, so that the client can program in a single loop doing both connecting and data sending like: for (...) sendmsg(dest, buf); This makes the implicit-connect more implicit. Fixes: b97bf3fd8f6a ("[TIPC] Initial merge") Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 34a97ea36cc8..ebd300c26a44 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -158,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk); static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz); static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack); +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -1515,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) rc = 0; } - if (unlikely(syn && !rc)) + if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); + if (timeout) { + timeout = msecs_to_jiffies(timeout); + tipc_wait_for_connect(sock, &timeout); + } + } return rc ? rc : dlen; } @@ -1564,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) return -EMSGSIZE; /* Handle implicit connection setup */ - if (unlikely(dest)) { + if (unlikely(dest && sk->sk_state == TIPC_OPEN)) { rc = __tipc_sendmsg(sock, m, dlen); if (dlen && dlen == rc) { tsk->peer_caps = tipc_node_get_capabilities(net, dnode); @@ -2689,9 +2695,10 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern) { struct sock *new_sk, *sk = sock->sk; - struct sk_buff *buf; struct tipc_sock *new_tsock; + struct msghdr m = {NULL,}; struct tipc_msg *msg; + struct sk_buff *buf; long timeo; int res; @@ -2737,19 +2744,17 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, } /* - * Respond to 'SYN-' by discarding it & returning 'ACK'-. - * Respond to 'SYN+' by queuing it on new socket. + * Respond to 'SYN-' by discarding it & returning 'ACK'. 
+ * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'. */ if (!msg_data_sz(msg)) { - struct msghdr m = {NULL,}; - tsk_advance_rx_queue(sk); - __tipc_sendstream(new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } + __tipc_sendstream(new_sock, &m, 0); release_sock(new_sk); exit: release_sock(sk); From d237a7f11719ff9320721be5818352e48071aab6 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 23 Jul 2021 09:25:34 +0700 Subject: [PATCH 381/794] tipc: fix sleeping in tipc accept routine The release_sock() is blocking function, it would change the state after sleeping. In order to evaluate the stated condition outside the socket lock context, switch to use wait_woken() instead. Fixes: 6398e23cdb1d8 ("tipc: standardize accept routine") Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/socket.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ebd300c26a44..75b99b7eda22 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2652,7 +2652,7 @@ static int tipc_listen(struct socket *sock, int len) static int tipc_wait_for_accept(struct socket *sock, long timeo) { struct sock *sk = sock->sk; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; /* True wake-one mechanism for incoming connections: only @@ -2661,12 +2661,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * anymore, the common case will execute the loop only once. 
*/ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -2678,7 +2678,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) if (signal_pending(current)) break; } - finish_wait(sk_sleep(sk), &wait); return err; } From 227adfb2b1dfbc53dfc53b9dd7a93a6298ff7c56 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 22 Jul 2021 20:01:28 +0300 Subject: [PATCH 382/794] net: Set true network header for ECN decapsulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In cases where the header straight after the tunnel header was another ethernet header (TEB), instead of the network header, the ECN decapsulation code would treat the ethernet header as if it was an IP header, resulting in mishandling and possible wrong drops or corruption of the IP header. In this case, ECT(1) is sent, so IP_ECN_decapsulate tries to copy it to the inner IPv4 header, and correct its checksum. The offset of the ECT bits in an IPv4 header corresponds to the lower 2 bits of the second octet of the destination MAC address in the ethernet header. The IPv4 checksum corresponds to end of the source address. 
In order to reproduce: $ ip netns add A $ ip netns add B $ ip -n A link add _v0 type veth peer name _v1 netns B $ ip -n A link set _v0 up $ ip -n A addr add dev _v0 10.254.3.1/24 $ ip -n A route add default dev _v0 scope global $ ip -n B link set _v1 up $ ip -n B addr add dev _v1 10.254.1.6/24 $ ip -n B route add default dev _v1 scope global $ ip -n B link add gre1 type gretap local 10.254.1.6 remote 10.254.3.1 key 0x49000000 $ ip -n B link set gre1 up # Now send an IPv4/GRE/Eth/IPv4 frame where the outer header has ECT(1), # and the inner header has no ECT bits set: $ cat send_pkt.py #!/usr/bin/env python3 from scapy.all import * pkt = IP(b'E\x01\x00\xa7\x00\x00\x00\x00@/`%\n\xfe\x03\x01\n\xfe\x01\x06 \x00eXI\x00' b'\x00\x00\x18\xbe\x92\xa0\xee&\x18\xb0\x92\xa0l&\x08\x00E\x00\x00}\x8b\x85' b'@\x00\x01\x01\xe4\xf2\x82\x82\x82\x01\x82\x82\x82\x02\x08\x00d\x11\xa6\xeb' b'3\x1e\x1e\\xf3\\xf7`\x00\x00\x00\x00ZN\x00\x00\x00\x00\x00\x00\x10\x11\x12' b'\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./01234' b'56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ') send(pkt) $ sudo ip netns exec B tcpdump -neqlllvi gre1 icmp & ; sleep 1 $ sudo ip netns exec A python3 send_pkt.py In the original packet, the source/destination MAC addresses are dst=18:be:92:a0:ee:26 src=18:b0:92:a0:6c:26 In the received packet, they are dst=18:bd:92:a0:ee:26 src=18:b0:92:a0:6c:27 Thanks to Lahav Schlesinger and Isaac Garzon for helping me pinpoint the origin. Fixes: b723748750ec ("tunnel: Propagate ECT(1) when decapsulating as recommended by RFC6040") Cc: David S. Miller Cc: Hideaki YOSHIFUJI Cc: David Ahern Cc: Jakub Kicinski Cc: Toke Høiland-Jørgensen Signed-off-by: Gilad Naaman Acked-by: Toke Høiland-Jørgensen Signed-off-by: David S. 
Miller --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0dca00745ac3..be75b409445c 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -390,7 +390,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } - skb_reset_network_header(skb); + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { From 46c7655f0b56b1ac864115441064cde9ed124f4a Mon Sep 17 00:00:00 2001 From: Kangmin Park Date: Fri, 23 Jul 2021 02:44:43 +0900 Subject: [PATCH 383/794] ipv6: decrease hop limit counter in ip6_forward() Decrease hop limit counter when deliver skb to ndp proxy. Signed-off-by: Kangmin Park Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e1b9f7ac8bad..8e6ca9ad6812 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -549,9 +549,10 @@ int ip6_forward(struct sk_buff *skb) if (net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); - if (proxied > 0) + if (proxied > 0) { + hdr->hop_limit--; return ip6_input(skb); - else if (proxied < 0) { + } else if (proxied < 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } From c92c74131a84b508aa8f079a25d7bbe10748449e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 16:05:51 +0300 Subject: [PATCH 384/794] net: dsa: mv88e6xxx: silently accept the deletion of VID 0 too The blamed commit modified the driver to accept the addition of VID 0 without doing anything, but deleting that VID still fails: [ 32.080780] mv88e6085 d0032004.mdio-mii:10 lan8: failed to kill vid 0081/0 Modify mv88e6xxx_port_vlan_leave() to do the same thing as the addition. 
Fixes: b8b79c414eca ("net: dsa: mv88e6xxx: Fix adding vlan 0") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index beb41572d04e..272b0535d946 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2155,7 +2155,7 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip, int i, err; if (!vid) - return -EOPNOTSUPP; + return 0; err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) From 68d1f1d4af188c290087958c75c7b89a816e1137 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 22 Jul 2021 20:21:05 +0200 Subject: [PATCH 385/794] wwan: core: Fix missing RTM_NEWLINK event for default link A wwan link created via the wwan_create_default_link procedure is never notified to the user (RTM_NEWLINK), causing issues with user tools relying on such event to track network links (NetworkManager). This is because the procedure misses a call to rtnl_configure_link(), which sets the link as initialized and notifies the new link (cf proper usage in __rtnl_newlink()). Cc: stable@vger.kernel.org Fixes: ca374290aaad ("wwan: core: support default netdev creation") Suggested-by: Sergey Ryazanov Signed-off-by: Loic Poulain Acked-by: Sergey Ryazanov Signed-off-by: David S. 
Miller --- drivers/net/wwan/wwan_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 3e16c318e705..674a81d79db3 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -984,6 +984,8 @@ static void wwan_create_default_link(struct wwan_device *wwandev, goto unlock; } + rtnl_configure_link(dev, NULL); /* Link initialized, notify new link */ + unlock: rtnl_unlock(); From 3ce6e1f662a910970880188ea7bfd00542bd3934 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 6 Jul 2021 23:40:34 +0900 Subject: [PATCH 386/794] loop: reintroduce global lock for safe loop_validate_file() traversal Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock") re-opened a race window for NULL pointer dereference at loop_validate_file() where commit 310ca162d779efee ("block/loop: Use global lock for ioctl() operation.") has closed. Although we need to guarantee that other loop devices will not change during traversal, we can't take remote "struct loop_device"->lo_mutex inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore, introduce a global lock dedicated for loop_validate_file() which is conditionally taken before local "struct loop_device"->lo_mutex is taken. 
Signed-off-by: Tetsuo Handa Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock") Signed-off-by: Jens Axboe --- drivers/block/loop.c | 128 ++++++++++++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 31 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f37b9e3d833c..f0cdff0c5fbf 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -88,6 +88,47 @@ static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); +static DEFINE_MUTEX(loop_validate_mutex); + +/** + * loop_global_lock_killable() - take locks for safe loop_validate_file() test + * + * @lo: struct loop_device + * @global: true if @lo is about to bind another "struct loop_device", false otherwise + * + * Returns 0 on success, -EINTR otherwise. + * + * Since loop_validate_file() traverses on other "struct loop_device" if + * is_loop_device() is true, we need a global lock for serializing concurrent + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. 
+ */ +static int loop_global_lock_killable(struct loop_device *lo, bool global) +{ + int err; + + if (global) { + err = mutex_lock_killable(&loop_validate_mutex); + if (err) + return err; + } + err = mutex_lock_killable(&lo->lo_mutex); + if (err && global) + mutex_unlock(&loop_validate_mutex); + return err; +} + +/** + * loop_global_unlock() - release locks taken by loop_global_lock_killable() + * + * @lo: struct loop_device + * @global: true if @lo was about to bind another "struct loop_device", false otherwise + */ +static void loop_global_unlock(struct loop_device *lo, bool global) +{ + mutex_unlock(&lo->lo_mutex); + if (global) + mutex_unlock(&loop_validate_mutex); +} static int max_part; static int part_shift; @@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; + lockdep_assert_held(&loop_validate_mutex); if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; - if (l->lo_state != Lo_bound) { + if (l->lo_state != Lo_bound) return -EINVAL; - } + /* Order wrt setting lo->lo_backing_file in loop_configure(). 
*/ + rmb(); f = l->lo_backing_file; } if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) @@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file = NULL, *old_file; - int error; - bool partscan; + struct file *file = fget(arg); + struct file *old_file; + int error; + bool partscan; + bool is_loop; - error = mutex_lock_killable(&lo->lo_mutex); + if (!file) + return -EBADF; + is_loop = is_loop_device(file); + error = loop_global_lock_killable(lo, is_loop); if (error) - return error; + goto out_putf; error = -ENXIO; if (lo->lo_state != Lo_bound) goto out_err; @@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; - error = -EBADF; - file = fget(arg); - if (!file) - goto out_err; - error = loop_validate_file(file, bdev); if (error) goto out_err; @@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); + + /* + * Flush loop_validate_file() before fput(), for l->lo_backing_file + * might be pointing at old_file which might be the last reference. 
+ */ + if (!is_loop) { + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + } /* * We must drop file reference outside of lo_mutex as dropping * the file ref can take open_mutex which creates circular locking @@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, return 0; out_err: - mutex_unlock(&lo->lo_mutex); - if (file) - fput(file); + loop_global_unlock(lo, is_loop); +out_putf: + fput(file); return error; } @@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct block_device *bdev, const struct loop_config *config) { - struct file *file; - struct inode *inode; + struct file *file = fget(config->fd); + struct inode *inode; struct address_space *mapping; - int error; - loff_t size; - bool partscan; - unsigned short bsize; + int error; + loff_t size; + bool partscan; + unsigned short bsize; + bool is_loop; + + if (!file) + return -EBADF; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - error = -EBADF; - file = fget(config->fd); - if (!file) - goto out; - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_putf; } - error = mutex_lock_killable(&lo->lo_mutex); + error = loop_global_lock_killable(lo, is_loop); if (error) goto out_bdev; @@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); + /* Order wrt reading lo_state in loop_validate_file(). */ + wmb(); + lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). 
*/ bdgrab(bdev); - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); if (partscan) loop_reread_partitions(lo); if (!(mode & FMODE_EXCL)) @@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, return 0; out_unlock: - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); out_bdev: if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); -out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) int lo_number; struct loop_worker *pos, *worker; + /* + * Flush loop_configure() and loop_change_fd(). It is acceptable for + * loop_validate_file() to succeed, for actual clear operation has not + * started yet. + */ + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + /* + * loop_validate_file() now fails because l->lo_state != Lo_bound + * became visible. + */ + mutex_lock(&lo->lo_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { err = -ENXIO; From 9986066d94c971edf19464ed7bf5b26a91520e97 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Fri, 23 Jul 2021 13:36:18 +0530 Subject: [PATCH 387/794] octeontx2-af: Fix uninitialized variables in rvu_switch Get the number of VFs of a PF correctly by calling rvu_get_pf_numvfs in rvu_switch_disable function. Also hwvf is not required hence remove it. Fixes: 23109f8dd06d ("octeontx2-af: Introduce internal packet switching") Reported-by: kernel test robot Reported-by: Colin Ian King Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 6 ++++-- .../net/ethernet/marvell/octeontx2/af/rvu_switch.c | 11 ++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 017163fb3cd5..5fe277e354f7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -391,8 +391,10 @@ void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf) /* Get numVFs attached to this PF and first HWVF */ cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf)); - *numvfs = (cfg >> 12) & 0xFF; - *hwvf = cfg & 0xFFF; + if (numvfs) + *numvfs = (cfg >> 12) & 0xFF; + if (hwvf) + *hwvf = cfg & 0xFFF; } static int rvu_get_hwvf(struct rvu *rvu, int pcifunc) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c index 2e5379710aa5..820adf390b8e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c @@ -71,8 +71,8 @@ static int rvu_switch_install_rules(struct rvu *rvu) struct rvu_switch *rswitch = &rvu->rswitch; u16 start = rswitch->start_entry; struct rvu_hwinfo *hw = rvu->hw; - int pf, vf, numvfs, hwvf; u16 pcifunc, entry = 0; + int pf, vf, numvfs; int err; for (pf = 1; pf < hw->total_pfs; pf++) { @@ -110,8 +110,8 @@ static int rvu_switch_install_rules(struct rvu *rvu) rswitch->entry2pcifunc[entry++] = pcifunc; - rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); - for (vf = 0; vf < numvfs; vf++, hwvf++) { + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); + for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); rvu_get_nix_blkaddr(rvu, pcifunc); @@ -198,7 +198,7 @@ void rvu_switch_disable(struct rvu *rvu) struct npc_mcam_free_entry_req free_req = { 0 }; struct rvu_switch *rswitch = &rvu->rswitch; struct rvu_hwinfo *hw = rvu->hw; - int pf, vf, 
numvfs, hwvf; + int pf, vf, numvfs; struct msg_rsp rsp; u16 pcifunc; int err; @@ -217,7 +217,8 @@ void rvu_switch_disable(struct rvu *rvu) "Reverting RX rule for PF%d failed(%d)\n", pf, err); - for (vf = 0; vf < numvfs; vf++, hwvf++) { + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); + for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); if (err) From 52f3456a96c06760b9bfae460e39596fec7af22e Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 23 Jul 2021 18:31:32 +0300 Subject: [PATCH 388/794] net: qrtr: fix memory leaks Syzbot reported memory leak in qrtr. The problem was in unputted struct sock. qrtr_local_enqueue() function calls qrtr_port_lookup() which takes sock reference if port was found. Then there is the following check: if (!ipc || &ipc->sk == skb->sk) { ... return -ENODEV; } Since we should drop the reference before returning from this function and ipc can be non-NULL inside this if, we should add qrtr_port_put() inside this if. The similar corner case is in qrtr_endpoint_post() as Manivannan reported. In case of sock_queue_rcv_skb() failure we need to put port reference to avoid leaking struct sock pointer. Fixes: e04df98adf7d ("net: qrtr: Remove receive worker") Fixes: bdabad3e363d ("net: Add Qualcomm IPC router") Reported-and-tested-by: syzbot+35a511c72ea7356cdcf3@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Reviewed-by: Manivannan Sadhasivam Signed-off-by: David S. 
Miller --- net/qrtr/qrtr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index e6f4a6202f82..171b7f3be6ef 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -518,8 +518,10 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (!ipc) goto err; - if (sock_queue_rcv_skb(&ipc->sk, skb)) + if (sock_queue_rcv_skb(&ipc->sk, skb)) { + qrtr_port_put(ipc); goto err; + } qrtr_port_put(ipc); } @@ -839,6 +841,8 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, ipc = qrtr_port_lookup(to->sq_port); if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ + if (ipc) + qrtr_port_put(ipc); kfree_skb(skb); return -ENODEV; } From 15bbf8bb4d4ab87108ecf5f4155ec8ffa3c141d6 Mon Sep 17 00:00:00 2001 From: Paul Jakma Date: Fri, 23 Jul 2021 16:13:04 +0100 Subject: [PATCH 389/794] NIU: fix incorrect error return, missed in previous revert Commit 7930742d6, reverting 26fd962, missed out on reverting an incorrect change to a return value. The niu_pci_vpd_scan_props(..) == 1 case appears to be a normal path - treating it as an error and return -EINVAL was breaking VPD_SCAN and causing the driver to fail to load. Fix, so my Neptune card works again. Cc: Kangjie Lu Cc: Shannon Nelson Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: stable Fixes: 7930742d ('Revert "niu: fix missing checks of niu_pci_eeprom_read"') Signed-off-by: Paul Jakma Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sun/niu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 74e748662ec0..860644d182ab 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -8191,8 +8191,9 @@ static int niu_pci_vpd_fetch(struct niu *np, u32 start) err = niu_pci_vpd_scan_props(np, here, end); if (err < 0) return err; + /* ret == 1 is not an error */ if (err == 1) - return -EINVAL; + return 0; } return 0; } From 5ba03936c05584b6f6f79be5ebe7e5036c1dd252 Mon Sep 17 00:00:00 2001 From: Wei Shuyu Date: Mon, 28 Jun 2021 15:15:08 +0800 Subject: [PATCH 390/794] md/raid10: properly indicate failure when ending a failed write request Similar to [1], this patch fixes the same bug in raid10. Also cleanup the comments. [1] commit 2417b9869b81 ("md/raid1: properly indicate failure when ending a failed write request") Cc: stable@vger.kernel.org Fixes: 7cee6d4e6035 ("md/raid10: end bio when the device faulty") Signed-off-by: Wei Shuyu Acked-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/raid1.c | 2 -- drivers/md/raid10.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ced076ba560e..753822ca9613 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -472,8 +472,6 @@ static void raid1_end_write_request(struct bio *bio) /* * When the device is faulty, it is not necessary to * handle write error. - * For failfast, this is the only remaining device, - * We need to retry the write without FailFast. */ if (!test_bit(Faulty, &rdev->flags)) set_bit(R1BIO_WriteError, &r1_bio->state); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 13f5e6b2a73d..40e845fb9717 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -469,12 +469,12 @@ static void raid10_end_write_request(struct bio *bio) /* * When the device is faulty, it is not necessary to * handle write error. 
- * For failfast, this is the only remaining device, - * We need to retry the write without FailFast. */ if (!test_bit(Faulty, &rdev->flags)) set_bit(R10BIO_WriteError, &r10_bio->state); else { + /* Fail the request */ + set_bit(R10BIO_Degraded, &r10_bio->state); r10_bio->devs[slot].bio = NULL; to_put = bio; dec_rdev = 1; From 6840e17b8ea992453e2d6f460d403cb05d194e76 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:45 -0700 Subject: [PATCH 391/794] ionic: make all rx_mode work threadsafe Move the bulk of the code from ionic_set_rx_mode(), which can be called from atomic context, into ionic_lif_rx_mode() which is a safe context. A call from the stack will get pushed off into a work thread, but it is also possible to simultaneously have a call driven by a queue reconfig request from an ethtool command or fw recovery event. We add a mutex around the rx_mode work to be sure they don't collide. Fixes: 81dbc24147f9 ("ionic: change set_rx_mode from_ndo to can_sleep") Signed-off-by: Shannon Nelson Signed-off-by: David S. 
Miller --- .../net/ethernet/pensando/ionic/ionic_lif.c | 205 ++++++++---------- .../net/ethernet/pensando/ionic/ionic_lif.h | 4 +- 2 files changed, 96 insertions(+), 113 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index af3a5368529c..7815e9034fb8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -29,7 +29,7 @@ static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = { */ }; -static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode); +static void ionic_lif_rx_mode(struct ionic_lif *lif); static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr); static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr); static void ionic_link_status_check(struct ionic_lif *lif); @@ -77,7 +77,7 @@ static void ionic_lif_deferred_work(struct work_struct *work) switch (w->type) { case IONIC_DW_TYPE_RX_MODE: - ionic_lif_rx_mode(lif, w->rx_mode); + ionic_lif_rx_mode(lif); break; case IONIC_DW_TYPE_RX_ADDR_ADD: ionic_lif_addr_add(lif, w->addr); @@ -1301,10 +1301,8 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) return 0; } -static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add, - bool can_sleep) +static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add) { - struct ionic_deferred_work *work; unsigned int nmfilters; unsigned int nufilters; @@ -1330,63 +1328,77 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add, lif->nucast--; } - if (!can_sleep) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; - work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD : - IONIC_DW_TYPE_RX_ADDR_DEL; - memcpy(work->addr, addr, ETH_ALEN); - netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n", - add ? 
"add" : "del", addr); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - netdev_dbg(lif->netdev, "rx_filter %s %pM\n", - add ? "add" : "del", addr); - if (add) - return ionic_lif_addr_add(lif, addr); - else - return ionic_lif_addr_del(lif, addr); - } + netdev_dbg(lif->netdev, "rx_filter %s %pM\n", + add ? "add" : "del", addr); + if (add) + return ionic_lif_addr_add(lif, addr); + else + return ionic_lif_addr_del(lif, addr); return 0; } static int ionic_addr_add(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_SLEEP); -} - -static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr) -{ - return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_NOT_SLEEP); + return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR); } static int ionic_addr_del(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_SLEEP); + return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR); } -static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr) +static void ionic_lif_rx_mode(struct ionic_lif *lif) { - return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_NOT_SLEEP); -} - -static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) -{ - struct ionic_admin_ctx ctx = { - .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_mode_set = { - .opcode = IONIC_CMD_RX_MODE_SET, - .lif_index = cpu_to_le16(lif->index), - .rx_mode = cpu_to_le16(rx_mode), - }, - }; + struct net_device *netdev = lif->netdev; + unsigned int nfilters; + unsigned int nd_flags; char buf[128]; - int err; + u16 rx_mode; int i; #define REMAIN(__x) (sizeof(buf) - (__x)) + mutex_lock(&lif->config_lock); + + /* grab the flags once for local use */ + nd_flags = netdev->flags; + + rx_mode = IONIC_RX_MODE_F_UNICAST; + rx_mode |= (nd_flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; + rx_mode |= (nd_flags & IFF_BROADCAST) ? 
IONIC_RX_MODE_F_BROADCAST : 0; + rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; + rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0; + + /* sync unicast addresses + * next check to see if we're in an overflow state + * if so, we track that we overflowed and enable NIC PROMISC + * else if the overflow is set and not needed + * we remove our overflow flag and check the netdev flags + * to see if we can disable NIC PROMISC + */ + __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); + nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); + if (netdev_uc_count(netdev) + 1 > nfilters) { + rx_mode |= IONIC_RX_MODE_F_PROMISC; + lif->uc_overflow = true; + } else if (lif->uc_overflow) { + lif->uc_overflow = false; + if (!(nd_flags & IFF_PROMISC)) + rx_mode &= ~IONIC_RX_MODE_F_PROMISC; + } + + /* same for multicast */ + __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); + nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); + if (netdev_mc_count(netdev) > nfilters) { + rx_mode |= IONIC_RX_MODE_F_ALLMULTI; + lif->mc_overflow = true; + } else if (lif->mc_overflow) { + lif->mc_overflow = false; + if (!(nd_flags & IFF_ALLMULTI)) + rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI; + } + i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:", lif->rx_mode, rx_mode); if (rx_mode & IONIC_RX_MODE_F_UNICAST) @@ -1399,79 +1411,48 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC"); if (rx_mode & IONIC_RX_MODE_F_ALLMULTI) i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI"); - netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf); + if (rx_mode & IONIC_RX_MODE_F_RDMA_SNIFFER) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_RDMA_SNIFFER"); + netdev_dbg(netdev, "lif%d %s\n", lif->index, buf); - err = ionic_adminq_post_wait(lif, &ctx); - if (err) - netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n", - rx_mode, err); - else - lif->rx_mode = 
rx_mode; + if (lif->rx_mode != rx_mode) { + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_mode_set = { + .opcode = IONIC_CMD_RX_MODE_SET, + .lif_index = cpu_to_le16(lif->index), + }, + }; + int err; + + ctx.cmd.rx_mode_set.rx_mode = cpu_to_le16(rx_mode); + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + netdev_warn(netdev, "set rx_mode 0x%04x failed: %d\n", + rx_mode, err); + else + lif->rx_mode = rx_mode; + } + + mutex_unlock(&lif->config_lock); } static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) { struct ionic_lif *lif = netdev_priv(netdev); struct ionic_deferred_work *work; - unsigned int nfilters; - unsigned int rx_mode; - rx_mode = IONIC_RX_MODE_F_UNICAST; - rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; - rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0; - rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; - rx_mode |= (netdev->flags & IFF_ALLMULTI) ? 
IONIC_RX_MODE_F_ALLMULTI : 0; - - /* sync unicast addresses - * next check to see if we're in an overflow state - * if so, we track that we overflowed and enable NIC PROMISC - * else if the overflow is set and not needed - * we remove our overflow flag and check the netdev flags - * to see if we can disable NIC PROMISC - */ - if (can_sleep) - __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); - else - __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); - nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); - if (netdev_uc_count(netdev) + 1 > nfilters) { - rx_mode |= IONIC_RX_MODE_F_PROMISC; - lif->uc_overflow = true; - } else if (lif->uc_overflow) { - lif->uc_overflow = false; - if (!(netdev->flags & IFF_PROMISC)) - rx_mode &= ~IONIC_RX_MODE_F_PROMISC; - } - - /* same for multicast */ - if (can_sleep) - __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); - else - __dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); - nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); - if (netdev_mc_count(netdev) > nfilters) { - rx_mode |= IONIC_RX_MODE_F_ALLMULTI; - lif->mc_overflow = true; - } else if (lif->mc_overflow) { - lif->mc_overflow = false; - if (!(netdev->flags & IFF_ALLMULTI)) - rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI; - } - - if (lif->rx_mode != rx_mode) { - if (!can_sleep) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - netdev_err(lif->netdev, "rxmode change dropped\n"); - return; - } - work->type = IONIC_DW_TYPE_RX_MODE; - work->rx_mode = rx_mode; - netdev_dbg(lif->netdev, "deferred: rx_mode\n"); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - ionic_lif_rx_mode(lif, rx_mode); + if (!can_sleep) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "rxmode change dropped\n"); + return; } + work->type = IONIC_DW_TYPE_RX_MODE; + netdev_dbg(lif->netdev, "deferred: rx_mode\n"); + ionic_lif_deferred_enqueue(&lif->deferred, work); + } else { + 
ionic_lif_rx_mode(lif); } } @@ -3058,6 +3039,7 @@ void ionic_lif_deinit(struct ionic_lif *lif) ionic_lif_qcq_deinit(lif, lif->notifyqcq); ionic_lif_qcq_deinit(lif, lif->adminqcq); + mutex_destroy(&lif->config_lock); mutex_destroy(&lif->queue_lock); ionic_lif_reset(lif); } @@ -3185,7 +3167,7 @@ static int ionic_station_set(struct ionic_lif *lif) */ if (!ether_addr_equal(ctx.comp.lif_getattr.mac, netdev->dev_addr)) - ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR); } else { /* Update the netdev mac with the device's mac */ memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); @@ -3202,7 +3184,7 @@ static int ionic_station_set(struct ionic_lif *lif) netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR); return 0; } @@ -3225,6 +3207,7 @@ int ionic_lif_init(struct ionic_lif *lif) lif->hw_index = le16_to_cpu(comp.hw_index); mutex_init(&lif->queue_lock); + mutex_init(&lif->config_lock); /* now that we have the hw_index we can figure out our doorbell page */ lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 346506f01715..af291303bd7a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -108,7 +108,6 @@ struct ionic_deferred_work { struct list_head list; enum ionic_deferred_work_type type; union { - unsigned int rx_mode; u8 addr[ETH_ALEN]; u8 fw_status; }; @@ -179,6 +178,7 @@ struct ionic_lif { unsigned int index; unsigned int hw_index; struct mutex queue_lock; /* lock for queue structures */ + struct mutex config_lock; /* lock for config actions */ spinlock_t adminq_lock; /* lock for AdminQ operations */ struct ionic_qcq *adminqcq; struct ionic_qcq *notifyqcq; @@ -199,7 
+199,7 @@ struct ionic_lif { unsigned int nrxq_descs; u32 rx_copybreak; u64 rxq_features; - unsigned int rx_mode; + u16 rx_mode; u64 hw_features; bool registered; bool mc_overflow; From f79eef711eb57d56874b08ea11db69221de54a6d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:46 -0700 Subject: [PATCH 392/794] ionic: catch no ptp support earlier If PTP configuration is attempted on ports that don't support it, such as VF ports, the driver will return an error status -95, or EOPNOTSUPP and print an error message enp98s0: hwstamp set failed: -95 Because some daemons can retry every few seconds, this can end up filling the dmesg log and pushing out other more useful messages. We can catch this issue earlier in our handling and return the error without a log message. Fixes: 829600ce5e4e ("ionic: add ts_config replay") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.h | 7 ++----- drivers/net/ethernet/pensando/ionic/ionic_phc.c | 10 +++++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index af291303bd7a..69ab59fedb6c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -302,7 +302,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type, int ionic_lif_size(struct ionic *ionic); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) -int ionic_lif_hwstamp_replay(struct ionic_lif *lif); +void ionic_lif_hwstamp_replay(struct ionic_lif *lif); int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr); int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr); ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter); @@ -311,10 +311,7 @@ void ionic_lif_unregister_phc(struct ionic_lif *lif); void ionic_lif_alloc_phc(struct ionic_lif *lif); void ionic_lif_free_phc(struct ionic_lif *lif); #else -static
inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif) -{ - return -EOPNOTSUPP; -} +static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {} static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c index a87c87e86aef..6e2403c71608 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c @@ -188,6 +188,9 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) struct hwtstamp_config config; int err; + if (!lif->phc || !lif->phc->ptp) + return -EOPNOTSUPP; + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; @@ -203,15 +206,16 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) return 0; } -int ionic_lif_hwstamp_replay(struct ionic_lif *lif) +void ionic_lif_hwstamp_replay(struct ionic_lif *lif) { int err; + if (!lif->phc || !lif->phc->ptp) + return; + err = ionic_lif_hwstamp_set_ts_config(lif, NULL); if (err) netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err); - - return err; } int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) From a6ff85e0a2d9d074a4b4c291ba9ec1e5b0aba22b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:47 -0700 Subject: [PATCH 393/794] ionic: remove intr coalesce update from napi Move the interrupt coalesce value update out of the napi thread and into the dim_work thread and set it only when it has actually changed. Fixes: 04a834592bf5 ("ionic: dynamic interrupt moderation") Signed-off-by: Shannon Nelson Signed-off-by: David S. 
Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 14 +++++++++++++- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 4 ---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 7815e9034fb8..e795fa63ca12 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -53,7 +53,19 @@ static void ionic_dim_work(struct work_struct *work) cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); qcq = container_of(dim, struct ionic_qcq, dim); new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec); - qcq->intr.dim_coal_hw = new_coal ? new_coal : 1; + new_coal = new_coal ? new_coal : 1; + + if (qcq->intr.dim_coal_hw != new_coal) { + unsigned int qi = qcq->cq.bound_q->index; + struct ionic_lif *lif = qcq->q.lif; + + qcq->intr.dim_coal_hw = new_coal; + + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + lif->rxqcqs[qi]->intr.index, + qcq->intr.dim_coal_hw); + } + dim->state = DIM_START_MEASURE; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 08934888575c..9d3a04110685 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -463,10 +463,6 @@ static void ionic_dim_update(struct ionic_qcq *qcq) lif = qcq->q.lif; qi = qcq->cq.bound_q->index; - ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->rxqcqs[qi]->intr.index, - qcq->intr.dim_coal_hw); - dim_update_sample(qcq->cq.bound_intr->rearm_count, lif->txqstats[qi].pkts, lif->txqstats[qi].bytes, From 76ed8a4a00b484dcccef819ef2618bcf8e46f560 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:48 -0700 Subject: [PATCH 394/794] ionic: fix up dim accounting for tx and rx We need to count the correct Tx and/or Rx packets for dynamic interrupt moderation, depending on which 
we're processing on the queue interrupt. Fixes: 04a834592bf5 ("ionic: dynamic interrupt moderation") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../net/ethernet/pensando/ionic/ionic_txrx.c | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 9d3a04110685..1c6e2b9fc96b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -451,11 +451,12 @@ void ionic_rx_empty(struct ionic_queue *q) q->tail_idx = 0; } -static void ionic_dim_update(struct ionic_qcq *qcq) +static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode) { struct dim_sample dim_sample; struct ionic_lif *lif; unsigned int qi; + u64 pkts, bytes; if (!qcq->intr.dim_coal_hw) return; @@ -463,10 +464,23 @@ static void ionic_dim_update(struct ionic_qcq *qcq) lif = qcq->q.lif; qi = qcq->cq.bound_q->index; + switch (napi_mode) { + case IONIC_LIF_F_TX_DIM_INTR: + pkts = lif->txqstats[qi].pkts; + bytes = lif->txqstats[qi].bytes; + break; + case IONIC_LIF_F_RX_DIM_INTR: + pkts = lif->rxqstats[qi].pkts; + bytes = lif->rxqstats[qi].bytes; + break; + default: + pkts = lif->txqstats[qi].pkts + lif->rxqstats[qi].pkts; + bytes = lif->txqstats[qi].bytes + lif->rxqstats[qi].bytes; + break; + } + dim_update_sample(qcq->cq.bound_intr->rearm_count, - lif->txqstats[qi].pkts, - lif->txqstats[qi].bytes, - &dim_sample); + pkts, bytes, &dim_sample); net_dim(&qcq->dim, dim_sample); } @@ -487,7 +501,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) ionic_tx_service, NULL, NULL); if (work_done < budget && napi_complete_done(napi, work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; cq->bound_intr->rearm_count++; } @@ -526,7 +540,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(cq->bound_q); if 
(work_done < budget && napi_complete_done(napi, work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; cq->bound_intr->rearm_count++; } @@ -572,7 +586,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(rxcq->bound_q); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, 0); flags |= IONIC_INTR_CRED_UNMASK; rxcq->bound_intr->rearm_count++; } From f07f9815b7046e25cc32bf8542c9c0bbc5eb6e0e Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:49 -0700 Subject: [PATCH 395/794] ionic: count csum_none when offload enabled Be sure to count the csum_none cases when csum offload is enabled. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 1c6e2b9fc96b..08870190e4d2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -274,12 +274,11 @@ static void ionic_rx_clean(struct ionic_queue *q, } } - if (likely(netdev->features & NETIF_F_RXCSUM)) { - if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) { - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum = (__force __wsum)le16_to_cpu(comp->csum); - stats->csum_complete++; - } + if (likely(netdev->features & NETIF_F_RXCSUM) && + (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC)) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = (__force __wsum)le16_to_cpu(comp->csum); + stats->csum_complete++; } else { stats->csum_none++; } From 3c30ef0f78cfb36fdb13753794b0384cf7e37cc9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Jul 2021 11:49:29 -0600 Subject: [PATCH 396/794] io_uring: never attempt 
iopoll reissue from release path There are two reasons why this shouldn't be done: 1) Ring is exiting, and we're canceling requests anyway. Any request should be canceled anyway. In theory, this could iterate for a number of times if someone else is also driving the target block queue into request starvation, however the likelihood of this happening is minuscule. 2) If the original task decided to pass the ring to another task, then we don't want to be reissuing from this context as it may be an unrelated task or context. No assumptions should be made about the context in which ->release() is run. This can only happen for pure read/write, and we'll get -EFAULT on them anyway. Link: https://lore.kernel.org/io-uring/YPr4OaHv0iv0KTOc@zeniv-ca.linux.org.uk/ Reported-by: Al Viro Signed-off-by: Jens Axboe --- fs/io_uring.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f2fe4eca150b..117dc32eb8a8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2205,7 +2205,7 @@ static inline bool io_run_task_work(void) * Find and free completed poll iocbs */ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, - struct list_head *done) + struct list_head *done, bool resubmit) { struct req_batch rb; struct io_kiocb *req; @@ -2220,7 +2220,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, inflight_entry); list_del(&req->inflight_entry); - if (READ_ONCE(req->result) == -EAGAIN && + if (READ_ONCE(req->result) == -EAGAIN && resubmit && !(req->flags & REQ_F_DONT_REISSUE)) { req->iopoll_completed = 0; req_ref_get(req); @@ -2244,7 +2244,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, } static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, - long min) + long min, bool resubmit) { struct io_kiocb *req, *tmp; LIST_HEAD(done); @@ -2287,7 +2287,7 @@ static int
io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, } if (!list_empty(&done)) - io_iopoll_complete(ctx, nr_events, &done); + io_iopoll_complete(ctx, nr_events, &done, resubmit); return ret; } @@ -2305,7 +2305,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) while (!list_empty(&ctx->iopoll_list)) { unsigned int nr_events = 0; - io_do_iopoll(ctx, &nr_events, 0); + io_do_iopoll(ctx, &nr_events, 0, false); /* let it sleep and repeat later if can't complete a request */ if (nr_events == 0) @@ -2367,7 +2367,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) list_empty(&ctx->iopoll_list)) break; } - ret = io_do_iopoll(ctx, &nr_events, min); + ret = io_do_iopoll(ctx, &nr_events, min, true); } while (!ret && nr_events < min && !need_resched()); out: mutex_unlock(&ctx->uring_lock); @@ -6798,7 +6798,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) mutex_lock(&ctx->uring_lock); if (!list_empty(&ctx->iopoll_list)) - io_do_iopoll(ctx, &nr_events, 0); + io_do_iopoll(ctx, &nr_events, 0, true); /* * Don't submit if refs are dying, good for io_uring_register(), From 991468dcf198bb87f24da330676724a704912b47 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Jul 2021 11:53:54 -0600 Subject: [PATCH 397/794] io_uring: explicitly catch any illegal async queue attempt Catch an illegal case to queue async from an unrelated task that got the ring fd passed to it. This should not be possible to hit, but better be proactive and catch it explicitly. io-wq is extended to check for early IO_WQ_WORK_CANCEL being set on a work item as well, so it can run the request through the normal cancelation path. 
Signed-off-by: Jens Axboe --- fs/io-wq.c | 7 ++++++- fs/io_uring.c | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 843d4a7bcd6e..cf086b01c6c6 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -731,7 +731,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) int work_flags; unsigned long flags; - if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) { + /* + * If io-wq is exiting for this task, or if the request has explicitly + * been marked as one that should not get executed, cancel it here. + */ + if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || + (work->flags & IO_WQ_WORK_CANCEL)) { io_run_cancel(work, wqe); return; } diff --git a/fs/io_uring.c b/fs/io_uring.c index 117dc32eb8a8..5a0fd6bcd318 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1294,6 +1294,17 @@ static void io_queue_async_work(struct io_kiocb *req) /* init ->work of the whole link before punting */ io_prep_async_link(req); + + /* + * Not expected to happen, but if we do have a bug where this _can_ + * happen, catch it here and ensure the request is marked as + * canceled. That will make io-wq go through the usual work cancel + * procedure rather than attempt to run this request (or create a new + * worker for it). + */ + if (WARN_ON_ONCE(!same_thread_group(req->task, current))) + req->work.flags |= IO_WQ_WORK_CANCEL; + trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, &req->work, req->flags); io_wq_enqueue(tctx->io_wq, &req->work); From 76f5dfacfb42b75e5782c017827877cfcee20474 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 23 Jul 2021 08:22:26 +0800 Subject: [PATCH 398/794] riscv: stacktrace: pin the task's stack in get_wchan Pin the task's stack before calling walk_stackframe() in get_wchan(). 
This can fix the panic as reported by Andreas when CONFIG_VMAP_STACK=y: [ 65.609696] Unable to handle kernel paging request at virtual address ffffffd0003bbde8 [ 65.610460] Oops [#1] [ 65.610626] Modules linked in: virtio_blk virtio_mmio rtc_goldfish btrfs blake2b_generic libcrc32c xor raid6_pq sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua efivarfs [ 65.611670] CPU: 2 PID: 1 Comm: systemd Not tainted 5.14.0-rc1-1.g34fe32a-default #1 openSUSE Tumbleweed (unreleased) c62f7109153e5a0897ee58ba52393ad99b070fd2 [ 65.612334] Hardware name: riscv-virtio,qemu (DT) [ 65.613008] epc : get_wchan+0x5c/0x88 [ 65.613334] ra : get_wchan+0x42/0x88 [ 65.613625] epc : ffffffff800048a4 ra : ffffffff8000488a sp : ffffffd00021bb90 [ 65.614008] gp : ffffffff817709f8 tp : ffffffe07fe91b80 t0 : 00000000000001f8 [ 65.614411] t1 : 0000000000020000 t2 : 0000000000000000 s0 : ffffffd00021bbd0 [ 65.614818] s1 : ffffffd0003bbdf0 a0 : 0000000000000001 a1 : 0000000000000002 [ 65.615237] a2 : ffffffff81618008 a3 : 0000000000000000 a4 : 0000000000000000 [ 65.615637] a5 : ffffffd0003bc000 a6 : 0000000000000002 a7 : ffffffe27d370000 [ 65.616022] s2 : ffffffd0003bbd90 s3 : ffffffff8071a81e s4 : 0000000000003fff [ 65.616407] s5 : ffffffffffffc000 s6 : 0000000000000000 s7 : ffffffff81618008 [ 65.616845] s8 : 0000000000000001 s9 : 0000000180000040 s10: 0000000000000000 [ 65.617248] s11: 000000000000016b t3 : 000000ff00000000 t4 : 0c6aec92de5e3fd7 [ 65.617672] t5 : fff78f60608fcfff t6 : 0000000000000078 [ 65.618088] status: 0000000000000120 badaddr: ffffffd0003bbde8 cause: 000000000000000d [ 65.618621] [] get_wchan+0x5c/0x88 [ 65.619008] [] do_task_stat+0x7a2/0xa46 [ 65.619325] [] proc_tgid_stat+0xe/0x16 [ 65.619637] [] proc_single_show+0x46/0x96 [ 65.619979] [] seq_read_iter+0x190/0x31e [ 65.620341] [] seq_read+0xc4/0x104 [ 65.620633] [] vfs_read+0x6a/0x112 [ 65.620922] [] ksys_read+0x54/0xbe [ 65.621206] [] sys_read+0xe/0x16 [ 65.621474] [] ret_from_syscall+0x0/0x2 [ 65.622169] ---[ end 
trace f24856ed2b8789c5 ]--- [ 65.622832] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index ff467b98c3e3..ac7593607fa6 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -132,8 +132,12 @@ unsigned long get_wchan(struct task_struct *task) { unsigned long pc = 0; - if (likely(task && task != current && !task_is_running(task))) + if (likely(task && task != current && !task_is_running(task))) { + if (!try_get_task_stack(task)) + return 0; walk_stackframe(task, NULL, save_wchan, &pc); + put_task_stack(task); + } return pc; } From e71e2ace5721a8b921dca18b045069e7bb411277 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 23 Jul 2021 15:50:01 -0700 Subject: [PATCH 399/794] userfaultfd: do not untag user pointers Patch series "userfaultfd: do not untag user pointers", v5. If a user program uses userfaultfd on ranges of heap memory, it may end up passing a tagged pointer to the kernel in the range.start field of the UFFDIO_REGISTER ioctl. This can happen when using an MTE-capable allocator, or on Android if using the Tagged Pointers feature for MTE readiness [1]. When a fault subsequently occurs, the tag is stripped from the fault address returned to the application in the fault.address field of struct uffd_msg. However, from the application's perspective, the tagged address *is* the memory address, so if the application is unaware of memory tags, it may get confused by receiving an address that is, from its point of view, outside of the bounds of the allocation. We observed this behavior in the kselftest for userfaultfd [2] but other applications could have the same problem. Address this by not untagging pointers passed to the userfaultfd ioctls. 
Instead, let the system call fail. Also change the kselftest to use mmap so that it doesn't encounter this problem. [1] https://source.android.com/devices/tech/debug/tagged-pointers [2] tools/testing/selftests/vm/userfaultfd.c This patch (of 2): Do not untag pointers passed to the userfaultfd ioctls. Instead, let the system call fail. This will provide an early indication of problems with tag-unaware userspace code instead of letting the code get confused later, and is consistent with how we decided to handle brk/mmap/mremap in commit dcde237319e6 ("mm: Avoid creating virtual address aliases in brk()/mmap()/mremap()"), as well as being consistent with the existing tagged address ABI documentation relating to how ioctl arguments are handled. The code change is a revert of commit 7d0325749a6c ("userfaultfd: untag user pointers") plus some fixups to some additional calls to validate_range that have appeared since then. [1] https://source.android.com/devices/tech/debug/tagged-pointers [2] tools/testing/selftests/vm/userfaultfd.c Link: https://lkml.kernel.org/r/20210714195437.118982-1-pcc@google.com Link: https://lkml.kernel.org/r/20210714195437.118982-2-pcc@google.com Link: https://linux-review.googlesource.com/id/I761aa9f0344454c482b83fcfcce547db0a25501b Fixes: 63f0c6037965 ("arm64: Introduce prctl() options to control the tagged user addresses ABI") Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Reviewed-by: Catalin Marinas Cc: Alistair Delva Cc: Andrea Arcangeli Cc: Dave Martin Cc: Evgenii Stepanov Cc: Lokesh Gidra Cc: Mitch Phillips Cc: Vincenzo Frascino Cc: Will Deacon Cc: William McVicker Cc: [5.4] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/arm64/tagged-address-abi.rst | 24 ++++++++++++++------ fs/userfaultfd.c | 26 ++++++++++------------ 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index 
459e6b66ff68..0c9120ec58ae 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -45,14 +45,24 @@ how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space management (e.g. ``mprotect()``, ``madvise()``). The use of valid - tagged pointers in this context is allowed with the exception of - ``brk()``, ``mmap()`` and the ``new_address`` argument to - ``mremap()`` as these have the potential to alias with existing - user addresses. + tagged pointers in this context is allowed with these exceptions: - NOTE: This behaviour changed in v5.6 and so some earlier kernels may - incorrectly accept valid tagged pointers for the ``brk()``, - ``mmap()`` and ``mremap()`` system calls. + - ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses. + + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. + + - The ``range.start``, ``start`` and ``dst`` arguments to the + ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from + ``userfaultfd()``, as fault addresses subsequently obtained by reading + the file descriptor will be untagged, which may otherwise confuse + tag-unaware programs. + + NOTE: This behaviour changed in v5.14 and so some earlier kernels may + incorrectly accept valid tagged pointers for this system call. 2. User addresses accessed by the kernel (e.g. ``write()``). 
This ABI relaxation is disabled by default and the application thread needs to diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f6e0f0c0d0e5..5c2d806e6ae5 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, } static __always_inline int validate_range(struct mm_struct *mm, - __u64 *start, __u64 len) + __u64 start, __u64 len) { __u64 task_size = mm->task_size; - *start = untagged_addr(*start); - - if (*start & ~PAGE_MASK) + if (start & ~PAGE_MASK) return -EINVAL; if (len & ~PAGE_MASK) return -EINVAL; if (!len) return -EINVAL; - if (*start < mmap_min_addr) + if (start < mmap_min_addr) return -EINVAL; - if (*start >= task_size) + if (start >= task_size) return -EINVAL; - if (len > task_size - *start) + if (len > task_size - start) return -EINVAL; return 0; } @@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vm_flags |= VM_UFFD_MINOR; } - ret = validate_range(mm, &uffdio_register.range.start, + ret = validate_range(mm, uffdio_register.range.start, uffdio_register.range.len); if (ret) goto out; @@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) goto out; - ret = validate_range(mm, &uffdio_unregister.start, + ret = validate_range(mm, uffdio_unregister.start, uffdio_unregister.len); if (ret) goto out; @@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) goto out; - ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len); + ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len); if (ret) goto out; @@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, sizeof(uffdio_copy)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len); + ret = validate_range(ctx->mm, 
uffdio_copy.dst, uffdio_copy.len); if (ret) goto out; /* @@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, sizeof(uffdio_zeropage)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_zeropage.range.start, + ret = validate_range(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len); if (ret) goto out; @@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, sizeof(struct uffdio_writeprotect))) return -EFAULT; - ret = validate_range(ctx->mm, &uffdio_wp.range.start, + ret = validate_range(ctx->mm, uffdio_wp.range.start, uffdio_wp.range.len); if (ret) return ret; @@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) sizeof(uffdio_continue) - (sizeof(__s64)))) goto out; - ret = validate_range(ctx->mm, &uffdio_continue.range.start, + ret = validate_range(ctx->mm, uffdio_continue.range.start, uffdio_continue.range.len); if (ret) goto out; From 0db282ba2c12c1515d490d14a1ff696643ab0f1b Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 23 Jul 2021 15:50:04 -0700 Subject: [PATCH 400/794] selftest: use mmap instead of posix_memalign to allocate memory This test passes pointers obtained from anon_allocate_area to the userfaultfd and mremap APIs. This causes a problem if the system allocator returns tagged pointers because with the tagged address ABI the kernel rejects tagged addresses passed to these APIs, which would end up causing the test to fail. To make this test compatible with such system allocators, stop using the system allocator to allocate memory in anon_allocate_area, and instead just use mmap. 
Link: https://lkml.kernel.org/r/20210714195437.118982-3-pcc@google.com Link: https://linux-review.googlesource.com/id/Icac91064fcd923f77a83e8e133f8631c5b8fc241 Fixes: c47174fc362a ("userfaultfd: selftest") Co-developed-by: Lokesh Gidra Signed-off-by: Lokesh Gidra Signed-off-by: Peter Collingbourne Reviewed-by: Catalin Marinas Cc: Vincenzo Frascino Cc: Dave Martin Cc: Will Deacon Cc: Andrea Arcangeli Cc: Alistair Delva Cc: William McVicker Cc: Evgenii Stepanov Cc: Mitch Phillips Cc: Andrey Konovalov Cc: [5.4] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index e363bdaff59d..2ea438e6b8b1 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area) static void anon_allocate_area(void **alloc_area) { - if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) - err("posix_memalign() failed"); + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (*alloc_area == MAP_FAILED) + err("mmap of anonymous memory failed"); } static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) From 32ae8a0669392248a92d7545a7363004543f3932 Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Fri, 23 Jul 2021 15:50:08 -0700 Subject: [PATCH 401/794] kfence: defer kfence_test_init to ensure that kunit debugfs is created kfence_test_init and kunit_init both use the same level late_initcall, which means if kfence_test_init linked ahead of kunit_init, kfence_test_init will get a NULL debugfs_rootdir as parent dentry, then kfence_test_init and kfence_debugfs_init both create a debugfs node named "kfence" under debugfs_mount->mnt_root, and it will throw out "debugfs: Directory 'kfence' with 
parent '/' already present!" with EEXIST. So kfence_test_init should be deferred. Link: https://lkml.kernel.org/r/20210714113140.2949995-1-o451686892@gmail.com Signed-off-by: Weizhao Ouyang Tested-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/kfence_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 7f24b9bcb2ec..942cbc16ad26 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -852,7 +852,7 @@ static void kfence_test_exit(void) tracepoint_synchronize_unregister(); } -late_initcall(kfence_test_init); +late_initcall_sync(kfence_test_init); module_exit(kfence_test_exit); MODULE_LICENSE("GPL v2"); From 235a85cb32bb123854ad31de46fdbf04c1d57cda Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 23 Jul 2021 15:50:11 -0700 Subject: [PATCH 402/794] kfence: move the size check to the beginning of __kfence_alloc() Check the allocation size before toggling kfence_allocation_gate. This way allocations that can't be served by KFENCE will not result in waiting for another CONFIG_KFENCE_SAMPLE_INTERVAL without allocating anything. 
Link: https://lkml.kernel.org/r/20210714092222.1890268-1-glider@google.com Signed-off-by: Alexander Potapenko Suggested-by: Marco Elver Reviewed-by: Marco Elver Cc: Dmitry Vyukov Cc: Marco Elver Cc: Greg Kroah-Hartman Cc: [5.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index d7666ace9d2e..2623ff401a10 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -733,6 +733,13 @@ void kfence_shutdown_cache(struct kmem_cache *s) void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { + /* + * Perform size check before switching kfence_allocation_gate, so that + * we don't disable KFENCE without making an allocation. + */ + if (size > PAGE_SIZE) + return NULL; + /* * allocation_gate only needs to become non-zero, so it doesn't make * sense to continue writing to it and pay the associated contention @@ -757,9 +764,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (!READ_ONCE(kfence_enabled)) return NULL; - if (size > PAGE_SIZE) - return NULL; - return kfence_guarded_alloc(s, size, flags); } From 236e9f1538523d3d380dda1cc99571d587058f37 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 23 Jul 2021 15:50:14 -0700 Subject: [PATCH 403/794] kfence: skip all GFP_ZONEMASK allocations Allocation requests outside ZONE_NORMAL (MOVABLE, HIGHMEM or DMA) cannot be fulfilled by KFENCE, because KFENCE memory pool is located in a zone different from the requested one. Because callers of kmem_cache_alloc() may actually rely on the allocation to reside in the requested zone (e.g. memory allocations done with __GFP_DMA must be DMAable), skip all allocations done with GFP_ZONEMASK and/or respective SLAB flags (SLAB_CACHE_DMA and SLAB_CACHE_DMA32). 
Link: https://lkml.kernel.org/r/20210714092222.1890268-2-glider@google.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Alexander Potapenko Reviewed-by: Marco Elver Acked-by: Souptick Joarder Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Souptick Joarder Cc: [5.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 2623ff401a10..575c685aa642 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -740,6 +740,15 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (size > PAGE_SIZE) return NULL; + /* + * Skip allocations from non-default zones, including DMA. We cannot + * guarantee that pages in the KFENCE pool will have the requested + * properties (e.g. reside in DMAable memory). + */ + if ((flags & GFP_ZONEMASK) || + (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) + return NULL; + /* * allocation_gate only needs to become non-zero, so it doesn't make * sense to continue writing to it and pay the associated contention From 8dad53a11f8d94dceb540a5f8f153484f42be84b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Jul 2021 15:50:17 -0700 Subject: [PATCH 404/794] mm: call flush_dcache_page() in memcpy_to_page() and memzero_page() memcpy_to_page and memzero_page can write to arbitrary pages, which could be in the page cache or in high memory, so call flush_dcache_page() to flush the dcache. This is a problem when using these helpers on dcache challenged architectures. Right now there are just a few users, chances are no one used the PC floppy driver, the aha1542 driver for an ISA SCSI HBA, and a few advanced and optional btrfs and ext4 features on those platforms yet since the conversion.
Link: https://lkml.kernel.org/r/20210713055231.137602-2-hch@lst.de Fixes: bb90d4bc7b6a ("mm/highmem: Lift memcpy_[to|from]_page to core") Fixes: 28961998f858 ("iov_iter: lift memzero_page() to highmem.h") Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Cc: Chaitanya Kulkarni Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8c6e8e996c87..8e7e50a53a12 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -318,6 +318,7 @@ static inline void memcpy_to_page(struct page *page, size_t offset, VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); + flush_dcache_page(page); kunmap_local(to); } @@ -325,6 +326,7 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) { char *addr = kmap_atomic(page); memset(addr + offset, 0, len); + flush_dcache_page(page); kunmap_atomic(addr); } From d9a42b53bdf7b0329dc09a59fc1b092640b6da19 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Jul 2021 15:50:20 -0700 Subject: [PATCH 405/794] mm: use kmap_local_page in memzero_page The commit message introducing the global memzero_page explicitly mentions switching to kmap_local_page in the commit log but doesn't actually do that. 
Link: https://lkml.kernel.org/r/20210713055231.137602-3-hch@lst.de Fixes: 28961998f858 ("iov_iter: lift memzero_page() to highmem.h") Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ira Weiny Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8e7e50a53a12..d9a606a9fc64 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -324,10 +324,10 @@ static inline void memcpy_to_page(struct page *page, size_t offset, static inline void memzero_page(struct page *page, size_t offset, size_t len) { - char *addr = kmap_atomic(page); + char *addr = kmap_local_page(page); memset(addr + offset, 0, len); flush_dcache_page(page); - kunmap_atomic(addr); + kunmap_local(addr); } #endif /* _LINUX_HIGHMEM_H */ From 69e5d322a2fb86173fde8bad26e8eb38cad1b1e9 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 23 Jul 2021 15:50:23 -0700 Subject: [PATCH 406/794] mm: page_alloc: fix page_poison=1 / INIT_ON_ALLOC_DEFAULT_ON interaction To reproduce the failure we need the following system: - kernel command: page_poison=1 init_on_free=0 init_on_alloc=0 - kernel config: * CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y * CONFIG_INIT_ON_FREE_DEFAULT_ON=y * CONFIG_PAGE_POISONING=y Resulting in: 0000000085629bdd: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 0000000022861832: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000000c597f5b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
CPU: 11 PID: 15195 Comm: bash Kdump: loaded Tainted: G U O 5.13.1-gentoo-x86_64 #1 Hardware name: System manufacturer System Product Name/PRIME Z370-A, BIOS 2801 01/13/2021 Call Trace: dump_stack+0x64/0x7c __kernel_unpoison_pages.cold+0x48/0x84 post_alloc_hook+0x60/0xa0 get_page_from_freelist+0xdb8/0x1000 __alloc_pages+0x163/0x2b0 __get_free_pages+0xc/0x30 pgd_alloc+0x2e/0x1a0 mm_init+0x185/0x270 dup_mm+0x6b/0x4f0 copy_process+0x190d/0x1b10 kernel_clone+0xba/0x3b0 __do_sys_clone+0x8f/0xb0 do_syscall_64+0x68/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Before commit 51cba1ebc60d ("init_on_alloc: Optimize static branches") init_on_alloc never enabled static branch by default. It could only be enabled explicitly by init_mem_debugging_and_hardening(). But after commit 51cba1ebc60d, a static branch could already be enabled by default. There was no code to ever disable it. That caused page_poison=1 / init_on_free=1 conflict. This change extends init_mem_debugging_and_hardening() to also handle static branch disabling.
Link: https://lkml.kernel.org/r/20210714031935.4094114-1-keescook@chromium.org Link: https://lore.kernel.org/r/20210712215816.1512739-1-slyfox@gentoo.org Fixes: 51cba1ebc60d ("init_on_alloc: Optimize static branches") Signed-off-by: Sergei Trofimovich Signed-off-by: Kees Cook Co-developed-by: Kees Cook Reported-by: Mikhail Morfikov Reported-by: Tested-by: Reviewed-by: David Hildenbrand Cc: Alexander Potapenko Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e97e68aef7a..856b175c15a4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -840,21 +840,24 @@ void init_mem_debugging_and_hardening(void) } #endif - if (_init_on_alloc_enabled_early) { - if (page_poisoning_requested) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_alloc\n"); - else - static_branch_enable(&init_on_alloc); - } - if (_init_on_free_enabled_early) { - if (page_poisoning_requested) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_free\n"); - else - static_branch_enable(&init_on_free); + if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) && + page_poisoning_requested) { + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " + "will take precedence over init_on_alloc and init_on_free\n"); + _init_on_alloc_enabled_early = false; + _init_on_free_enabled_early = false; } + if (_init_on_alloc_enabled_early) + static_branch_enable(&init_on_alloc); + else + static_branch_disable(&init_on_alloc); + + if (_init_on_free_enabled_early) + static_branch_enable(&init_on_free); + else + static_branch_disable(&init_on_free); + #ifdef CONFIG_DEBUG_PAGEALLOC if (!debug_pagealloc_enabled()) return; From 79e482e9c3ae86e849c701c846592e72baddda5a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 23 
Jul 2021 15:50:26 -0700 Subject: [PATCH 407/794] memblock: make for_each_mem_range() traverse MEMBLOCK_HOTPLUG regions Commit b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") didn't take into account that when there is movable_node parameter in the kernel command line, for_each_mem_range() would skip ranges marked with MEMBLOCK_HOTPLUG. The page table setup code in POWER uses for_each_mem_range() to create the linear mapping of the physical memory and since the regions marked as MEMBLOCK_HOTPLUG are skipped, they never make it to the linear map. A later access to the memory in those ranges will fail: BUG: Unable to handle kernel data access on write at 0xc000000400000000 Faulting instruction address: 0xc00000000008a3c0 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 0 PID: 53 Comm: kworker/u2:0 Not tainted 5.13.0 #7 NIP: c00000000008a3c0 LR: c0000000003c1ed8 CTR: 0000000000000040 REGS: c000000008a57770 TRAP: 0300 Not tainted (5.13.0) MSR: 8000000002009033 CR: 84222202 XER: 20040000 CFAR: c0000000003c1ed4 DAR: c000000400000000 DSISR: 42000000 IRQMASK: 0 GPR00: c0000000003c1ed8 c000000008a57a10 c0000000019da700 c000000400000000 GPR04: 0000000000000280 0000000000000180 0000000000000400 0000000000000200 GPR08: 0000000000000100 0000000000000080 0000000000000040 0000000000000300 GPR12: 0000000000000380 c000000001bc0000 c0000000001660c8 c000000006337e00 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000040000000 0000000020000000 c000000001a81990 c000000008c30000 GPR24: c000000008c20000 c000000001a81998 000fffffffff0000 c000000001a819a0 GPR28: c000000001a81908 c00c000001000000 c000000008c40000 c000000008a64680 NIP clear_user_page+0x50/0x80 LR __handle_mm_fault+0xc88/0x1910 Call Trace: __handle_mm_fault+0xc44/0x1910 (unreliable) handle_mm_fault+0x130/0x2a0 __get_user_pages+0x248/0x610 __get_user_pages_remote+0x12c/0x3e0
get_arg_page+0x54/0xf0 copy_string_kernel+0x11c/0x210 kernel_execve+0x16c/0x220 call_usermodehelper_exec_async+0x1b0/0x2f0 ret_from_kernel_thread+0x5c/0x70 Instruction dump: 79280fa4 79271764 79261f24 794ae8e2 7ca94214 7d683a14 7c893a14 7d893050 7d4903a6 60000000 60000000 60000000 <7c001fec> 7c091fec 7c081fec 7c051fec ---[ end trace 490b8c67e6075e09 ]--- Making for_each_mem_range() include MEMBLOCK_HOTPLUG regions in the traversal fixes this issue. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1976100 Link: https://lkml.kernel.org/r/20210712071132.20902-1-rppt@kernel.org Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Signed-off-by: Mike Rapoport Tested-by: Greg Kurz Reviewed-by: David Hildenbrand Cc: [5.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- mm/memblock.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cbf46f56d105..4a53c3ca86bd 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range(i, p_start, p_end) \ __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_mem_range_rev - reverse iterate through memblock areas from @@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range_rev(i, p_start, p_end) \ __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_reserved_mem_range - iterate over all reserved memblock areas diff --git a/mm/memblock.c b/mm/memblock.c index 0041ff62c584..de7b553baa50 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -947,7 +947,8 @@ static 
bool should_skip_region(struct memblock_type *type, return true; /* skip hotpluggable memory regions if needed */ - if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) + if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && + !(flags & MEMBLOCK_HOTPLUG)) return true; /* if we want mirror memory skip non-mirror memory regions */ From b43a9e76b4cc78cdaa8c809dd31cd452797b7661 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 23 Jul 2021 15:50:29 -0700 Subject: [PATCH 408/794] writeback, cgroup: remove wb from offline list before releasing refcnt Boyang reported that the commit c22d70a162d3 ("writeback, cgroup: release dying cgwbs by switching attached inodes") causes the kernel to crash while running xfstests generic/256 on ext4 on aarch64 and ppc64le. run fstests generic/256 at 2021-07-12 05:41:40 EXT4-fs (vda3): mounted filesystem with ordered data mode. Opts: . Quota mode: none. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x96000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005 CM = 0, WnR = 0 user pgtable: 64k pages, 48-bit VAs, pgdp=00000000b0502000 [0000000000000000] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 Internal error: Oops: 96000005 [#1] SMP Modules linked in: dm_flakey dm_snapshot dm_bufio dm_zero dm_mod loop tls rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs rfkill sunrpc ext4 vfat fat mbcache jbd2 drm fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce virtio_blk virtio_net net_failover virtio_console failover virtio_mmio aes_neon_bs [last unloaded: scsi_debug] CPU: 0 PID: 408468 Comm: kworker/u8:5 Tainted: G X --------- --- 5.14.0-0.rc1.15.bx.el9.aarch64 #1 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 Workqueue: events_unbound cleanup_offline_cgwbs_workfn pstate: 
004000c5 (nzcv daIF +PAN -UAO -TCO BTYPE=--) pc : cleanup_offline_cgwbs_workfn+0x320/0x394 lr : cleanup_offline_cgwbs_workfn+0xe0/0x394 sp : ffff80001554fd10 x29: ffff80001554fd10 x28: 0000000000000000 x27: 0000000000000001 x26: 0000000000000000 x25: 00000000000000e0 x24: ffffd2a2fbe671a8 x23: ffff80001554fd88 x22: ffffd2a2fbe67198 x21: ffffd2a2fc25a730 x20: ffff210412bc3000 x19: ffff210412bc3280 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000030 x12: 0000000000000040 x11: ffff210481572238 x10: ffff21048157223a x9 : ffffd2a2fa276c60 x8 : ffff210484106b60 x7 : 0000000000000000 x6 : 000000000007d18a x5 : ffff210416a86400 x4 : ffff210412bc0280 x3 : 0000000000000000 x2 : ffff80001554fd88 x1 : ffff210412bc0280 x0 : 0000000000000003 Call trace: cleanup_offline_cgwbs_workfn+0x320/0x394 process_one_work+0x1f4/0x4b0 worker_thread+0x184/0x540 kthread+0x114/0x120 ret_from_fork+0x10/0x18 Code: d63f0020 97f99963 17ffffa6 f8588263 (f9400061) ---[ end trace e250fe289272792a ]--- Kernel panic - not syncing: Oops: Fatal exception SMP: stopping secondary CPUs SMP: failed to stop secondary CPUs 0-2 Kernel Offset: 0x52a2e9fa0000 from 0xffff800010000000 PHYS_OFFSET: 0xfff0defca0000000 CPU features: 0x00200251,23200840 Memory Limit: none ---[ end Kernel panic - not syncing: Oops: Fatal exception ]--- The problem happens when cgwb_release_workfn() races with cleanup_offline_cgwbs_workfn(): wb_tryget() in cleanup_offline_cgwbs_workfn() can be called after percpu_ref_exit() in cgwb_release_workfn(), which is basically a use-after-free error. Fix the problem by removing the writeback structure from the offline list before releasing the percpu reference counter. It will guarantee that cleanup_offline_cgwbs_workfn() will not see and not access writeback structures which are about to be released.
Link: https://lkml.kernel.org/r/20210716201039.3762203-1-guro@fb.com Fixes: c22d70a162d3 ("writeback, cgroup: release dying cgwbs by switching attached inodes") Signed-off-by: Roman Gushchin Reported-by: Boyang Xue Suggested-by: Jan Kara Tested-by: Darrick J. Wong Cc: Will Deacon Cc: Dave Chinner Cc: Murphy Zhou Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/backing-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 271f2ca862c8..f5561ea7d90a 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work) blkcg_unpin_online(blkcg); fprop_local_destroy_percpu(&wb->memcg_completions); - percpu_ref_exit(&wb->refcnt); spin_lock_irq(&cgwb_lock); list_del(&wb->offline_node); spin_unlock_irq(&cgwb_lock); + percpu_ref_exit(&wb->refcnt); wb_exit(wb); WARN_ON_ONCE(!list_empty(&wb->b_attached)); kfree_rcu(wb, rcu); From 593311e85b26ecc6e4d45b6fb81b942b6672df09 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 23 Jul 2021 15:50:32 -0700 Subject: [PATCH 409/794] writeback, cgroup: do not reparent dax inodes The inode switching code is not suited for dax inodes. An attempt to switch a dax inode to a parent writeback structure (as a part of a writeback cleanup procedure) results in a panic like this: run fstests generic/270 at 2021-07-15 05:54:02 XFS (pmem0p2): EXPERIMENTAL big timestamp feature in use. Use at your own risk! XFS (pmem0p2): DAX enabled. Warning: EXPERIMENTAL, use at your own risk XFS (pmem0p2): EXPERIMENTAL inode btree counters feature in use. Use at your own risk! XFS (pmem0p2): Mounting V5 Filesystem XFS (pmem0p2): Ending clean mount XFS (pmem0p2): Quotacheck needed: Please wait. XFS (pmem0p2): Quotacheck: Done. 
XFS (pmem0p2): xlog_verify_grant_tail: space > BBTOB(tail_blocks) XFS (pmem0p2): xlog_verify_grant_tail: space > BBTOB(tail_blocks) XFS (pmem0p2): xlog_verify_grant_tail: space > BBTOB(tail_blocks) BUG: unable to handle page fault for address: 0000000005b0f669 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 13 PID: 10479 Comm: kworker/13:16 Not tainted 5.14.0-rc1-master-8096acd7442e+ #8 Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 09/13/2016 Workqueue: inode_switch_wbs inode_switch_wbs_work_fn RIP: 0010:inode_do_switch_wbs+0xaf/0x470 Code: 00 30 0f 85 c1 03 00 00 0f 1f 44 00 00 31 d2 48 c7 c6 ff ff ff ff 48 8d 7c 24 08 e8 eb 49 1a 00 48 85 c0 74 4a bb ff ff ff ff <48> 8b 50 08 48 8d 4a ff 83 e2 01 48 0f 45 c1 48 8b 00 a8 08 0f 85 RSP: 0018:ffff9c66691abdc8 EFLAGS: 00010002 RAX: 0000000005b0f661 RBX: 00000000ffffffff RCX: ffff89e6a21382b0 RDX: 0000000000000001 RSI: ffff89e350230248 RDI: ffffffffffffffff RBP: ffff89e681d19400 R08: 0000000000000000 R09: 0000000000000228 R10: ffffffffffffffff R11: ffffffffffffffc0 R12: ffff89e6a2138130 R13: ffff89e316af7400 R14: ffff89e316af6e78 R15: ffff89e6a21382b0 FS: 0000000000000000(0000) GS:ffff89ee5fb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000005b0f669 CR3: 0000000cb2410004 CR4: 00000000001706e0 Call Trace: inode_switch_wbs_work_fn+0xb6/0x2a0 process_one_work+0x1e6/0x380 worker_thread+0x53/0x3d0 kthread+0x10f/0x130 ret_from_fork+0x22/0x30 Modules linked in: xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter nf_tables nfnetlink bridge stp llc rfkill sunrpc intel_rapl_msr intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipmi_ssif kvm mgag200 i2c_algo_bit iTCO_wdt irqbypass drm_kms_helper iTCO_vendor_support acpi_ipmi rapl syscopyarea sysfillrect 
intel_cstate ipmi_si sysimgblt ioatdma dax_pmem_compat fb_sys_fops ipmi_devintf device_dax i2c_i801 pcspkr intel_uncore hpilo nd_pmem cec dax_pmem_core dca i2c_smbus acpi_tad lpc_ich ipmi_msghandler acpi_power_meter drm fuse xfs libcrc32c sd_mod t10_pi crct10dif_pclmul crc32_pclmul crc32c_intel tg3 ghash_clmulni_intel serio_raw hpsa hpwdt scsi_transport_sas wmi dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000005b0f669 ---[ end trace ed2105faff8384f3 ]--- RIP: 0010:inode_do_switch_wbs+0xaf/0x470 Code: 00 30 0f 85 c1 03 00 00 0f 1f 44 00 00 31 d2 48 c7 c6 ff ff ff ff 48 8d 7c 24 08 e8 eb 49 1a 00 48 85 c0 74 4a bb ff ff ff ff <48> 8b 50 08 48 8d 4a ff 83 e2 01 48 0f 45 c1 48 8b 00 a8 08 0f 85 RSP: 0018:ffff9c66691abdc8 EFLAGS: 00010002 RAX: 0000000005b0f661 RBX: 00000000ffffffff RCX: ffff89e6a21382b0 RDX: 0000000000000001 RSI: ffff89e350230248 RDI: ffffffffffffffff RBP: ffff89e681d19400 R08: 0000000000000000 R09: 0000000000000228 R10: ffffffffffffffff R11: ffffffffffffffc0 R12: ffff89e6a2138130 R13: ffff89e316af7400 R14: ffff89e316af6e78 R15: ffff89e6a21382b0 FS: 0000000000000000(0000) GS:ffff89ee5fb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000005b0f669 CR3: 0000000cb2410004 CR4: 00000000001706e0 Kernel panic - not syncing: Fatal exception Kernel Offset: 0x15200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception ]--- The crash happens on an attempt to iterate over attached pagecache pages and check the dirty flag: a dax inode's xarray contains pfn's instead of generic struct page pointers. This happens for DAX and not for other kinds of non-page entries in the inodes because it's a tagged iteration, and shadow/swap entries are never tagged; only DAX entries get tagged. Fix the problem by bailing out (with the false return value) of inode_prepare_wbs_switch() if a dax inode is passed.
[willy@infradead.org: changelog addition] Link: https://lkml.kernel.org/r/20210719171350.3876830-1-guro@fb.com Fixes: c22d70a162d3 ("writeback, cgroup: release dying cgwbs by switching attached inodes") Signed-off-by: Roman Gushchin Reported-by: Murphy Zhou Reported-by: Darrick J. Wong Tested-by: Darrick J. Wong Tested-by: Murphy Zhou Acked-by: Matthew Wilcox (Oracle) Cc: Jan Kara Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 06d04a74ab6c..4c3370548982 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode, */ smp_mb(); + if (IS_DAX(inode)) + return false; + /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); if (!(inode->i_sb->s_flags & SB_ACTIVE) || From af64237461910f4c7365d367291d1c4f20c18769 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 23 Jul 2021 15:50:35 -0700 Subject: [PATCH 410/794] mm/secretmem: wire up ->set_page_dirty Make secretmem up to date with the changes done in commit 0af573780b0b ("mm: require ->set_page_dirty to be explicitly wired up") so that unconditional call to this method won't cause crashes. 
Link: https://lkml.kernel.org/r/20210716063933.31633-1-rppt@kernel.org Fixes: 0af573780b0b ("mm: require ->set_page_dirty to be explicitly wired up") Signed-off-by: Mike Rapoport Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/secretmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/secretmem.c b/mm/secretmem.c index f77d25467a14..030f02ddc7c1 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page) } const struct address_space_operations secretmem_aops = { + .set_page_dirty = __set_page_dirty_no_writeback, .freepage = secretmem_freepage, .migratepage = secretmem_migratepage, .isolate_page = secretmem_isolate_page, From e904c2ccf9b5cb356eec754ffea05c08984f6535 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 23 Jul 2021 15:50:38 -0700 Subject: [PATCH 411/794] mm: mmap_lock: fix disabling preemption directly Commit 832b50725373 ("mm: mmap_lock: use local locks instead of disabling preemption") fixed a bug by using local locks. But commit d01079f3d0c0 ("mm/mmap_lock: remove dead code for !CONFIG_TRACING configurations") changed those lines back to the original version. I guess it was introduced by fixing conflicts. Link: https://lkml.kernel.org/r/20210720074228.76342-1-songmuchun@bytedance.com Fixes: d01079f3d0c0 ("mm/mmap_lock: remove dead code for !CONFIG_TRACING configurations") Signed-off-by: Muchun Song Acked-by: Mel Gorman Reviewed-by: Yang Shi Reviewed-by: Pankaj Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap_lock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index f5852a058ce0..1854850b4b89 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void) #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) 
\ do { \ const char *memcg_path; \ - preempt_disable(); \ + local_lock(&memcg_paths.lock); \ memcg_path = get_mm_memcg_path(mm); \ trace_mmap_lock_##type(mm, \ memcg_path != NULL ? memcg_path : "", \ ##__VA_ARGS__); \ if (likely(memcg_path != NULL)) \ put_memcg_path_buf(); \ - preempt_enable(); \ + local_unlock(&memcg_paths.lock); \ } while (0) #else /* !CONFIG_MEMCG */ From e4dc3489143f84f7ed30be58b886bb6772f229b9 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Fri, 23 Jul 2021 15:50:41 -0700 Subject: [PATCH 412/794] mm: fix the deadlock in finish_fault() Commit 63f3655f9501 ("mm, memcg: fix reclaim deadlock with writeback") fix the following ABBA deadlock by pre-allocating the pte page table without holding the page lock. lock_page(A) SetPageWriteback(A) unlock_page(A) lock_page(B) lock_page(B) pte_alloc_one shrink_page_list wait_on_page_writeback(A) SetPageWriteback(B) unlock_page(B) # flush A, B to clear the writeback Commit f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") reworked the relevant code but ignored this race. This will cause the deadlock above to appear again, so fix it. Link: https://lkml.kernel.org/r/20210721074849.57004-1-zhengqi.arch@bytedance.com Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") Signed-off-by: Qi Zheng Acked-by: Kirill A. 
Shutemov Cc: Thomas Gleixner Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 747a01d495f2..25fc46e87214 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4026,8 +4026,17 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return ret; } - if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) + if (vmf->prealloc_pte) { + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (likely(pmd_none(*vmf->pmd))) { + mm_inc_nr_ptes(vma->vm_mm); + pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); + vmf->prealloc_pte = NULL; + } + spin_unlock(vmf->ptl); + } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { return VM_FAULT_OOM; + } } /* See comment in handle_pte_fault() */ From e0f7e2b2f7e7864238a4eea05cc77ae1be2bf784 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 23 Jul 2021 15:50:44 -0700 Subject: [PATCH 413/794] hugetlbfs: fix mount mode command line processing In commit 32021982a324 ("hugetlbfs: Convert to fs_context") processing of the mount mode string was changed from match_octal() to fsparam_u32. This changed existing behavior as match_octal does not require octal values to have a '0' prefix, but fsparam_u32 does. Use fsparam_u32oct which provides the same behavior as match_octal. 
Link: https://lkml.kernel.org/r/20210721183326.102716-1-mike.kravetz@oracle.com Fixes: 32021982a324 ("hugetlbfs: Convert to fs_context") Signed-off-by: Mike Kravetz Reported-by: Dennis Camera Reviewed-by: Matthew Wilcox (Oracle) Cc: David Howells Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 926eeb9bf4eb..cdfb1ae78a3f 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -77,7 +77,7 @@ enum hugetlb_param { static const struct fs_parameter_spec hugetlb_fs_parameters[] = { fsparam_u32 ("gid", Opt_gid), fsparam_string("min_size", Opt_min_size), - fsparam_u32 ("mode", Opt_mode), + fsparam_u32oct("mode", Opt_mode), fsparam_string("nr_inodes", Opt_nr_inodes), fsparam_string("pagesize", Opt_pagesize), fsparam_string("size", Opt_size), From 6010d300f9f7e16d1bf327b4730bcd0c0886d9e6 Mon Sep 17 00:00:00 2001 From: Akira Tsukamoto Date: Tue, 20 Jul 2021 17:50:52 +0900 Subject: [PATCH 414/794] riscv: __asm_copy_to-from_user: Fix: overrun copy There were two causes for the overrun memory access. The threshold size was too small. The aligning dst require one SZREG and unrolling word copy requires 8*SZREG, total have to be at least 9*SZREG. Inside the unrolling copy, the subtracting -(8*SZREG-1) would make iteration happening one extra loop. Proper value is -(8*SZREG). Signed-off-by: Akira Tsukamoto Fixes: ca6eaaa210de ("riscv: __asm_copy_to-from_user: Optimize unaligned memory access and pipeline stall") Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index bceb0629e440..8bbeca89a93f 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -35,7 +35,7 @@ ENTRY(__asm_copy_from_user) /* * Use byte copy only if too small. 
*/ - li a3, 8*SZREG /* size must be larger than size in word_copy */ + li a3, 9*SZREG /* size must be larger than size in word_copy */ bltu a2, a3, .Lbyte_copy_tail /* @@ -75,7 +75,7 @@ ENTRY(__asm_copy_from_user) * a3 - a1 & mask:(SZREG-1) * t0 - end of aligned dst */ - addi t0, t0, -(8*SZREG-1) /* not to over run */ + addi t0, t0, -(8*SZREG) /* not to over run */ 2: fixup REG_L a4, 0(a1), 10f fixup REG_L a5, SZREG(a1), 10f @@ -97,7 +97,7 @@ ENTRY(__asm_copy_from_user) addi a1, a1, 8*SZREG bltu a0, t0, 2b - addi t0, t0, 8*SZREG-1 /* revert to original value */ + addi t0, t0, 8*SZREG /* revert to original value */ j .Lbyte_copy_tail .Lshift_copy: From 22b5f16ffeff38938ad7420a2bfa3c281c36fd17 Mon Sep 17 00:00:00 2001 From: Akira Tsukamoto Date: Tue, 20 Jul 2021 17:51:45 +0900 Subject: [PATCH 415/794] riscv: __asm_copy_to-from_user: Fix: fail on RV32 Had a bug when converting bytes to bits when the cpu was rv32. The a3 contains the number of bytes and multiple of 8 would be the bits. The LGREG is holding 2 for RV32 and 3 for RV64, so to achieve multiple of 8 it must always be constant 3. The 2 was mistakenly used for rv32.
Signed-off-by: Akira Tsukamoto Fixes: ca6eaaa210de ("riscv: __asm_copy_to-from_user: Optimize unaligned memory access and pipeline stall") Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 8bbeca89a93f..279876821969 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -125,7 +125,7 @@ ENTRY(__asm_copy_from_user) * t3 - prev shift * t4 - current shift */ - slli t3, a3, LGREG + slli t3, a3, 3 /* converting bytes in a3 to bits */ li a5, SZREG*8 sub t4, a5, t3 From d4b3e0105e3c2411af666a50b1bf2d25656a5e83 Mon Sep 17 00:00:00 2001 From: Akira Tsukamoto Date: Tue, 20 Jul 2021 17:52:36 +0900 Subject: [PATCH 416/794] riscv: __asm_copy_to-from_user: Remove unnecessary size check Clean up: The size of 0 will be evaluated in the next step. Not required here. Signed-off-by: Akira Tsukamoto Fixes: ca6eaaa210de ("riscv: __asm_copy_to-from_user: Optimize unaligned memory access and pipeline stall") Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 279876821969..54d497a03164 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -30,7 +30,6 @@ ENTRY(__asm_copy_from_user) * t0 - end of uncopied dst */ add t0, a0, a2 - bgtu a0, t0, 5f /* * Use byte copy only if too small. From ea196c548c0ac407afd31d142712b6da8bd00244 Mon Sep 17 00:00:00 2001 From: Akira Tsukamoto Date: Tue, 20 Jul 2021 17:53:23 +0900 Subject: [PATCH 417/794] riscv: __asm_copy_to-from_user: Fix: Typos in comments Fixing typos and grammar mistakes and using more intuitive label name. 
Signed-off-by: Akira Tsukamoto Fixes: ca6eaaa210de ("riscv: __asm_copy_to-from_user: Optimize unaligned memory access and pipeline stall") Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 54d497a03164..63bc691cff91 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -33,19 +33,20 @@ ENTRY(__asm_copy_from_user) /* * Use byte copy only if too small. + * SZREG holds 4 for RV32 and 8 for RV64 */ li a3, 9*SZREG /* size must be larger than size in word_copy */ bltu a2, a3, .Lbyte_copy_tail /* - * Copy first bytes until dst is align to word boundary. + * Copy first bytes until dst is aligned to word boundary. * a0 - start of dst * t1 - start of aligned dst */ addi t1, a0, SZREG-1 andi t1, t1, ~(SZREG-1) /* dst is already aligned, skip */ - beq a0, t1, .Lskip_first_bytes + beq a0, t1, .Lskip_align_dst 1: /* a5 - one byte for copying data */ fixup lb a5, 0(a1), 10f @@ -54,7 +55,7 @@ ENTRY(__asm_copy_from_user) addi a0, a0, 1 /* dst */ bltu a0, t1, 1b /* t1 - start of aligned dst */ -.Lskip_first_bytes: +.Lskip_align_dst: /* * Now dst is aligned. * Use shift-copy if src is misaligned. @@ -71,7 +72,6 @@ ENTRY(__asm_copy_from_user) * * a0 - start of aligned dst * a1 - start of aligned src - * a3 - a1 & mask:(SZREG-1) * t0 - end of aligned dst */ addi t0, t0, -(8*SZREG) /* not to over run */ @@ -106,7 +106,7 @@ ENTRY(__asm_copy_from_user) * For misaligned copy we still perform aligned word copy, but * we need to use the value fetched from the previous iteration and * do some shifts. - * This is safe because reading less than a word size. + * This is safe because reading is less than a word size. 
* * a0 - start of aligned dst * a1 - start of src @@ -116,7 +116,7 @@ ENTRY(__asm_copy_from_user) */ /* calculating aligned word boundary for dst */ andi t1, t0, ~(SZREG-1) - /* Converting unaligned src to aligned arc */ + /* Converting unaligned src to aligned src */ andi a1, a1, ~(SZREG-1) /* @@ -128,7 +128,7 @@ ENTRY(__asm_copy_from_user) li a5, SZREG*8 sub t4, a5, t3 - /* Load the first word to combine with seceond word */ + /* Load the first word to combine with second word */ fixup REG_L a5, 0(a1), 10f 3: @@ -160,7 +160,7 @@ ENTRY(__asm_copy_from_user) * a1 - start of remaining src * t0 - end of remaining dst */ - bgeu a0, t0, 5f + bgeu a0, t0, .Lout_copy_user /* check if end of copy */ 4: fixup lb a5, 0(a1), 10f addi a1, a1, 1 /* src */ @@ -168,7 +168,7 @@ ENTRY(__asm_copy_from_user) addi a0, a0, 1 /* dst */ bltu a0, t0, 4b /* t0 - end of dst */ -5: +.Lout_copy_user: /* Disable access to user memory */ csrc CSR_STATUS, t6 li a0, 0 From cdf72837cda89b2d38bd18fbe6cc591c1d5f2416 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 23 Jul 2021 05:41:53 +0930 Subject: [PATCH 418/794] ALSA: scarlett2: Fix Mute/Dim/MSD Mode control names Append "Playback Switch" to the names of "Mute" and "Dim" controls, and append "Switch" to the "MSD Mode" control as per Documentation/sound/designs/control-names.rst. Signed-off-by: Geoffrey D. 
Bennett Link: https://lore.kernel.org/r/77f1000652c37e3217fb8dad8e156bc6392abc0b.1626959758.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index f9d698a37153..347995ea39e4 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -228,7 +228,7 @@ enum { }; static const char *const scarlett2_dim_mute_names[SCARLETT2_DIM_MUTE_COUNT] = { - "Mute", "Dim" + "Mute Playback Switch", "Dim Playback Switch" }; /* Description of each hardware port type: @@ -3455,7 +3455,7 @@ static int scarlett2_add_msd_ctl(struct usb_mixer_interface *mixer) /* Add MSD control */ return scarlett2_add_new_ctl(mixer, &scarlett2_msd_ctl, - 0, 1, "MSD Mode", NULL); + 0, 1, "MSD Mode Switch", NULL); } /*** Cleanup/Suspend Callbacks ***/ From d3a4f784d20c696b134b916f57956f12a37ecd47 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 23 Jul 2021 05:42:08 +0930 Subject: [PATCH 419/794] ALSA: scarlett2: Fix Direct Monitor control name for 2i2 The Direct Monitor control for the 2i2 is an enumerated value, not a boolean. Fix the control name to say "Playback Enum" instead of "Playback Switch" in this case. Signed-off-by: Geoffrey D. 
Bennett Link: https://lore.kernel.org/r/faf5de1d2100038e7d07520d770fda4a1adc276a.1626959758.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 347995ea39e4..fa604b61066f 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -2530,14 +2530,18 @@ static int scarlett2_add_direct_monitor_ctl(struct usb_mixer_interface *mixer) { struct scarlett2_data *private = mixer->private_data; const struct scarlett2_device_info *info = private->info; + const char *s; if (!info->direct_monitor) return 0; + s = info->direct_monitor == 1 + ? "Direct Monitor Playback Switch" + : "Direct Monitor Playback Enum"; + return scarlett2_add_new_ctl( mixer, &scarlett2_direct_monitor_ctl[info->direct_monitor - 1], - 0, 1, "Direct Monitor Playback Switch", - &private->direct_monitor_ctl); + 0, 1, s, &private->direct_monitor_ctl); } /*** Speaker Switching Control ***/ From 9ee0fc8366ddce380547878640708f1bd7dd2ead Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 23 Jul 2021 05:42:48 +0930 Subject: [PATCH 420/794] ALSA: scarlett2: Correct channel mute status after mute button pressed After the hardware mute button is pressed, private->vol_updated is set so that the mute status is invalidated. As the channel mute values may be affected by the global mute value, update scarlett2_mute_ctl_get() to call scarlett2_update_volumes() if private->vol_updated is set. Signed-off-by: Geoffrey D. 
Bennett Link: https://lore.kernel.org/r/aa18ddbf8d8bd7f31832ab1b6b6057c00b931202.1626959758.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index fa604b61066f..3457fbc8108f 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -1856,9 +1856,15 @@ static int scarlett2_mute_ctl_get(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *elem = kctl->private_data; - struct scarlett2_data *private = elem->head.mixer->private_data; + struct usb_mixer_interface *mixer = elem->head.mixer; + struct scarlett2_data *private = mixer->private_data; int index = line_out_remap(private, elem->control); + mutex_lock(&private->data_mutex); + if (private->vol_updated) + scarlett2_update_volumes(mixer); + mutex_unlock(&private->data_mutex); + ucontrol->value.integer.value[0] = private->mute_switch[index]; return 0; } From 2b8b12be9b9752c36efda38b7dd5d83d790d01d8 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 23 Jul 2021 05:43:26 +0930 Subject: [PATCH 421/794] ALSA: scarlett2: Fix line out/speaker switching notifications The values of the line output controls can change when the SW/HW switches are set to HW, and also when speaker switching is enabled. These notifications were sent with a mask of only SNDRV_CTL_EVENT_MASK_INFO. Change the notifications to set the SNDRV_CTL_EVENT_MASK_VALUE mask bit as well. When the mute control is updated, the notification was sent with a mask of SNDRV_CTL_EVENT_MASK_INFO. Change the mask to the correct value of SNDRV_CTL_EVENT_MASK_VALUE. Signed-off-by: Geoffrey D. 
Bennett Link: https://lore.kernel.org/r/8192e15ba62fa4bc90425c005f265c0de530be20.1626959758.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 3457fbc8108f..3d5848d5481b 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -1961,10 +1961,12 @@ static void scarlett2_vol_ctl_set_writable(struct usb_mixer_interface *mixer, ~SNDRV_CTL_ELEM_ACCESS_WRITE; } - /* Notify of write bit change */ - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO, + /* Notify of write bit and possible value change */ + snd_ctl_notify(card, + SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO, &private->vol_ctls[index]->id); - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO, + snd_ctl_notify(card, + SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO, &private->mute_ctls[index]->id); } @@ -2599,7 +2601,9 @@ static int scarlett2_speaker_switch_enable(struct usb_mixer_interface *mixer) /* disable the line out SW/HW switch */ scarlett2_sw_hw_ctl_ro(private, i); - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO, + snd_ctl_notify(card, + SNDRV_CTL_EVENT_MASK_VALUE | + SNDRV_CTL_EVENT_MASK_INFO, &private->sw_hw_ctls[i]->id); } @@ -2923,7 +2927,7 @@ static int scarlett2_dim_mute_ctl_put(struct snd_kcontrol *kctl, if (private->vol_sw_hw_switch[line_index]) { private->mute_switch[line_index] = val; snd_ctl_notify(mixer->chip->card, - SNDRV_CTL_EVENT_MASK_INFO, + SNDRV_CTL_EVENT_MASK_VALUE, &private->mute_ctls[i]->id); } } From 4511781f95da0a3b2bad34f3f5e3967e80cd2d18 Mon Sep 17 00:00:00 2001 From: "chihhao.chen" Date: Sat, 24 Jul 2021 12:23:41 +0800 Subject: [PATCH 422/794] ALSA: usb-audio: fix incorrect clock source setting The following scenario describes an echo test for Samsung USBC Headset (AKG) with VID/PID (0x04e8/0xa051). 
We first start a capture stream (USB IN transfer) in 96kHz/24bit/1ch mode. In clock find source function, we get value 0x2 for clock selector and 0x1 for clock source. Kernel-4.14 behavior Since the clock source is valid, the clock selector was not set again. We pass through this function and start a playback stream (USB OUT transfer) in 48kHz/32bit/2ch mode. This time we get value 0x1 for clock selector and 0x1 for clock source. Finally clock id with this setting is 0x9. Kernel-5.10 behavior Clock selector was always set one more time even if it is valid. When we start a playback stream, we will get 0x2 for clock selector and 0x1 for clock source. In this case clock id becomes 0xA. This is an incorrect clock source setting and results in severe noise. We see a wrong data rate in the USB IN transfer (it drops from 288 bytes/ms to 144 bytes/ms); it should stay at 288 bytes/ms. This earphone works fine on older kernel version load because this is a newly-added behavior. Fixes: d2e8f641257d ("ALSA: usb-audio: Explicitly set up the clock selector") Signed-off-by: chihhao.chen Link: https://lore.kernel.org/r/1627100621-19225-1-git-send-email-chihhao.chen@mediatek.com Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 52de52288e10..14456f61539e 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -324,6 +324,12 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, sources[ret - 1], visited, validate); if (ret > 0) { + /* + * For Samsung USBC Headset (AKG), setting clock selector again + * will result in incorrect default clock setting problems + */ + if (chip->usb_id == USB_ID(0x04e8, 0xa051)) + return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); if (err < 0) return err; From f5d156c7bfab7d728b2fd35bc63eab12eda18125 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 19 Jul 2021 15:34:37 +0800 Subject: [PATCH 423/794] arm64: dts: imx8mp: remove fallback
compatible string for FlexCAN FlexCAN on i.MX8MP is not derived from i.MX6Q, instead reuses from i.MX8QM with extra ECC added and default is enabled, so that the FlexCAN would be put into freeze mode without FLEXCAN_QUIRK_DISABLE_MECR quirk. This patch removes "fsl,imx6q-flexcan" fallback compatible string since it's not compatible with the i.MX6Q. Link: https://lore.kernel.org/r/20210719073437.32078-1-qiangqing.zhang@nxp.com Signed-off-by: Joakim Zhang Reviewed-by: Fabio Estevam Signed-off-by: Marc Kleine-Budde --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ca38d0d6c3c4..f4eaab3ecf03 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -579,7 +579,7 @@ }; flexcan1: can@308c0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308c0000 0x10000>; interrupts = ; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, @@ -594,7 +594,7 @@ }; flexcan2: can@308d0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308d0000 0x10000>; interrupts = ; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, From 54f93336d000229f72c26d8a3f69dd256b744528 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 22 Jul 2021 15:08:19 +0800 Subject: [PATCH 424/794] can: raw: raw_setsockopt(): fix raw_rcv panic for sock UAF We get a bug during ltp can_filter test as following. 
=========================================== [60919.264984] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [60919.265223] PGD 8000003dda726067 P4D 8000003dda726067 PUD 3dda727067 PMD 0 [60919.265443] Oops: 0000 [#1] SMP PTI [60919.265550] CPU: 30 PID: 3638365 Comm: can_filter Kdump: loaded Tainted: G W 4.19.90+ #1 [60919.266068] RIP: 0010:selinux_socket_sock_rcv_skb+0x3e/0x200 [60919.293289] RSP: 0018:ffff8d53bfc03cf8 EFLAGS: 00010246 [60919.307140] RAX: 0000000000000000 RBX: 000000000000001d RCX: 0000000000000007 [60919.320756] RDX: 0000000000000001 RSI: ffff8d5104a8ed00 RDI: ffff8d53bfc03d30 [60919.334319] RBP: ffff8d9338056800 R08: ffff8d53bfc29d80 R09: 0000000000000001 [60919.347969] R10: ffff8d53bfc03ec0 R11: ffffb8526ef47c98 R12: ffff8d53bfc03d30 [60919.350320] perf: interrupt took too long (3063 > 2500), lowering kernel.perf_event_max_sample_rate to 65000 [60919.361148] R13: 0000000000000001 R14: ffff8d53bcf90000 R15: 0000000000000000 [60919.361151] FS: 00007fb78b6b3600(0000) GS:ffff8d53bfc00000(0000) knlGS:0000000000000000 [60919.400812] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [60919.413730] CR2: 0000000000000010 CR3: 0000003e3f784006 CR4: 00000000007606e0 [60919.426479] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [60919.439339] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [60919.451608] PKRU: 55555554 [60919.463622] Call Trace: [60919.475617] [60919.487122] ? update_load_avg+0x89/0x5d0 [60919.498478] ? update_load_avg+0x89/0x5d0 [60919.509822] ? account_entity_enqueue+0xc5/0xf0 [60919.520709] security_sock_rcv_skb+0x2a/0x40 [60919.531413] sk_filter_trim_cap+0x47/0x1b0 [60919.542178] ? 
kmem_cache_alloc+0x38/0x1b0 [60919.552444] sock_queue_rcv_skb+0x17/0x30 [60919.562477] raw_rcv+0x110/0x190 [can_raw] [60919.572539] can_rcv_filter+0xbc/0x1b0 [can] [60919.582173] can_receive+0x6b/0xb0 [can] [60919.591595] can_rcv+0x31/0x70 [can] [60919.600783] __netif_receive_skb_one_core+0x5a/0x80 [60919.609864] process_backlog+0x9b/0x150 [60919.618691] net_rx_action+0x156/0x400 [60919.627310] ? sched_clock_cpu+0xc/0xa0 [60919.635714] __do_softirq+0xe8/0x2e9 [60919.644161] do_softirq_own_stack+0x2a/0x40 [60919.652154] [60919.659899] do_softirq.part.17+0x4f/0x60 [60919.667475] __local_bh_enable_ip+0x60/0x70 [60919.675089] __dev_queue_xmit+0x539/0x920 [60919.682267] ? finish_wait+0x80/0x80 [60919.689218] ? finish_wait+0x80/0x80 [60919.695886] ? sock_alloc_send_pskb+0x211/0x230 [60919.702395] ? can_send+0xe5/0x1f0 [can] [60919.708882] can_send+0xe5/0x1f0 [can] [60919.715037] raw_sendmsg+0x16d/0x268 [can_raw] It's because raw_setsockopt() concurrently with unregister_netdevice_many(). Concurrent scenario as following. cpu0 cpu1 raw_bind raw_setsockopt unregister_netdevice_many unlist_netdevice dev_get_by_index raw_notifier raw_enable_filters ...... can_rx_register can_rcv_list_find(..., net->can.rx_alldev_list) ...... sock_close raw_release(sock_a) ...... can_receive can_rcv_filter(net->can.rx_alldev_list, ...) raw_rcv(skb, sock_a) BUG After unlist_netdevice(), dev_get_by_index() return NULL in raw_setsockopt(). Function raw_enable_filters() will add sock and can_filter to net->can.rx_alldev_list. Then the sock is closed. Followed by, we sock_sendmsg() to a new vcan device use the same can_filter. Protocol stack match the old receiver whose sock has been released on net->can.rx_alldev_list in can_rcv_filter(). Function raw_rcv() uses the freed sock. UAF BUG is triggered. We can find that the key issue is that net_device has not been protected in raw_setsockopt(). Use rtnl_lock to protect net_device in raw_setsockopt(). 
Fixes: c18ce101f2e4 ("[CAN]: Add raw protocol") Link: https://lore.kernel.org/r/20210722070819.1048263-1-william.xuanziyang@huawei.com Cc: linux-stable Signed-off-by: Ziyang Xuan Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/raw.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/net/can/raw.c b/net/can/raw.c index ed4fcb7ab0c3..cd5a49380116 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -546,10 +546,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; } + rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) + if (ro->bound && ro->ifindex) { dev = dev_get_by_index(sock_net(sk), ro->ifindex); + if (!dev) { + if (count > 1) + kfree(filter); + err = -ENODEV; + goto out_fil; + } + } if (ro->bound) { /* (try to) register the new filters */ @@ -588,6 +596,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, dev_put(dev); release_sock(sk); + rtnl_unlock(); break; @@ -600,10 +609,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, err_mask &= CAN_ERR_MASK; + rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) + if (ro->bound && ro->ifindex) { dev = dev_get_by_index(sock_net(sk), ro->ifindex); + if (!dev) { + err = -ENODEV; + goto out_err; + } + } /* remove current error mask */ if (ro->bound) { @@ -627,6 +642,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, dev_put(dev); release_sock(sk); + rtnl_unlock(); break; From 0c71437dd50dd687c15d8ca80b3b68f10bb21d63 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 14 Jul 2021 13:16:02 +0200 Subject: [PATCH 425/794] can: j1939: j1939_session_deactivate(): clarify lifetime of session object The j1939_session_deactivate() is decrementing the session ref-count and potentially can free() the session. This would cause use-after-free situation. 
However, the code calling j1939_session_deactivate() does always hold another reference to the session, so that it would not be free()ed in this code path. This patch adds a comment to make this clear and a WARN_ON, to ensure that future changes will not violate this requirement. Further this patch avoids dereferencing the session pointer as a precaution to avoid use-after-free if the session is actually free()ed. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/r/20210714111602.24021-1-o.rempel@pengutronix.de Reported-by: Xiaochen Zou Signed-off-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index c3946c355882..bb1092c3e7e3 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1075,11 +1075,16 @@ static bool j1939_session_deactivate_locked(struct j1939_session *session) static bool j1939_session_deactivate(struct j1939_session *session) { + struct j1939_priv *priv = session->priv; bool active; - j1939_session_list_lock(session->priv); + j1939_session_list_lock(priv); + /* This function should be called with a session ref-count of at + * least 2. + */ + WARN_ON_ONCE(kref_read(&session->kref) < 2); active = j1939_session_deactivate_locked(session); - j1939_session_list_unlock(session->priv); + j1939_session_list_unlock(priv); return active; } From c6eea1c8bda56737752465a298dc6ce07d6b8ce3 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 6 Jul 2021 19:00:08 +0800 Subject: [PATCH 426/794] can: j1939: j1939_xtp_rx_dat_one(): fix rxtimer value between consecutive TP.DT to 750ms For receive side, the max time interval between two consecutive TP.DT should be 750ms. 
Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/r/1625569210-47506-1-git-send-email-zhangchangzhong@huawei.com Cc: linux-stable Signed-off-by: Zhang Changzhong Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index bb1092c3e7e3..bdc95bd7a851 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1874,7 +1874,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, if (!session->transmission) j1939_tp_schedule_txtimer(session, 0); } else { - j1939_tp_set_rxtimeout(session, 250); + j1939_tp_set_rxtimeout(session, 750); } session->last_cmd = 0xff; consume_skb(se_skb); From 590eb2b7d8cfafb27e8108d52d4bf4850626d31d Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Fri, 25 Jun 2021 15:09:29 +0200 Subject: [PATCH 427/794] can: peak_usb: pcan_usb_handle_bus_evt(): fix reading rxerr/txerr values This patch fixes an incorrect way of reading error counters in messages received for this purpose from the PCAN-USB interface. These messages inform about the increase or decrease of the error counters, whose values are placed in bytes 1 and 2 of the message data (not 0 and 1). 
Fixes: ea8b33bde76c ("can: pcan_usb: add support of rxerr/txerr counters") Link: https://lore.kernel.org/r/20210625130931.27438-4-s.grosjean@peak-system.com Cc: linux-stable Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 1d6f77252f01..899a3d21b77f 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -117,7 +117,8 @@ #define PCAN_USB_BERR_MASK (PCAN_USB_ERR_RXERR | PCAN_USB_ERR_TXERR) /* identify bus event packets with rx/tx error counters */ -#define PCAN_USB_ERR_CNT 0x80 +#define PCAN_USB_ERR_CNT_DEC 0x00 /* counters are decreasing */ +#define PCAN_USB_ERR_CNT_INC 0x80 /* counters are increasing */ /* private to PCAN-USB adapter */ struct pcan_usb { @@ -608,11 +609,12 @@ static int pcan_usb_handle_bus_evt(struct pcan_usb_msg_context *mc, u8 ir) /* acccording to the content of the packet */ switch (ir) { - case PCAN_USB_ERR_CNT: + case PCAN_USB_ERR_CNT_DEC: + case PCAN_USB_ERR_CNT_INC: /* save rx/tx error counters from in the device context */ - pdev->bec.rxerr = mc->ptr[0]; - pdev->bec.txerr = mc->ptr[1]; + pdev->bec.rxerr = mc->ptr[1]; + pdev->bec.txerr = mc->ptr[2]; break; default: From ef68a717960658e6a1e5f08adb0574326e9a12c2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 24 Apr 2021 16:20:39 +0200 Subject: [PATCH 428/794] can: mcp251xfd: mcp251xfd_irq(): stop timestamping worker in case error in IRQ In case an error occurred in the IRQ handler, the chip status is dumped via devcoredump and all IRQs are disabled, but the chip stays powered for further analysis. The chip is in an undefined state and will not receive any CAN frames, so shut down the timestamping worker, which reads the TBC register regularly, too. 
This avoids any CRC read error messages if there is a communication problem with the chip. Fixes: efd8d98dfb90 ("can: mcp251xfd: add HW timestamp infrastructure") Link: https://lore.kernel.org/r/20210724155131.471303-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 47c3f408a799..9ae48072b6c6 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2300,6 +2300,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) err, priv->regs_status.intf); mcp251xfd_dump(priv); mcp251xfd_chip_interrupts_disable(priv); + mcp251xfd_timestamp_stop(priv); return handled; } From 3cf4375a090473d240281a0d2b04a3a5aaeac34b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 23 Jul 2021 18:46:01 -0400 Subject: [PATCH 429/794] tipc: do not write skb_shinfo frags when doing decrytion One skb's skb_shinfo frags are not writable, and they can be shared with other skbs' like by pskb_copy(). To write the frags may cause other skb's data crash. So before doing en/decryption, skb_cow_data() should always be called for a cloned or nonlinear skb if req dst is using the same sg as req src. While at it, the likely branch can be removed, as it will be covered by skb_cow_data(). Note that esp_input() has the same issue, and I will fix it in another patch. tipc_aead_encrypt() doesn't have this issue, as it only processes linear data in the unlikely branch. Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/crypto.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index e5c43d4d5a75..c9391d38de85 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -898,16 +898,10 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, if (unlikely(!aead)) return -ENOKEY; - /* Cow skb data if needed */ - if (likely(!skb_cloned(skb) && - (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) { - nsg = 1 + skb_shinfo(skb)->nr_frags; - } else { - nsg = skb_cow_data(skb, 0, &unused); - if (unlikely(nsg < 0)) { - pr_err("RX: skb_cow_data() returned %d\n", nsg); - return nsg; - } + nsg = skb_cow_data(skb, 0, &unused); + if (unlikely(nsg < 0)) { + pr_err("RX: skb_cow_data() returned %d\n", nsg); + return nsg; } /* Allocate memory for the AEAD operation */ From 89bc7f456cd40e0be7b94f4fdae9186f22b76a05 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 23 Jul 2021 17:53:48 -0400 Subject: [PATCH 430/794] bnxt_en: Add missing periodic PHC overflow check We use the timecounter APIs for the 48-bit PHC and packet timestamps. We must periodically update the timecounter at roughly half the overflow interval. The overflow interval is about 78 hours, so update it every 19 hours (1/4 interval) for some extra margins. Fixes: 390862f45c85 ("bnxt_en: Get the full 48-bit hardware timestamp periodically") Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Acked-by: Richard Cochran Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 7 +++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 9089e7f3fbd4..ec381c2423b8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -353,6 +353,12 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) bnxt_ptp_get_current_time(bp); ptp->next_period = now + HZ; + if (time_after_eq(now, ptp->next_overflow_check)) { + spin_lock_bh(&ptp->ptp_lock); + timecounter_read(&ptp->tc); + spin_unlock_bh(&ptp->ptp_lock); + ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD; + } return HZ; } @@ -423,6 +429,7 @@ int bnxt_ptp_init(struct bnxt *bp) ptp->cc.shift = 0; ptp->cc.mult = 1; + ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD; timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); ptp->ptp_info = bnxt_ptp_caps; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 4135ea3ec788..254ba7bc0f99 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -32,6 +32,10 @@ struct bnxt_ptp_cfg { u64 current_time; u64 old_time; unsigned long next_period; + unsigned long next_overflow_check; + /* 48-bit PHC overflows in 78 hours. Check overflow every 19 hours. 
*/ + #define BNXT_PHC_OVERFLOW_PERIOD (19 * 3600 * HZ) + u16 tx_seqid; struct bnxt *bp; atomic_t tx_avail; From 78d9d8005e4556448f398d876f29d0ca7ab8e398 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 16 Jul 2021 21:40:51 +0800 Subject: [PATCH 431/794] riscv: stacktrace: Fix NULL pointer dereference When CONFIG_FRAME_POINTER=y, calling dump_stack() can always trigger NULL pointer dereference panic similar as below: [ 0.396060] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5+ #47 [ 0.396692] Hardware name: riscv-virtio,qemu (DT) [ 0.397176] Call Trace: [ 0.398191] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000960 [ 0.399487] Oops [#1] [ 0.399739] Modules linked in: [ 0.400135] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5+ #47 [ 0.400570] Hardware name: riscv-virtio,qemu (DT) [ 0.400926] epc : walk_stackframe+0xc4/0xdc [ 0.401291] ra : dump_backtrace+0x30/0x38 [ 0.401630] epc : ffffffff80004922 ra : ffffffff8000496a sp : ffffffe000f3bd00 [ 0.402115] gp : ffffffff80cfdcb8 tp : ffffffe000f30000 t0 : ffffffff80d0b0cf [ 0.402602] t1 : ffffffff80d0b0c0 t2 : 0000000000000000 s0 : ffffffe000f3bd60 [ 0.403071] s1 : ffffffff808bc2e8 a0 : 0000000000001000 a1 : 0000000000000000 [ 0.403448] a2 : ffffffff803d7088 a3 : ffffffff808bc2e8 a4 : 6131725dbc24d400 [ 0.403820] a5 : 0000000000001000 a6 : 0000000000000002 a7 : ffffffffffffffff [ 0.404226] s2 : 0000000000000000 s3 : 0000000000000000 s4 : 0000000000000000 [ 0.404634] s5 : ffffffff803d7088 s6 : ffffffff808bc2e8 s7 : ffffffff80630650 [ 0.405085] s8 : ffffffff80912a80 s9 : 0000000000000008 s10: ffffffff804000fc [ 0.405388] s11: 0000000000000000 t3 : 0000000000000043 t4 : ffffffffffffffff [ 0.405616] t5 : 000000000000003d t6 : ffffffe000f3baa8 [ 0.405793] status: 0000000000000100 badaddr: 0000000000000960 cause: 000000000000000d [ 0.406135] [] walk_stackframe+0xc4/0xdc [ 0.407032] [] dump_backtrace+0x30/0x38 [ 0.407797] [] show_stack+0x40/0x4c [ 0.408234] [] 
dump_stack+0x90/0xb6 [ 0.409019] [] ptdump_init+0x20/0xc4 [ 0.409681] [] do_one_initcall+0x4c/0x226 [ 0.410110] [] kernel_init_freeable+0x1f4/0x258 [ 0.410562] [] kernel_init+0x22/0x148 [ 0.410959] [] ret_from_exception+0x0/0x14 [ 0.412241] ---[ end trace b2ab92c901b96251 ]--- [ 0.413099] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b The reason is the task is NULL when we finally call walk_stackframe() the NULL is passed from __dump_stack(): |static void __dump_stack(void) |{ | dump_stack_print_info(KERN_DEFAULT); | show_stack(NULL, NULL, KERN_DEFAULT); |} Fix this issue by checking "task == NULL" case in walk_stackframe(). Fixes: eac2f3059e02 ("riscv: stacktrace: fix the riscv stacktrace when CONFIG_FRAME_POINTER enabled") Signed-off-by: Jisheng Zhang Reviewed-by: Atish Patra Tested-by: Wende Tan Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index ac7593607fa6..315db3d0229b 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -27,7 +27,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); - } else if (task == current) { + } else if (task == NULL || task == current) { fp = (unsigned long)__builtin_frame_address(1); sp = (unsigned long)__builtin_frame_address(0); pc = (unsigned long)__builtin_return_address(0); From fc68f42aa737dc15e7665a4101d4168aadb8e4c4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Jul 2021 15:25:54 -0700 Subject: [PATCH 432/794] ACPI: fix NULL pointer dereference Commit 71f642833284 ("ACPI: utils: Fix reference counting in for_each_acpi_dev_match()") started doing "acpi_dev_put()" on a pointer that was possibly NULL. That fails miserably, because that helper inline function is not set up to handle that case. 
Just make acpi_dev_put() silently accept a NULL pointer, rather than calling down to put_device() with an invalid offset off that NULL pointer. Link: https://lore.kernel.org/lkml/a607c149-6bf6-0fd0-0e31-100378504da2@kernel.dk/ Reported-and-tested-by: Jens Axboe Tested-by: Daniel Scally Cc: Andy Shevchenko Signed-off-by: Linus Torvalds --- include/acpi/acpi_bus.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b9d434a93632..13d93371790e 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -720,7 +720,8 @@ static inline struct acpi_device *acpi_dev_get(struct acpi_device *adev) static inline void acpi_dev_put(struct acpi_device *adev) { - put_device(&adev->dev); + if (adev) + put_device(&adev->dev); } struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle); From ac059d16442f30e6a9a95d41655153e01247e710 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sun, 25 Jul 2021 13:28:24 +0530 Subject: [PATCH 433/794] octeontx2-af: Fix PKIND overlap between LBK and LMAC interfaces Currently PKINDs are not assigned to LBK channels. The default value of LBK_CHX_PKIND (channel to PKIND mapping) register is zero, which results in an overlap of pkind between LBK and CGX LMACs. When the KPU1 parser config is modified because PTP timestamping is enabled on the CGX LMAC interface, it impacts traffic on LBK interfaces as well. This patch fixes the issue by reserving PKIND#0 for LBK devices. CGX mapped PF pkinds now start from 1. The patch also fixes the max pkind available. Fixes: 421572175ba5 ("octeontx2-af: Support to enable/disable HW timestamping") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S.
Miller --- drivers/net/ethernet/marvell/octeontx2/af/npc.h | 3 +++ drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 1 + drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 11 +++++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index 19bad9a59c8f..243cf8070e77 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -151,7 +151,10 @@ enum npc_kpu_lh_ltype { * Software assigns pkind for each incoming port such as CGX * Ethernet interfaces, LBK interfaces, etc. */ +#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_VLAN_EXDSA_PKIND + enum npc_pkind_type { + NPC_RX_LBK_PKIND = 0ULL, NPC_RX_VLAN_EXDSA_PKIND = 56ULL, NPC_RX_CHLEN24B_PKIND = 57ULL, NPC_RX_CPT_HDR_PKIND, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 0d2cd5169018..30067668eda7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -298,6 +298,7 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) rvu_nix_chan_lbk(rvu, lbkid, vf + 1); pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; + rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf); rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, pfvf->rx_chan_cnt); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 1097291aaa45..52b255426c22 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1721,7 +1721,6 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) { struct rvu_hwinfo *hw = rvu->hw; int num_pkinds, num_kpus, idx; - struct npc_pkind *pkind; /* Disable all KPUs and their entries */ for (idx = 0; idx < hw->npc_kpus; idx++) { @@ 
-1739,9 +1738,8 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) * Check HW max count to avoid configuring junk or * writing to unsupported CSR addresses. */ - pkind = &hw->pkind; num_pkinds = rvu->kpu.pkinds; - num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds); + num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds); for (idx = 0; idx < num_pkinds; idx++) npc_config_kpuaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], 0, idx, true); @@ -1891,7 +1889,8 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr) if (npc_const1 & BIT_ULL(63)) npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2); - pkind->rsrc.max = (npc_const1 >> 12) & 0xFFULL; + pkind->rsrc.max = NPC_UNRESERVED_PKIND_COUNT; + hw->npc_pkinds = (npc_const1 >> 12) & 0xFFULL; hw->npc_kpu_entries = npc_const1 & 0xFFFULL; hw->npc_kpus = (npc_const >> 8) & 0x1FULL; hw->npc_intfs = npc_const & 0xFULL; @@ -2002,6 +2001,10 @@ int rvu_npc_init(struct rvu *rvu) err = rvu_alloc_bitmap(&pkind->rsrc); if (err) return err; + /* Reserve PKIND#0 for LBKs. Power reset value of LBK_CH_PKIND is '0', + * no need to configure PKIND for all LBKs separately. + */ + rvu_alloc_rsrc(&pkind->rsrc); /* Allocate mem for pkind to PF and channel mapping info */ pkind->pfchan_map = devm_kcalloc(rvu->dev, pkind->rsrc.max, From 69f0aeb13bb548e2d5710a350116e03f0273302e Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sun, 25 Jul 2021 13:29:03 +0530 Subject: [PATCH 434/794] octeontx2-pf: Fix interface down flag on error In the existing code, while changing the number of TX/RX queues using ethtool, the PF/VF interface resources are freed and reallocated (otx2_stop and otx2_open are called) if the device is in the running state. If any resource allocation fails in otx2_open, the driver frees the already allocated resources and returns. But when the number of queues is changed again, otx2_stop is called because the device state is still running, and it tries to free the already freed resources, leading to a driver crash.
This patch fixes the issue by setting the INTF_DOWN flag on error and free the resources in otx2_stop only if the flag is not set. Fixes: 50fe6c02e5ad ("octeontx2-pf: Register and handle link notifications") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 7 +++---- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 +++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 8df748e0677b..b906a0eb6e0d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -298,15 +298,14 @@ static int otx2_set_channels(struct net_device *dev, err = otx2_set_real_num_queues(dev, channel->tx_count, channel->rx_count); if (err) - goto fail; + return err; pfvf->hw.rx_queues = channel->rx_count; pfvf->hw.tx_queues = channel->tx_count; pfvf->qset.cq_cnt = pfvf->hw.tx_queues + pfvf->hw.rx_queues; -fail: if (if_up) - dev->netdev_ops->ndo_open(dev); + err = dev->netdev_ops->ndo_open(dev); netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n", pfvf->hw.tx_queues, pfvf->hw.rx_queues); @@ -410,7 +409,7 @@ static int otx2_set_ringparam(struct net_device *netdev, qs->rqe_cnt = rx_count; if (if_up) - netdev->netdev_ops->ndo_open(netdev); + return netdev->netdev_ops->ndo_open(netdev); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index f300b807a85b..2c24944a4dba 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1662,6 +1662,7 @@ int otx2_open(struct net_device *netdev) err_tx_stop_queues: netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); + pf->flags |= OTX2_FLAG_INTF_DOWN; 
err_free_cints: otx2_free_cints(pf, qidx); vec = pci_irq_vector(pf->pdev, @@ -1689,6 +1690,10 @@ int otx2_stop(struct net_device *netdev) struct otx2_rss_info *rss; int qidx, vec, wrk; + /* If the DOWN flag is set resources are already freed */ + if (pf->flags & OTX2_FLAG_INTF_DOWN) + return 0; + netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); From 4c85e57575fb9e6405d02d55aef8025c60abb824 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sun, 25 Jul 2021 13:29:37 +0530 Subject: [PATCH 435/794] octeontx2-pf: Dont enable backpressure on LBK links Avoid configure backpressure for LBK links as they don't support it and enable lmacs before configuration pause frames. Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool") Signed-off-by: Geetha sowjanya Signed-off-by: Hariprasad Kelam Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 9169849881bf..544c96c8fe1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1504,8 +1504,8 @@ static int cgx_lmac_init(struct cgx *cgx) /* Add reference */ cgx->lmac_idmap[lmac->lmac_id] = lmac; - cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); set_bit(lmac->lmac_id, &cgx->lmac_bmap); + cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); } return cgx_lmac_verify_fwi_version(cgx); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 7cccd802c4ed..70fcc1fd962f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -924,12 +924,14 @@ static int otx2_cq_init(struct otx2_nic 
*pfvf, u16 qidx) aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt); aq->cq.drop_ena = 1; - /* Enable receive CQ backpressure */ - aq->cq.bp_ena = 1; - aq->cq.bpid = pfvf->bpid[0]; + if (!is_otx2_lbkvf(pfvf->pdev)) { + /* Enable receive CQ backpressure */ + aq->cq.bp_ena = 1; + aq->cq.bpid = pfvf->bpid[0]; - /* Set backpressure level is same as cq pass level */ - aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + /* Set backpressure level is same as cq pass level */ + aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + } } /* Fill AQ info */ @@ -1186,7 +1188,7 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, aq->aura.fc_hyst_bits = 0; /* Store count on all updates */ /* Enable backpressure for RQ aura */ - if (aura_id < pfvf->hw.rqpool_cnt) { + if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) { aq->aura.bp_ena = 0; aq->aura.nix0_bpid = pfvf->bpid[0]; /* Set backpressure level for RQ's Aura */ From 149ea30fdd5c28b89a3bfdecfc75cdab1deddb14 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 23 Jul 2021 17:56:00 +0300 Subject: [PATCH 436/794] devlink: Fix phys_port_name of virtual port and merge error Merge commit cited in fixes tag was incorrect. Due to it phys_port_name of the virtual port resulted in incorrect name. Also the phys_port_name of the physical port was written twice due to the merge error. Fix it by removing the old code and inserting back the misplaced code. Related commits of interest in net and net-next branches that resulted in merge conflict are: in net-next branch: commit f285f37cb1e6 ("devlink: append split port number to the port name") in net branch: commit b28d8f0c25a9 ("devlink: Correct VIRTUAL port to not have phys_port attributes") Fixes: 126285651b7 ("Merge ra.kernel.org:/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Parav Pandit Reported-by: Niklas Schnelle Tested-by: Niklas Schnelle Signed-off-by: David S. 
Miller --- net/core/devlink.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 8fdd04f00fd7..85032626de24 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9328,18 +9328,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: - case DEVLINK_PORT_FLAVOUR_VIRTUAL: n = snprintf(name, len, "p%u", attrs->phys.port_number); if (n < len && attrs->split) n += snprintf(name + n, len - n, "s%u", attrs->phys.split_subport_number); - if (!attrs->split) - n = snprintf(name, len, "p%u", attrs->phys.port_number); - else - n = snprintf(name, len, "p%us%u", - attrs->phys.port_number, - attrs->phys.split_subport_number); - break; case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: @@ -9381,6 +9373,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf, attrs->pci_sf.sf); break; + case DEVLINK_PORT_FLAVOUR_VIRTUAL: + return -EOPNOTSUPP; } if (n >= len) From ad4e1e48a6291f7fb53fbef38ca264966ffd65c9 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Fri, 23 Jul 2021 21:59:27 +0800 Subject: [PATCH 437/794] net: phy: broadcom: re-add check for PHY_BRCM_DIS_TXCRXC_NOENRGY on the BCM54811 PHY Restore PHY_ID_BCM54811 accidentally removed by commit 5d4358ede8eb. Fixes: 5d4358ede8eb ("net: phy: broadcom: Allow BCM54210E to configure APD") Signed-off-by: Kevin Lo Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/broadcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 7bf3011b8e77..83aea5c5cd03 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -288,7 +288,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) { if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E || BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810 || - BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E) + BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811) val |= BCM54XX_SHD_SCR3_RXCTXC_DIS; else val |= BCM54XX_SHD_SCR3_TRDDAPD; From 7e4960b3d66d7248b23de3251118147812b42da2 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 23 Jul 2021 18:36:09 +0800 Subject: [PATCH 438/794] mlx4: Fix missing error code in mlx4_load_one() The error code is missing in this code scenario, add the error code '-EINVAL' to the return value 'err'. Eliminate the following smatch warning: drivers/net/ethernet/mellanox/mlx4/main.c:3538 mlx4_load_one() warn: missing error code 'err'. Reported-by: Abaci Robot Fixes: 7ae0e400cd93 ("net/mlx4_core: Flexible (asymmetric) allocation of EQs and MSI-X vectors for PF/VFs") Signed-off-by: Jiapeng Chong Reviewed-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 00c84656b2e7..28ac4693da3c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3535,6 +3535,7 @@ slave_start: if (!SRIOV_VALID_STATE(dev->flags)) { mlx4_err(dev, "Invalid SRIOV state\n"); + err = -EINVAL; goto err_close; } } From a1833a54033e4ca760ad58fa2a6469ad59b3fa1a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Jul 2021 11:06:37 -0700 Subject: [PATCH 439/794] smpboot: fix duplicate and misplaced inlining directive gcc doesn't care, but clang quite reasonably pointed out that the recent commit e9ba16e68cce ("smpboot: Mark idle_init() as __always_inlined to work around aggressive compiler un-inlining") did some really odd things: kernel/smpboot.c:50:20: warning: duplicate 'inline' declaration specifier [-Wduplicate-decl-specifier] static inline void __always_inline idle_init(unsigned int cpu) ^ which not only has that duplicate inlining specifier, but the new __always_inline was put in the wrong place of the function definition. We put the storage class specifiers (ie things like "static" and "extern") first, and the type information after that. And while the compiler may not care, we put the inline specifier before the types. So it should be just static __always_inline void idle_init(unsigned int cpu) instead. Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 21b7953f8242..cf6acab78538 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -47,7 +47,7 @@ void __init idle_thread_set_boot_cpu(void) * * Creates the thread if it does not exist. 
*/ -static inline void __always_inline idle_init(unsigned int cpu) +static __always_inline void idle_init(unsigned int cpu) { struct task_struct *tsk = per_cpu(idle_threads, cpu); From 44379b986424b02acfa6e8c85ec5d68d89d3ccc4 Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Sun, 25 Jul 2021 23:17:37 +0530 Subject: [PATCH 440/794] drm/panel: panel-simple: Fix proper bpc for ytc700tlag_05_201c ytc700tlag_05_201c panel support 8 bpc not 6 bpc as per recent testing in i.MX8MM platform. Fix it. Fixes: 7a1f4fa4a629 ("drm/panel: simple: Add YTC700TLAG-05-201C") Signed-off-by: Jagan Teki Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210725174737.891106-1-jagan@amarulasolutions.com --- drivers/gpu/drm/panel/panel-simple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 21939d4352cf..1b80290c2b53 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -4166,7 +4166,7 @@ static const struct drm_display_mode yes_optoelectronics_ytc700tlag_05_201c_mode static const struct panel_desc yes_optoelectronics_ytc700tlag_05_201c = { .modes = &yes_optoelectronics_ytc700tlag_05_201c_mode, .num_modes = 1, - .bpc = 6, + .bpc = 8, .size = { .width = 154, .height = 90, From 795e3d2ea68e489ee7039ac29e98bfea0e34a96c Mon Sep 17 00:00:00 2001 From: Harshvardhan Jha Date: Sun, 25 Jul 2021 23:28:04 +0530 Subject: [PATCH 441/794] net: qede: Fix end of loop tests for list_for_each_entry The list_for_each_entry() iterator, "vlan" in this code, can never be NULL so the warning will never be printed. Signed-off-by: Harshvardhan Jha Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index c59b72c90293..a2e4dfb5cb44 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -831,7 +831,7 @@ int qede_configure_vlan_filters(struct qede_dev *edev) int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct qede_dev *edev = netdev_priv(dev); - struct qede_vlan *vlan = NULL; + struct qede_vlan *vlan; int rc = 0; DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid); @@ -842,7 +842,7 @@ int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) if (vlan->vid == vid) break; - if (!vlan || (vlan->vid != vid)) { + if (list_entry_is_head(vlan, &edev->vlan_list, list)) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "Vlan isn't configured\n"); goto out; From 058e6e0ed0eace43401c945082dec1d669b5b231 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 25 Jul 2021 13:42:50 -0400 Subject: [PATCH 442/794] sctp: improve the code for pmtu probe send and recv update This patch does 3 things: - make sctp_transport_pl_send() and sctp_transport_pl_recv() return bool type to decide if more probe is needed to send. - pr_debug() only when probe is really needed to send. - count pl.raise_count in sctp_transport_pl_send() instead of sctp_transport_pl_recv(), and it's only incremented for the 1st probe for the same size. These are preparations for the next patch to make probes happen only when there's packet loss in Search Complete state. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 4 ++-- net/sctp/sm_statefuns.c | 15 +++++++------- net/sctp/transport.c | 41 +++++++++++++++++++++----------------- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 32fc4a309df5..f3d414ed208e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1024,8 +1024,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); -void sctp_transport_pl_send(struct sctp_transport *t); -void sctp_transport_pl_recv(struct sctp_transport *t); +bool sctp_transport_pl_send(struct sctp_transport *t); +bool sctp_transport_pl_recv(struct sctp_transport *t); /* This is the structure we use to queue packets as they come into diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 09a8f23ec709..32df65f68c12 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1109,12 +1109,12 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net, if (!sctp_transport_pl_enabled(transport)) return SCTP_DISPOSITION_CONSUME; - sctp_transport_pl_send(transport); - - reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); - if (!reply) - return SCTP_DISPOSITION_NOMEM; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + if (sctp_transport_pl_send(transport)) { + reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); + if (!reply) + return SCTP_DISPOSITION_NOMEM; + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + } sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE, SCTP_TRANSPORT(transport)); @@ -1274,8 +1274,7 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net, !sctp_transport_pl_enabled(link)) return SCTP_DISPOSITION_DISCARD; - sctp_transport_pl_recv(link); - if 
(link->pl.state == SCTP_PL_COMPLETE) + if (sctp_transport_pl_recv(link)) return SCTP_DISPOSITION_CONSUME; return sctp_sf_send_probe(net, ep, asoc, type, link, commands); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 397a6244dd97..23e7bd3e3bd4 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -258,16 +258,12 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) sctp_transport_pl_update(transport); } -void sctp_transport_pl_send(struct sctp_transport *t) +bool sctp_transport_pl_send(struct sctp_transport *t) { - pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", - __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); - - if (t->pl.probe_count < SCTP_MAX_PROBES) { - t->pl.probe_count++; - return; - } + if (t->pl.probe_count < SCTP_MAX_PROBES) + goto out; + t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ @@ -299,10 +295,20 @@ void sctp_transport_pl_send(struct sctp_transport *t) sctp_assoc_sync_pmtu(t->asoc); } } - t->pl.probe_count = 1; + +out: + if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 && + !t->pl.probe_count) + t->pl.raise_count++; + + pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", + __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + + t->pl.probe_count++; + return true; } -void sctp_transport_pl_recv(struct sctp_transport *t) +bool sctp_transport_pl_recv(struct sctp_transport *t) { pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); @@ -323,7 +329,7 @@ void sctp_transport_pl_recv(struct sctp_transport *t) if (!t->pl.probe_high) { t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, SCTP_MAX_PLPMTU); - return; + return false; } 
t->pl.probe_size += SCTP_PL_MIN_STEP; if (t->pl.probe_size >= t->pl.probe_high) { @@ -335,14 +341,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t) t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } - } else if (t->pl.state == SCTP_PL_COMPLETE) { - t->pl.raise_count++; - if (t->pl.raise_count == 30) { - /* Raise probe_size again after 30 * interval in Search Complete */ - t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ - t->pl.probe_size += SCTP_PL_MIN_STEP; - } + } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) { + /* Raise probe_size again after 30 * interval in Search Complete */ + t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ + t->pl.probe_size += SCTP_PL_MIN_STEP; } + + return t->pl.state == SCTP_PL_COMPLETE; } static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu) From eacf078cf4c7aa23e9591738511f142cc39b5186 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 25 Jul 2021 13:42:51 -0400 Subject: [PATCH 443/794] sctp: send pmtu probe only if packet loss in Search Complete state This patch is to introduce last_rtx_chunks into sctp_transport to detect if there's any packet retransmission/loss happened by checking against asoc's rtx_data_chunks in sctp_transport_pl_send(). If there is, namely, transport->last_rtx_chunks != asoc->rtx_data_chunks, the pmtu probe will be sent out. Otherwise, increment the pl.raise_count and return when it's in Search Complete state. With this patch, if in Search Complete state, which is a long period, it doesn't need to keep probing the current pmtu unless there's data packet loss. This will save quite some traffic. v1->v2: - add the missing Fixes tag. Fixes: 0dac127c0557 ("sctp: do black hole detection in search complete state") Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 1 + net/sctp/transport.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f3d414ed208e..651bba654d77 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -984,6 +984,7 @@ struct sctp_transport { } cacc; struct { + __u32 last_rtx_chunks; __u16 pmtu; __u16 probe_size; __u16 probe_high; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 23e7bd3e3bd4..a3d3ca6dd63d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -263,6 +263,7 @@ bool sctp_transport_pl_send(struct sctp_transport *t) if (t->pl.probe_count < SCTP_MAX_PROBES) goto out; + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ @@ -298,8 +299,10 @@ bool sctp_transport_pl_send(struct sctp_transport *t) out: if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 && - !t->pl.probe_count) + !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) { t->pl.raise_count++; + return false; + } pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); @@ -313,6 +316,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t) pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; t->pl.pmtu = t->pl.probe_size; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { From ff1176468d368232b684f75e82563369208bc371 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Jul 2021 15:35:14 -0700 Subject: [PATCH 444/794] Linux 5.14-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 
e4f5895badb5..6b555f64df06 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Opossums on Parade # *DOCUMENTATION* From 9f66861181e64dc192bea136da6c91528910002e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 4 Jul 2021 16:01:37 -0700 Subject: [PATCH 445/794] m68k/coldfire: change pll var. to clk_pll DEFINE_CLK() makes the variable name be clk_xyz, so variable 'pll' should instead be 'clk_pll'. In file included from ../arch/m68k/coldfire/m525x.c:12: ../arch/m68k/coldfire/m525x.c:29:30: error: 'pll' undeclared here (not in a function) 29 | CLKDEV_INIT(NULL, "pll.0", &pll), | ^~~ ../include/linux/clkdev.h:30:10: note: in definition of macro 'CLKDEV_INIT' 30 | .clk = c, \ | ^ In file included from ../arch/m68k/coldfire/m525x.c:21: ../arch/m68k/include/asm/mcfclk.h:43:27: warning: 'clk_pll' defined but not used [-Wunused-variable] 43 | static struct clk clk_##clk_ref = { \ | ^~~~ ../arch/m68k/coldfire/m525x.c:25:1: note: in expansion of macro 'DEFINE_CLK' 25 | DEFINE_CLK(pll, "pll.0", MCF_CLK); | ^~~~~~~~~~ Fixes: 63aadb77669a ("m68k: coldfire: use clkdev_lookup on most coldfire") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Greg Ungerer Cc: linux-m68k@lists.linux-m68k.org Cc: uclinux-dev@uclinux.org Cc: Arnd Bergmann Signed-off-by: Greg Ungerer --- arch/m68k/coldfire/m525x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/coldfire/m525x.c b/arch/m68k/coldfire/m525x.c index 2c4d2ca2f20d..485375112e28 100644 --- a/arch/m68k/coldfire/m525x.c +++ b/arch/m68k/coldfire/m525x.c @@ -26,7 +26,7 @@ DEFINE_CLK(pll, "pll.0", MCF_CLK); DEFINE_CLK(sys, "sys.0", MCF_BUSCLK); static struct clk_lookup m525x_clk_lookup[] = { - CLKDEV_INIT(NULL, "pll.0", &pll), + CLKDEV_INIT(NULL, "pll.0", &clk_pll), CLKDEV_INIT(NULL, "sys.0", &clk_sys), CLKDEV_INIT("mcftmr.0", NULL, &clk_sys), CLKDEV_INIT("mcftmr.1", NULL, &clk_sys), From 
e4b016f4b44176807e545fd437cd519b6380e86f Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Fri, 8 Nov 2019 15:36:08 +0000 Subject: [PATCH 446/794] alpha: __udiv_qrnnd should be exported When building an alpha kernel with mpi set as module, I hit this build error: ERROR: "__udiv_qrnnd" [lib/mpi/mpi.ko] undefined! make[2]: *** [scripts/Makefile.modpost:92: __modpost] Error 1 make[1]: *** [Makefile:1266: modules] Error 2 This is due to __udiv_qrnnd not exported. Signed-off-by: Corentin Labbe Signed-off-by: Matt Turner --- arch/alpha/math-emu/math.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index d568cd9a3e43..5617ac0889b8 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -401,3 +401,5 @@ alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) egress: return si_code; } + +EXPORT_SYMBOL(__udiv_qrnnd); From a09c33cbf3db545d44eab16eb528acf834310690 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 11:43:41 +0200 Subject: [PATCH 447/794] alpha: Kconfig: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Matt Turner --- arch/alpha/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 77d3280dc678..f4ffad76e8b1 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -532,7 +532,7 @@ config SMP will run faster if you say N here. See also the SMP-HOWTO available at - . + . If you don't know what to do here, say N. 
From 5e3c3a0ae5d194f0a464aaaa71d764d96f2e7245 Mon Sep 17 00:00:00 2001 From: Chen Li Date: Tue, 13 Oct 2020 14:31:52 +0800 Subject: [PATCH 448/794] alpha: remove undef inline in compiler.h since 889b3c1245de48ed0cacf7aebb25c489d3e4a3e9, CONFIG_OPTIMIZE_INLINING is removed entirely and inline is always defined to `inline __gnu_inline __inline_maybe_unused notrace` in compiler_types.h Besides, undef inline here also means it never use __attribute__((__gnu_inline__)), so `extern inline` function can never be defined header files, otherwise multiple definition errors will happen, e.g. if multiple translation units use alpha/include/asm/pal.h will report multiple definitions, because there are many extern inline function definitions in this header. ``` c extern inline TYPE NAME(void) \ { \ register TYPE __r0 __asm__("$0"); \ __asm__ __volatile__( \ ... ``` Ofc, it is also ok to remove `extern` in `extern inline` here, then all of iso c99 and gnuc99/89 are ok, but there are also other alpha headers have such function definitions. Signed-off-by: chenli Signed-off-by: Matt Turner --- arch/alpha/include/asm/compiler.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/alpha/include/asm/compiler.h b/arch/alpha/include/asm/compiler.h index 5159ba259d65..ae645959018a 100644 --- a/arch/alpha/include/asm/compiler.h +++ b/arch/alpha/include/asm/compiler.h @@ -4,15 +4,4 @@ #include -/* Some idiots over in thought inline should imply - always_inline. This breaks stuff. We'll include this file whenever - we run into such problems. */ - -#include -#undef inline -#undef __inline__ -#undef __inline -#undef __always_inline -#define __always_inline inline __attribute__((always_inline)) - #endif /* __ALPHA_COMPILER_H */ From f0443da1d8560f4c664ab0f9a900ed69e9aaeb14 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 16 Dec 2020 21:12:41 +0800 Subject: [PATCH 449/794] alpha: convert comma to semicolon Replace a comma between expression statements by a semicolon. 
Fixes: cba1ec7e88a0 ("alpha: switch to generic kernel_thread()") Signed-off-by: Zheng Yongjun Signed-off-by: Matt Turner --- arch/alpha/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index ef0c08ed0481..a5123ea426ce 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -256,7 +256,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childstack->r26 = (unsigned long) ret_from_kernel_thread; childstack->r9 = usp; /* function */ childstack->r10 = kthread_arg; - childregs->hae = alpha_mv.hae_cache, + childregs->hae = alpha_mv.hae_cache; childti->pcb.usp = 0; return 0; } From caace6ca4e06f09413fb8f8a63319594cfb7d47d Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 5 Jan 2021 10:16:27 -0500 Subject: [PATCH 450/794] alpha: Send stop IPI to send to online CPUs This issue was noticed while debugging a shutdown issue where some secondary CPUs are not being shutdown correctly. A fix for that [1] requires that secondary cpus be offlined using the cpu_online_mask so that the stop operation is a no-op if CPU HOTPLUG is disabled. I, like the author in [1] looked at the architectures and found that alpha is one of two architectures that executes smp_send_stop() on all possible CPUs. On alpha, smp_send_stop() sends an IPI to all possible CPUs but only needs to send them to online CPUs. Send the stop IPI to only the online CPUs. 
[1] https://lkml.org/lkml/2020/1/10/250 Signed-off-by: Prarit Bhargava Cc: Richard Henderson Cc: Ivan Kokshaysky Signed-off-by: Matt Turner --- arch/alpha/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 4b2575f936d4..cb64e4797d2a 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -582,7 +582,7 @@ void smp_send_stop(void) { cpumask_t to_whom; - cpumask_copy(&to_whom, cpu_possible_mask); + cpumask_copy(&to_whom, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &to_whom); #ifdef DEBUG_IPI_MSG if (hard_smp_processor_id() != boot_cpu_id) From bfd736e3ffcc9dfc23c0a619fcc131eefd91d7ca Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 18 Jan 2021 11:20:29 +0000 Subject: [PATCH 451/794] alpha: defconfig: add necessary configs for boot testing Gentoo's KernelCI will soon boot test alpha kernel and we need CONFIG_DEVTMPFS=y to be set for that. Note that CONFIG_DEVTMPFS=y is already necessary for lot of other distribution/tools like recent udev/systemd. 
Signed-off-by: Corentin Labbe Signed-off-by: Matt Turner --- arch/alpha/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index dd2dd9f0861f..7f1ca30b115b 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -70,3 +70,4 @@ CONFIG_DEBUG_INFO=y CONFIG_ALPHA_LEGACY_START_ADDRESS=y CONFIG_MATHEMU=y CONFIG_CRYPTO_HMAC=y +CONFIG_DEVTMPFS=y From 8f34ed9d959786e2f2a643a1237f69f0171911cf Mon Sep 17 00:00:00 2001 From: tangchunyou Date: Wed, 20 Jan 2021 21:34:10 +0800 Subject: [PATCH 452/794] alpha: fix typos in a comment "kerne" -> "kernel" Signed-off-by: tangchunyou Signed-off-by: Matt Turner --- arch/alpha/boot/bootpz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c index 43af71835adf..90a2b341e9c0 100644 --- a/arch/alpha/boot/bootpz.c +++ b/arch/alpha/boot/bootpz.c @@ -200,7 +200,7 @@ extern char _end; START_ADDR KSEG address of the entry point of kernel code. ZERO_PGE KSEG address of page full of zeroes, but - upon entry to kerne cvan be expected + upon entry to kernel, it can be expected to hold the parameter list and possible INTRD information. From 6208721f1399912a0a53c77ed86dcc25d3e20efb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 20 Apr 2021 19:56:31 +0200 Subject: [PATCH 453/794] binfmt: remove support for em86 (alpha only) We have a fairly specific alpha binary loader in Linux: running x86 (i386, i486) binaries via the em86 [1] emulator. As noted in the Kconfig option, the same behavior can be achieved via binfmt_misc, for example, more nowadays used for running qemu-user. An example on how to get binfmt_misc running with em86 can be found in Documentation/admin-guide/binfmt-misc.rst The defconfig does not have CONFIG_BINFMT_EM86=y set. And doing a make defconfig && make olddefconfig results in # CONFIG_BINFMT_EM86 is not set ... 
as we don't seem to have any supported Linux distribution for alpha anymore, there isn't really any "default" user of that feature anymore. Searching for "CONFIG_BINFMT_EM86=y" reveals mostly discussions from around 20 years ago, like [2] describing how to get netscape running via em86, or [3] discussing that running wine or installing Win 3.11 through em86 would be a nice feature. The latest binaries available for em86 are from 2000, version 2.2.1 [4] -- which translates to "unsupported"; further, em86 doesn't even work with glibc-2.x but only with glibc-2.0 [4, 5]. These are clear signs that there might not be too many em86 users out there, especially users relying on modern Linux kernels. Even though the code footprint is relatively small, let's just get rid of this blast from the past that's effectively unused. [1] http://ftp.dreamtime.org/pub/linux/Linux-Alpha/em86/v0.4/docs/em86.html [2] https://static.lwn.net/1998/1119/a/alpha-netscape.html [3] https://groups.google.com/g/linux.debian.alpha/c/AkGuQHeCe0Y [4] http://zeniv.linux.org.uk/pub/linux/alpha/em86/v2.2-1/relnotes.2.2.1.html [5] https://forum.teamspeak.com/archive/index.php/t-1477.html Cc: Alexander Viro Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: linux-fsdevel@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: linux-alpha@vger.kernel.org Signed-off-by: David Hildenbrand Signed-off-by: Matt Turner --- fs/Kconfig.binfmt | 15 ------- fs/Makefile | 1 - fs/binfmt_em86.c | 110 ---------------------------------------------- 3 files changed, 126 deletions(-) delete mode 100644 fs/binfmt_em86.c diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 06fb7a93a1bd..4d5ae61580aa 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -168,21 +168,6 @@ config OSF4_COMPAT with v4 shared libraries freely available from Compaq. If you're going to use shared libraries from Tru64 version 5.0 or later, say N. 
-config BINFMT_EM86 - tristate "Kernel support for Linux/Intel ELF binaries" - depends on ALPHA - help - Say Y here if you want to be able to execute Linux/Intel ELF - binaries just like native Alpha binaries on your Alpha machine. For - this to work, you need to have the emulator /usr/bin/em86 in place. - - You can get the same functionality by saying N here and saying Y to - "Kernel support for MISC binaries". - - You may answer M to compile the emulation support as a module and - later load the module when you want to use a Linux/Intel binary. The - module will be called binfmt_em86. If unsure, say Y. - config BINFMT_MISC tristate "Kernel support for MISC binaries" help diff --git a/fs/Makefile b/fs/Makefile index 9c708e1fbe8f..f98f3e691c37 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -39,7 +39,6 @@ obj-$(CONFIG_FS_ENCRYPTION) += crypto/ obj-$(CONFIG_FS_VERITY) += verity/ obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o -obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c deleted file mode 100644 index 06b9b9fddf70..000000000000 --- a/fs/binfmt_em86.c +++ /dev/null @@ -1,110 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/fs/binfmt_em86.c - * - * Based on linux/fs/binfmt_script.c - * Copyright (C) 1996 Martin von Löwis - * original #!-checking implemented by tytso. - * - * em86 changes Copyright (C) 1997 Jim Paradis - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#define EM86_INTERP "/usr/bin/em86" -#define EM86_I_NAME "em86" - -static int load_em86(struct linux_binprm *bprm) -{ - const char *i_name, *i_arg; - char *interp; - struct file * file; - int retval; - struct elfhdr elf_ex; - - /* Make sure this is a Linux/Intel ELF executable... 
*/ - elf_ex = *((struct elfhdr *)bprm->buf); - - if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - return -ENOEXEC; - - /* First of all, some simple consistency checks */ - if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || - (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) || - !bprm->file->f_op->mmap) { - return -ENOEXEC; - } - - /* Need to be able to load the file after exec */ - if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) - return -ENOENT; - - /* Unlike in the script case, we don't have to do any hairy - * parsing to find our interpreter... it's hardcoded! - */ - interp = EM86_INTERP; - i_name = EM86_I_NAME; - i_arg = NULL; /* We reserve the right to add an arg later */ - - /* - * Splice in (1) the interpreter's name for argv[0] - * (2) (optional) argument to interpreter - * (3) filename of emulated file (replace argv[0]) - * - * This is done in reverse order, because of how the - * user environment and arguments are stored. - */ - remove_arg_zero(bprm); - retval = copy_string_kernel(bprm->filename, bprm); - if (retval < 0) return retval; - bprm->argc++; - if (i_arg) { - retval = copy_string_kernel(i_arg, bprm); - if (retval < 0) return retval; - bprm->argc++; - } - retval = copy_string_kernel(i_name, bprm); - if (retval < 0) return retval; - bprm->argc++; - - /* - * OK, now restart the process with the interpreter's inode. - * Note that we use open_exec() as the name is now in kernel - * space, and we don't need to copy it. 
- */ - file = open_exec(interp); - if (IS_ERR(file)) - return PTR_ERR(file); - - bprm->interpreter = file; - return 0; -} - -static struct linux_binfmt em86_format = { - .module = THIS_MODULE, - .load_binary = load_em86, -}; - -static int __init init_em86_binfmt(void) -{ - register_binfmt(&em86_format); - return 0; -} - -static void __exit exit_em86_binfmt(void) -{ - unregister_binfmt(&em86_format); -} - -core_initcall(init_em86_binfmt); -module_exit(exit_em86_binfmt); -MODULE_LICENSE("GPL"); From 15b9e384030cf34de33deed70d670a8dc0fc784a Mon Sep 17 00:00:00 2001 From: He Zhe Date: Mon, 26 Apr 2021 17:16:29 +0800 Subject: [PATCH 454/794] alpha: Add syscall_get_return_value() audit now requires syscall_get_return_value instead of regs_return_value to retrieve syscall return code. Other architectures that support audit have already defined this function. Signed-off-by: He Zhe Signed-off-by: Matt Turner --- arch/alpha/include/asm/syscall.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h index 11c688c1d7ec..f21babaeed85 100644 --- a/arch/alpha/include/asm/syscall.h +++ b/arch/alpha/include/asm/syscall.h @@ -9,4 +9,10 @@ static inline int syscall_get_arch(struct task_struct *task) return AUDIT_ARCH_ALPHA; } +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r0; +} + #endif /* _ASM_ALPHA_SYSCALL_H */ From ee3e9fa29e8b2553097009dac270cbed0f03f6d2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 May 2021 23:37:20 +0200 Subject: [PATCH 455/794] alpha: fp_emul: avoid init/cleanup_module names This is one of the last modules using the old calling conventions for module init/exit functions. Change it over to the style used everywhere else.
Signed-off-by: Arnd Bergmann Signed-off-by: Matt Turner --- arch/alpha/math-emu/math.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index 5617ac0889b8..f7cef66af88d 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -65,7 +65,7 @@ static long (*save_emul) (unsigned long pc); long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long); long do_alpha_fp_emul(unsigned long); -int init_module(void) +static int alpha_fp_emul_init_module(void) { save_emul_imprecise = alpha_fp_emul_imprecise; save_emul = alpha_fp_emul; @@ -73,12 +73,14 @@ int init_module(void) alpha_fp_emul = do_alpha_fp_emul; return 0; } +module_init(alpha_fp_emul_init_module); -void cleanup_module(void) +static void alpha_fp_emul_cleanup_module(void) { alpha_fp_emul_imprecise = save_emul_imprecise; alpha_fp_emul = save_emul; } +module_exit(alpha_fp_emul_cleanup_module); #undef alpha_fp_emul_imprecise #define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise From 3e0c6d15adeafa2afcb4c95c892bb5980c1430e6 Mon Sep 17 00:00:00 2001 From: gushengxian Date: Tue, 25 May 2021 20:16:10 -0700 Subject: [PATCH 456/794] alpha: Remove space between * and parameter name 'struct pcb_struct * pcb_va' should be 'struct pcb_struct *pcb_va'. 
Signed-off-by: gushengxian Signed-off-by: Matt Turner --- arch/alpha/boot/bootp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c index 00266e6e1b71..b4faba2432d5 100644 --- a/arch/alpha/boot/bootp.c +++ b/arch/alpha/boot/bootp.c @@ -23,7 +23,7 @@ #include "ksize.h" extern unsigned long switch_to_osf_pal(unsigned long nr, - struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, + struct pcb_struct *pcb_va, struct pcb_struct *pcb_pa, unsigned long *vptb); extern void move_stack(unsigned long new_stack); From fc520525c18ac2207792eb2067c6b626326a87ad Mon Sep 17 00:00:00 2001 From: gushengxian Date: Fri, 2 Jul 2021 05:48:12 -0700 Subject: [PATCH 457/794] alpha: fix spelling mistakes Fix some spelling mistakes in comments: delarations ==> declarations softare ==> software suffiently ==> sufficiently requred ==> required unaliged ==> unaligned Signed-off-by: gushengxian Signed-off-by: Matt Turner --- arch/alpha/boot/misc.c | 2 +- arch/alpha/kernel/osf_sys.c | 4 ++-- arch/alpha/kernel/perf_event.c | 2 +- arch/alpha/kernel/sys_nautilus.c | 2 +- arch/alpha/kernel/traps.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c index d65192202703..325d4dd4f904 100644 --- a/arch/alpha/boot/misc.c +++ b/arch/alpha/boot/misc.c @@ -30,7 +30,7 @@ extern long srm_printk(const char *, ...) __attribute__ ((format (printf, 1, 2))); /* - * gzip delarations + * gzip declarations */ #define OF(args) args #define STATIC static diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index d5367a1c6300..d31167e3269c 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -834,7 +834,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, return -EFAULT; state = &current_thread_info()->ieee_state; - /* Update softare trap enable bits. */ + /* Update software trap enable bits.
*/ *state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK); /* Update the real fpcr. */ @@ -854,7 +854,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, state = &current_thread_info()->ieee_state; exc &= IEEE_STATUS_MASK; - /* Update softare trap enable bits. */ + /* Update software trap enable bits. */ swcr = (*state & IEEE_SW_MASK) | exc; *state |= exc; diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index e7a59d927d78..efcf7321701b 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -574,7 +574,7 @@ static void alpha_pmu_start(struct perf_event *event, int flags) * Check that CPU performance counters are supported. * - currently support EV67 and later CPUs. * - actually some later revisions of the EV6 have the same PMC model as the - * EV67 but we don't do suffiently deep CPU detection to detect them. + * EV67 but we don't do sufficiently deep CPU detection to detect them. * Bad luck to the very few people who might have one, I guess. */ static int supported_cpu(void) diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 53adf43dcd44..96fd6ff3fe81 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -212,7 +212,7 @@ nautilus_init_pci(void) /* Use default IO. */ pci_add_resource(&bridge->windows, &ioport_resource); - /* Irongate PCI memory aperture, calculate requred size before + /* Irongate PCI memory aperture, calculate required size before setting it up. */ pci_add_resource(&bridge->windows, &irongate_mem); diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 921d4b6e4d95..5398f982bdd1 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -730,7 +730,7 @@ do_entUnaUser(void __user * va, unsigned long opcode, long error; /* Check the UAC bits to decide what the user wants us to do - with the unaliged access. */ + with the unaligned access.
*/ if (!(current_thread_info()->status & TS_UAC_NOPRINT)) { if (__ratelimit(&ratelimit)) { From d7f237df53457cf0cbdb9943b9b7c93a05e2fdb6 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 23 Jul 2021 05:52:25 -0400 Subject: [PATCH 458/794] drm/i915/bios: Fix ports mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PORT_A to PORT_F are regular integers defined in the enum port, while for_each_port_masked requires a bit mask for the ports. Current given mask: 0b111 Desired mask: 0b111111 I noticed this while Christoph was reporting a bug found on headless GVT configuration which bisect blamed commit 3ae04c0c7e63 ("drm/i915/bios: limit default outputs to ports A through F") v2: Avoid unnecessary line continuations as pointed by CI and Christoph Cc: Christoph Hellwig Fixes: 3ae04c0c7e63 ("drm/i915/bios: limit default outputs to ports A through F") Cc: Lucas De Marchi Cc: Ville Syrjälä Cc: Jani Nikula Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Reviewed-by: Lucas De Marchi Tested-by: Christoph Hellwig Link: https://patchwork.freedesktop.org/patch/msgid/20210723095225.562913-1-rodrigo.vivi@intel.com (cherry picked from commit 9b52aa720168859526bf90d77fa210fc0336f170) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bios.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 5b6922e28ef2..aa667fa71158 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -2166,7 +2166,8 @@ static void init_vbt_missing_defaults(struct drm_i915_private *i915) { enum port port; - int ports = PORT_A | PORT_B | PORT_C | PORT_D | PORT_E | PORT_F; + int ports = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | + BIT(PORT_D) | BIT(PORT_E) | BIT(PORT_F); if (!HAS_DDI(i915) && !IS_CHERRYVIEW(i915)) return; From 5d3a618f356595f132ee85c63a1b5f007a71f23c Mon Sep 17 
00:00:00 2001 From: Lucas De Marchi Date: Fri, 23 Jul 2021 16:43:52 -0700 Subject: [PATCH 459/794] drm/i915: fix not reading DSC disable fuse in GLK We were using GRAPHICS_VER() to handle SKL_DFSM register, which means we were not handling GLK correctly since that has GRAPHICS_VER == 9, but DISPLAY_VER == 10. Switch the entire branch to check DISPLAY_VER which makes it more in line with Bspec. Even though the Bspec has an exception for RKL in TGL_DFSM_PIPE_D_DISABLE, we don't have to do anything as the bit has disable semantic and RKL doesn't have pipe D. Bspec: 50075, 7548 Fixes: 2b5a4562edd0 ("drm/i915/display: Simplify GLK display version tests") Cc: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210723234352.214459-1-lucas.demarchi@intel.com (cherry picked from commit 4fd177288a4ee046bd8590355a64de855dcf77e2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_device_info.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7eaa92fee421..e0a10f36acc1 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -325,7 +325,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->pipe_mask &= ~BIT(PIPE_C); info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C); } - } else if (HAS_DISPLAY(dev_priv) && GRAPHICS_VER(dev_priv) >= 9) { + } else if (HAS_DISPLAY(dev_priv) && DISPLAY_VER(dev_priv) >= 9) { u32 dfsm = intel_de_read(dev_priv, SKL_DFSM); if (dfsm & SKL_DFSM_PIPE_A_DISABLE) { @@ -340,7 +340,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->pipe_mask &= ~BIT(PIPE_C); info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C); } - if (GRAPHICS_VER(dev_priv) >= 12 && + + if (DISPLAY_VER(dev_priv) >= 12 && (dfsm & TGL_DFSM_PIPE_D_DISABLE)) { info->pipe_mask &= ~BIT(PIPE_D); 
info->cpu_transcoder_mask &= ~BIT(TRANSCODER_D); @@ -352,10 +353,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE) info->display.has_fbc = 0; - if (GRAPHICS_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) + if (DISPLAY_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) info->display.has_dmc = 0; - if (GRAPHICS_VER(dev_priv) >= 10 && + if (DISPLAY_VER(dev_priv) >= 10 && (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE)) info->display.has_dsc = 0; } From b4bde5554f70fb04ff07989fdc1356ab84d6f482 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Jul 2021 16:29:22 -0700 Subject: [PATCH 460/794] drm/i915/display: split DISPLAY_VER 9 and 10 in intel_setup_outputs() Commit 5a9d38b20a5a ("drm/i915/display: hide workaround for broken vbt in intel_bios.c") moved the workaround for broken or missing VBT to intel_bios.c. However is_port_valid() only protects the handling of different skus of the same display version. Since in intel_setup_outputs() we share the code path with version 9, this would also create port F for SKL/KBL, which does not exist. Missing VBT can be reproduced when starting a headless QEMU with no opregion available. Avoid the issue by splitting versions 9 and 10 in intel_setup_outputs(), which also makes it more clear what code path it's taking for each version. 
v2: move generic display version after Geminilake since that one has a different set of outputs Fixes: 5a9d38b20a5a ("drm/i915/display: hide workaround for broken vbt in intel_bios.c") Cc: Jani Nikula Cc: Rodrigo Vivi Reported-by: Christoph Hellwig Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Reviewed-by: Matt Roper Tested-by: Christoph Hellwig Link: https://patchwork.freedesktop.org/patch/msgid/20210722232922.3796835-1-lucas.demarchi@intel.com (cherry picked from commit ec387b8ff8d757561369be9a280cf63f23bbb926) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3bad4e00f7be..2d5d21740c25 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11361,13 +11361,19 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); vlv_dsi_init(dev_priv); - } else if (DISPLAY_VER(dev_priv) >= 9) { + } else if (DISPLAY_VER(dev_priv) == 10) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); intel_ddi_init(dev_priv, PORT_D); intel_ddi_init(dev_priv, PORT_E); intel_ddi_init(dev_priv, PORT_F); + } else if (DISPLAY_VER(dev_priv) >= 9) { + intel_ddi_init(dev_priv, PORT_A); + intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_C); + intel_ddi_init(dev_priv, PORT_D); + intel_ddi_init(dev_priv, PORT_E); } else if (HAS_DDI(dev_priv)) { u32 found; From 480e93e12aa04d857f7cc2e6fcec181c0d690404 Mon Sep 17 00:00:00 2001 From: Harshvardhan Jha Date: Sun, 25 Jul 2021 23:23:55 +0530 Subject: [PATCH 461/794] net: xfrm: Fix end of loop tests for list_for_each_entry The list_for_each_entry() iterator, "pos" in this code, can never be NULL so the warning will never be printed. 
Signed-off-by: Harshvardhan Jha Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_ipcomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 2e8afe078d61..cb40ff0ff28d 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -241,7 +241,7 @@ static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms) break; } - WARN_ON(!pos); + WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list)); if (--pos->users) return; From f0c6225531e4a9e43e51c5f7b02089bdd725c734 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 17 Jul 2021 23:11:38 -0500 Subject: [PATCH 462/794] ACPI: PM: Add support for upcoming AMD uPEP HID AMDI007 AMD systems with uPEP HID AMDI007 should be using revision 2 and the AMD method. Fixes: 8fbd6c15ea0a ("ACPI: PM: Adjust behavior for field problems on AMD systems") Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/s2idle.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index 1c507804fb10..fbdbef0ab552 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -378,19 +378,25 @@ static int lps0_device_attach(struct acpi_device *adev, * AMDI0006: * - should use rev_id 0x0 * - function mask = 0x3: Should use Microsoft method + * AMDI0007: + * - Should use rev_id 0x2 + * - Should only use AMD method */ const char *hid = acpi_device_hid(adev); - rev_id = 0; + rev_id = strcmp(hid, "AMDI0007") ? 
0 : 2; lps0_dsm_func_mask = validate_dsm(adev->handle, ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid); lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle, - ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id, + ACPI_LPS0_DSM_UUID_MICROSOFT, 0, &lps0_dsm_guid_microsoft); if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") || !strcmp(hid, "AMDI0005"))) { lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1; acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n", ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask); + } else if (lps0_dsm_func_mask_microsoft > 0 && !strcmp(hid, "AMDI0007")) { + lps0_dsm_func_mask_microsoft = -EINVAL; + acpi_handle_debug(adev->handle, "_DSM Using AMD method\n"); } } else { rev_id = 1; From 94cbe7db7d757c2d481c3617ab5579a28cfc2175 Mon Sep 17 00:00:00 2001 From: Mohammad Athari Bin Ismail Date: Mon, 26 Jul 2021 10:20:20 +0800 Subject: [PATCH 463/794] net: stmmac: add est_irq_status callback function for GMAC 4.10 and 5.10 Assign dwmac5_est_irq_status to est_irq_status callback function for GMAC 4.10 and 5.10. With this, EST related interrupts could be handled properly. Fixes: e49aa315cb01 ("net: stmmac: EST interrupts handling and error reporting") Cc: # 5.13.x Signed-off-by: Mohammad Athari Bin Ismail Acked-by: Wong Vee Khee Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 67ba083eb90c..b21745368983 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -1249,6 +1249,7 @@ const struct stmmac_ops dwmac410_ops = { .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, + .est_irq_status = dwmac5_est_irq_status, .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, @@ -1300,6 +1301,7 @@ const struct stmmac_ops dwmac510_ops = { .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, + .est_irq_status = dwmac5_est_irq_status, .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, From 2ebda027148315581b89a2ed2fef84ad53b2aedd Mon Sep 17 00:00:00 2001 From: Chen Shen Date: Mon, 26 Jul 2021 13:47:34 +0800 Subject: [PATCH 464/794] sctp: delete addr based on sin6_scope_id sctp_inet6addr_event deletes 'addr' from 'local_addr_list' when setting netdev down, but it is possible to delete the incorrect entry (match the first one with the same ipaddr, but the different 'ifindex'), if there are some netdevs with the same 'local-link' ipaddr added already. It should delete the entry depending on 'sin6_addr' and 'sin6_scope_id' both. otherwise, the endpoint will call 'sctp_sf_ootb' if it can't find the according association when receives 'heartbeat', and finally will reply 'abort'. 
For example: 1.when linux startup the entries in local_addr_list: ifindex:35 addr:fe80::40:43ff:fe80:0 (eths0.201) ifindex:36 addr:fe80::40:43ff:fe80:0 (eths0.209) ifindex:37 addr:fe80::40:43ff:fe80:0 (eths0.210) the route table: local fe80::40:43ff:fe80:0 dev eths0.201 local fe80::40:43ff:fe80:0 dev eths0.209 local fe80::40:43ff:fe80:0 dev eths0.210 2.after 'ifconfig eths0.209 down' the entries in local_addr_list: ifindex:36 addr:fe80::40:43ff:fe80:0 (eths0.209) ifindex:37 addr:fe80::40:43ff:fe80:0 (eths0.210) the route table: local fe80::40:43ff:fe80:0 dev eths0.201 local fe80::40:43ff:fe80:0 dev eths0.210 3.asoc not found for src:[fe80::40:43ff:fe80:0]:37381 dst:[:1]:53335 ::1->fe80::40:43ff:fe80:0 HEARTBEAT fe80::40:43ff:fe80:0->::1 ABORT Signed-off-by: Chen Shen Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e48dd909dee5..470dbdc27d58 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -100,8 +100,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, list_for_each_entry_safe(addr, temp, &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && - ipv6_addr_equal(&addr->a.v6.sin6_addr, - &ifa->addr)) { + ipv6_addr_equal(&addr->a.v6.sin6_addr, + &ifa->addr) && + addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) { sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; From 9a9e74819bb0e4694279fb437e136fe485878d25 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 16 Jul 2021 16:41:04 +0200 Subject: [PATCH 465/794] KVM: nSVM: Rename nested_svm_vmloadsave() to svm_copy_vmloadsave_state() To match svm_copy_vmrun_state(), rename nested_svm_vmloadsave() to svm_copy_vmloadsave_state(). Opportunistically add missing braces to 'else' branch in vmload_vmsave_interception(). No functional change intended. 
Suggested-by: Paolo Bonzini Signed-off-by: Vitaly Kuznetsov Message-Id: <20210716144104.465269-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 +- arch/x86/kvm/svm/svm.c | 7 ++++--- arch/x86/kvm/svm/svm.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3bd09c50c98b..8493592b63b4 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save, to_save->cpl = 0; } -void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) +void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; to_vmcb->save.gs = from_vmcb->save.gs; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 664d20f0689c..cfe165d74093 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2147,11 +2147,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload) ret = kvm_skip_emulated_instruction(vcpu); if (vmload) { - nested_svm_vmloadsave(vmcb12, svm->vmcb); + svm_copy_vmloadsave_state(vmcb12, svm->vmcb); svm->sysenter_eip_hi = 0; svm->sysenter_esp_hi = 0; - } else - nested_svm_vmloadsave(svm->vmcb, vmcb12); + } else { + svm_copy_vmloadsave_state(svm->vmcb, vmcb12); + } kvm_vcpu_unmap(vcpu, &map, true); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7e2090752d8f..1b65ee3a9569 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -466,7 +466,7 @@ int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); void svm_copy_vmrun_state(struct vmcb_save_area *from_save, struct vmcb_save_area *to_save); -void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb); +void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); static inline int 
nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) From 2bb16bea5feaa582fbbdbfd84ecaa1ab61bbb34c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 19 Jul 2021 11:03:22 +0200 Subject: [PATCH 466/794] KVM: nSVM: Swap the parameter order for svm_copy_vmrun_state()/svm_copy_vmloadsave_state() Make svm_copy_vmrun_state()/svm_copy_vmloadsave_state() interface match 'memcpy(dest, src)' to avoid any confusion. No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Message-Id: <20210719090322.625277-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 8 ++++---- arch/x86/kvm/svm/svm.c | 12 ++++++------ arch/x86/kvm/svm/svm.h | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 8493592b63b4..1c2a0414a88d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -702,8 +702,8 @@ out: } /* Copy state save area fields which are handled by VMRUN */ -void svm_copy_vmrun_state(struct vmcb_save_area *from_save, - struct vmcb_save_area *to_save) +void svm_copy_vmrun_state(struct vmcb_save_area *to_save, + struct vmcb_save_area *from_save) { to_save->es = from_save->es; to_save->cs = from_save->cs; @@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save, to_save->cpl = 0; } -void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb) +void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; to_vmcb->save.gs = from_vmcb->save.gs; @@ -1385,7 +1385,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; - svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save); + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); nested_load_control_from_vmcb12(svm, ctl); svm_switch_vmcb(svm, &svm->nested.vmcb02); diff --git a/arch/x86/kvm/svm/svm.c 
b/arch/x86/kvm/svm/svm.c index cfe165d74093..9a6987549e1b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2147,11 +2147,11 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload) ret = kvm_skip_emulated_instruction(vcpu); if (vmload) { - svm_copy_vmloadsave_state(vmcb12, svm->vmcb); + svm_copy_vmloadsave_state(svm->vmcb, vmcb12); svm->sysenter_eip_hi = 0; svm->sysenter_esp_hi = 0; } else { - svm_copy_vmloadsave_state(svm->vmcb, vmcb12); + svm_copy_vmloadsave_state(vmcb12, svm->vmcb); } kvm_vcpu_unmap(vcpu, &map, true); @@ -4345,8 +4345,8 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); - svm_copy_vmrun_state(&svm->vmcb01.ptr->save, - map_save.hva + 0x400); + svm_copy_vmrun_state(map_save.hva + 0x400, + &svm->vmcb01.ptr->save); kvm_vcpu_unmap(vcpu, &map_save, true); } @@ -4394,8 +4394,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) &map_save) == -EINVAL) return 1; - svm_copy_vmrun_state(map_save.hva + 0x400, - &svm->vmcb01.ptr->save); + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, + map_save.hva + 0x400); kvm_vcpu_unmap(vcpu, &map_save, true); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 1b65ee3a9569..bd0fe94c2920 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -464,9 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); -void svm_copy_vmrun_state(struct vmcb_save_area *from_save, - struct vmcb_save_area *to_save); -void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb); +void svm_copy_vmrun_state(struct vmcb_save_area *to_save, + struct vmcb_save_area *from_save); +void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); static inline int 
nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) From 0e691ee7b5034c91a31b565d3ff9a50e01dde445 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 22 Jul 2021 11:26:28 +0200 Subject: [PATCH 467/794] KVM: Documentation: Fix KVM_CAP_ENFORCE_PV_FEATURE_CPUID name 'KVM_CAP_ENFORCE_PV_CPUID' doesn't match the define in include/uapi/linux/kvm.h. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210722092628.236474-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index c7b165ca70b6..1a1d2061227b 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7049,7 +7049,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to trap and emulate MSRs that are outside of the scope of KVM as well as limit the attack surface on KVM's MSR emulation code. -8.28 KVM_CAP_ENFORCE_PV_CPUID +8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID ----------------------------- Architectures: x86 From 3b1c8c5682672d73c1e977944af8c3ebed4a0ce1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 22 Jul 2021 11:50:03 +0200 Subject: [PATCH 468/794] docs: virt: kvm: api.rst: replace some characters The conversion tools used during DocBook/LaTeX/html/Markdown->ReST conversion and some cut-and-pasted text contain some characters that aren't easily reachable on standard keyboards and/or could cause troubles when parsed by the documentation build system. 
Replace the occurences of the following characters: - U+00a0 (' '): NO-BREAK SPACE as it can cause lines being truncated on PDF output Signed-off-by: Mauro Carvalho Chehab Message-Id: Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1a1d2061227b..dae68e68ca23 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -855,7 +855,7 @@ in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for specific cpus. The irq field is interpreted like this:: -  bits: | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 | + bits: | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 | field: | vcpu2_index | irq_type | vcpu_index | irq_id | The irq_type field has the following values: @@ -2149,10 +2149,10 @@ prior to calling the KVM_RUN ioctl. Errors: ====== ============================================================ -  ENOENT   no such register -  EINVAL   invalid register ID, or no such register or used with VMs in + ENOENT no such register + EINVAL invalid register ID, or no such register or used with VMs in protected virtualization mode on s390 -  EPERM    (arm64) register access not allowed before vcpu finalization + EPERM (arm64) register access not allowed before vcpu finalization ====== ============================================================ (These error codes are indicative only: do not rely on a specific error @@ -2590,10 +2590,10 @@ following id bit patterns:: Errors include: ======== ============================================================ -  ENOENT   no such register -  EINVAL   invalid register ID, or no such register or used with VMs in + ENOENT no such register + EINVAL invalid register ID, or no such register or used with VMs in protected virtualization mode on s390 -  EPERM    (arm64) register access not allowed before vcpu 
finalization + EPERM (arm64) register access not allowed before vcpu finalization ======== ============================================================ (These error codes are indicative only: do not rely on a specific error @@ -3112,13 +3112,13 @@ current state. "addr" is ignored. Errors: ====== ================================================================= -  EINVAL    the target is unknown, or the combination of features is invalid. -  ENOENT    a features bit specified is unknown. + EINVAL the target is unknown, or the combination of features is invalid. + ENOENT a features bit specified is unknown. ====== ================================================================= This tells KVM what type of CPU to present to the guest, and what -optional features it should have.  This will cause a reset of the cpu -registers to their initial values.  If this is not called, KVM_RUN will +optional features it should have. This will cause a reset of the cpu +registers to their initial values. If this is not called, KVM_RUN will return ENOEXEC for that vcpu. The initial values are defined as: @@ -3239,8 +3239,8 @@ VCPU matching underlying host. Errors: ===== ============================================================== -  E2BIG     the reg index list is too big to fit in the array specified by -             the user (the number required will be written into n). + E2BIG the reg index list is too big to fit in the array specified by + the user (the number required will be written into n). ===== ============================================================== :: @@ -3288,7 +3288,7 @@ specific device. ARM/arm64 divides the id field into two parts, a device id and an address type id specific to the individual device:: -  bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | + bits: | 63 ... 32 | 31 ... 16 | 15 ... 
0 | field: | 0x00000000 | device id | addr type id | ARM/arm64 currently only require this when using the in-kernel GIC From 0a31df6823232516f61f174907e444f710941dfe Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 22 Jul 2021 14:30:18 +0200 Subject: [PATCH 469/794] KVM: x86: Check the right feature bit for MSR_KVM_ASYNC_PF_ACK access MSR_KVM_ASYNC_PF_ACK MSR is part of interrupt based asynchronous page fault interface and not the original (deprecated) KVM_FEATURE_ASYNC_PF. This is stated in Documentation/virt/kvm/msr.rst. Fixes: 66570e966dd9 ("kvm: x86: only provide PV features if enabled in guest's CPUID") Signed-off-by: Vitaly Kuznetsov Reviewed-by: Maxim Levitsky Reviewed-by: Oliver Upton Message-Id: <20210722123018.260035-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a4fd10604f72..4116567f3d44 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3407,7 +3407,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; break; case MSR_KVM_ASYNC_PF_ACK: - if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) return 1; if (data & 0x1) { vcpu->arch.apf.pageready_pending = false; @@ -3746,7 +3746,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.apf.msr_int_val; break; case MSR_KVM_ASYNC_PF_ACK: - if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) return 1; msr_info->data = 0; From 92766c4628ea349c8ddab0cd7bd0488f36e5c4ce Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Sun, 25 Jul 2021 21:45:12 +0800 Subject: [PATCH 470/794] net/qla3xxx: fix schedule while atomic in ql_wait_for_drvr_lock and ql_adapter_reset When calling the 'ql_wait_for_drvr_lock' and 'ql_adapter_reset', the driver has already acquired the spin lock, so the driver 
should not call 'ssleep' in atomic context. This bug can be fixed by using 'mdelay' instead of 'ssleep'. Reported-by: Letu Ren Signed-off-by: Letu Ren Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qla3xxx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2376b2729633..c00ad57575ea 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -154,7 +154,7 @@ static int ql_wait_for_drvr_lock(struct ql3_adapter *qdev) "driver lock acquired\n"); return 1; } - ssleep(1); + mdelay(1000); } while (++i < 10); netdev_err(qdev->ndev, "Timed out waiting for driver lock...\n"); @@ -3274,7 +3274,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev) if ((value & ISP_CONTROL_SR) == 0) break; - ssleep(1); + mdelay(1000); } while ((--max_wait_time)); /* @@ -3310,7 +3310,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev) ispControlStatus); if ((value & ISP_CONTROL_FSR) == 0) break; - ssleep(1); + mdelay(1000); } while ((--max_wait_time)); } if (max_wait_time == 0) From 44eff40a32e8f5228ae041006352e32638ad2368 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 26 Jul 2021 14:14:31 +0100 Subject: [PATCH 471/794] io_uring: fix io_prep_async_link locking io_prep_async_link() may be called after arming a linked timeout, automatically making it unsafe to traverse the linked list. Guard with completion_lock if there was a linked timeout. 
Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/93f7c617e2b4f012a2a175b3dab6bc2f27cebc48.1627304436.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5a0fd6bcd318..c4d2b320cdd4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1279,8 +1279,17 @@ static void io_prep_async_link(struct io_kiocb *req) { struct io_kiocb *cur; - io_for_each_link(cur, req) - io_prep_async_work(cur); + if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->completion_lock); + io_for_each_link(cur, req) + io_prep_async_work(cur); + spin_unlock_irq(&ctx->completion_lock); + } else { + io_for_each_link(cur, req) + io_prep_async_work(cur); + } } static void io_queue_async_work(struct io_kiocb *req) From d47255d3f87338164762ac56df1f28d751e27246 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 24 Jun 2021 13:20:21 +0200 Subject: [PATCH 472/794] drm/amdgpu: Fix resource leak on probe error path This reverts commit 4192f7b5768912ceda82be2f83c87ea7181f9980. It is not true (as stated in the reverted commit changelog) that we never unmap the BAR on failure; it actually does happen properly on amdgpu_driver_load_kms() -> amdgpu_driver_unload_kms() -> amdgpu_device_fini() error path. What's worse, this commit actually completely breaks resource freeing on probe failure (like e.g. failure to load microcode), as amdgpu_driver_unload_kms() notices adev->rmmio being NULL and bails too early, leaving all the resources that'd normally be freed in amdgpu_acpi_fini() and amdgpu_device_fini() still hanging around, leading to all sorts of oopses when someone tries to, for example, access the sysfs and procfs resources which are still around while the driver is gone. 
Fixes: 4192f7b57689 ("drm/amdgpu: unmap register bar on device init failure") Reported-by: Vojtech Pavlik Signed-off-by: Jiri Kosina Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d303e88e3c23..f3fd5ec710b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3504,13 +3504,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - goto failed_unmap; + return r; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) - goto failed_unmap; + return r; /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); @@ -3736,10 +3736,6 @@ release_ras_con: failed: amdgpu_vf_error_trans_all(adev); -failed_unmap: - iounmap(adev->rmmio); - adev->rmmio = NULL; - return r; } From 110aa25c3ce417a44e35990cf8ed22383277933a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Jul 2021 10:42:56 -0600 Subject: [PATCH 473/794] io_uring: fix race in unified task_work running We use a bit to manage if we need to add the shared task_work, but a list + lock for the pending work. Before aborting a current run of the task_work we check if the list is empty, but we do so without grabbing the lock that protects it. This can lead to races where we think we have nothing left to run, where in practice we could be racing with a task adding new work to the list. If we do hit that race condition, we could be left with work items that need processing, but the shared task_work is not active. Ensure that we grab the lock before checking if the list is empty, so we know if it's safe to exit the run or not. 
Link: https://lore.kernel.org/io-uring/c6bd5987-e9ae-cd02-49d0-1b3ac1ef65b1@tnonline.net/ Cc: stable@vger.kernel.org # 5.11+ Reported-by: Forza Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c4d2b320cdd4..a4331deb0427 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1959,9 +1959,13 @@ static void tctx_task_work(struct callback_head *cb) node = next; } if (wq_list_empty(&tctx->task_list)) { + spin_lock_irq(&tctx->task_lock); clear_bit(0, &tctx->task_state); - if (wq_list_empty(&tctx->task_list)) + if (wq_list_empty(&tctx->task_list)) { + spin_unlock_irq(&tctx->task_lock); break; + } + spin_unlock_irq(&tctx->task_lock); /* another tctx_task_work() is enqueued, yield */ if (test_and_set_bit(0, &tctx->task_state)) break; From 6aade587d329ebe88319dfdb8e8c7b6aede80417 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 24 Jun 2021 13:11:36 +0200 Subject: [PATCH 474/794] drm/amdgpu: Avoid printing of stack contents on firmware load error In case when psp_init_asd_microcode() fails to load ASD microcode file, psp_v12_0_init_microcode() tries to print the firmware filename that failed to load before bailing out. This is wrong because: - the firmware filename it would want to print is an incorrect one as psp_init_asd_microcode() and psp_v12_0_init_microcode() are loading different filenames - it tries to print fw_name, but that's not yet been initialized by that time, so it prints random stack contents, e.g. amdgpu 0000:04:00.0: Direct firmware load for amdgpu/renoir_asd.bin failed with error -2 amdgpu 0000:04:00.0: amdgpu: fail to initialize asd microcode amdgpu 0000:04:00.0: amdgpu: psp v12.0: Failed to load firmware "\xfeTO\x8e\xff\xff" Fix that by bailing out immediately, instead of printing the bogus error message.
Reported-by: Vojtech Pavlik Signed-off-by: Jiri Kosina Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 618e5b6b85d9..536d41f327c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -67,7 +67,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) err = psp_init_asd_microcode(psp, chip_name); if (err) - goto out; + return err; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); @@ -80,7 +80,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) } else { err = amdgpu_ucode_validate(adev->psp.ta_fw); if (err) - goto out2; + goto out; ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data; @@ -105,10 +105,9 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) return 0; -out2: +out: release_firmware(adev->psp.ta_fw); adev->psp.ta_fw = NULL; -out: if (err) { dev_err(adev->dev, "psp v12.0: Failed to load firmware \"%s\"\n", From 66291b6adb66dd3bc96b0f594d88c2ff1300d95f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 26 Jul 2021 08:26:56 +0200 Subject: [PATCH 475/794] ALSA: usb-audio: Fix superfluous autosuspend recovery The change to restore the autosuspend from the disabled state uses a wrong check: namely, it should have been the exact comparison of the quirk_type instead of the bitwise and (&). Otherwise it matches wrongly with the other quirk types. Although re-enabling the autosuspend for the already enabled device shouldn't matter much, it's better to fix the unbalanced call. 
Fixes: 9799110825db ("ALSA: usb-audio: Disable USB autosuspend properly in setup_disable_autosuspend()") Cc: Link: https://lore.kernel.org/r/s5hr1flh9ov.wl-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/card.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 2f6a62416c05..a1f8c3a026f5 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -907,7 +907,7 @@ static void usb_audio_disconnect(struct usb_interface *intf) } } - if (chip->quirk_type & QUIRK_SETUP_DISABLE_AUTOSUSPEND) + if (chip->quirk_type == QUIRK_SETUP_DISABLE_AUTOSUSPEND) usb_enable_autosuspend(interface_to_usbdev(intf)); chip->num_interfaces--; From 53ca18acbe645656132fb5a329833db711067e54 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 26 Jul 2021 12:01:02 +0200 Subject: [PATCH 476/794] spi: imx: mx51-ecspi: Fix low-speed CONFIGREG delay calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spi_imx->spi_bus_clk may be uninitialized and thus also zero in mx51_ecspi_prepare_message(), which would lead to division by zero in kernel. Since bitbang .setup_transfer callback which initializes the spi_imx->spi_bus_clk is called after bitbang prepare_message callback, iterate over all the transfers in spi_message, find the one with lowest bus frequency, and use that bus frequency for the delay calculation. Note that it is not possible to move this CONFIGREG delay back into the .setup_transfer callback, because that is invoked too late, after the GPIO chipselects were already configured. 
Fixes: 135cbd378eab ("spi: imx: mx51-ecspi: Reinstate low-speed CONFIGREG delay") Signed-off-by: Marek Vasut Cc: Uwe Kleine-König Cc: Mark Brown Link: https://lore.kernel.org/r/20210726100102.5188-1-marex@denx.de Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 4aee3db6d6df..2872993550bd 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -505,7 +505,9 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, struct spi_message *msg) { struct spi_device *spi = msg->spi; + struct spi_transfer *xfer; u32 ctrl = MX51_ECSPI_CTRL_ENABLE; + u32 min_speed_hz = ~0U; u32 testreg, delay; u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG); @@ -577,8 +579,20 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, * be asserted before the SCLK polarity changes, which would disrupt * the SPI communication as the device on the other end would consider * the change of SCLK polarity as a clock tick already. + * + * Because spi_imx->spi_bus_clk is only set in bitbang prepare_message + * callback, iterate over all the transfers in spi_message, find the + * one with lowest bus frequency, and use that bus frequency for the + * delay calculation. In case all transfers have speed_hz == 0, then + * min_speed_hz is ~0 and the resulting delay is zero. 
*/ - delay = (2 * 1000000) / spi_imx->spi_bus_clk; + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + if (!xfer->speed_hz) + continue; + min_speed_hz = min(xfer->speed_hz, min_speed_hz); + } + + delay = (2 * 1000000) / min_speed_hz; if (likely(delay < 10)) /* SCLK is faster than 100 kHz */ udelay(delay); else /* SCLK is _very_ slow */ From 758684e49f4c7ea2a75e249e486659f0950cd63e Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 26 Jul 2021 14:52:48 -0400 Subject: [PATCH 477/794] bnxt_en: Fix static checker warning in bnxt_fw_reset_task() Now that we return when bnxt_open() fails in bnxt_fw_reset_task(), there is no need to check for 'rc' value again before invoking bnxt_reenable_sriov(). Fixes: 3958b1da725a ("bnxt_en: fix error path of FW reset") Reported-by: Dan Carpenter Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4db162cee911..89606587b156 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12131,9 +12131,8 @@ static void bnxt_fw_reset_task(struct work_struct *work) /* Make sure fw_reset_state is 0 before clearing the flag */ smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - bnxt_ulp_start(bp, rc); - if (!rc) - bnxt_reenable_sriov(bp); + bnxt_ulp_start(bp, 0); + bnxt_reenable_sriov(bp); bnxt_vf_reps_alloc(bp); bnxt_vf_reps_open(bp); bnxt_dl_health_recovery_done(bp); From 24b5b1978cd5a80db58e2a19db2f9c36fe8d4f7a Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Sun, 25 Jul 2021 18:07:25 +0200 Subject: [PATCH 478/794] clk: stm32f4: fix post divisor setup for I2S/SAI PLLs Enabling the framebuffer leads to a system hang. 
Running, as a debug hack, the store_pan() function in drivers/video/fbdev/core/fbsysfs.c without taking the console_lock, allows to see the crash backtrace on the serial line. ~ # echo 0 0 > /sys/class/graphics/fb0/pan [ 9.719414] Unhandled exception: IPSR = 00000005 LR = fffffff1 [ 9.726937] CPU: 0 PID: 49 Comm: sh Not tainted 5.13.0-rc5 #9 [ 9.733008] Hardware name: STM32 (Device Tree Support) [ 9.738296] PC is at clk_gate_is_enabled+0x0/0x28 [ 9.743426] LR is at stm32f4_pll_div_set_rate+0xf/0x38 [ 9.748857] pc : [<0011e4be>] lr : [<0011f9e3>] psr: 0100000b [ 9.755373] sp : 00bc7be0 ip : 00000000 fp : 001f3ac4 [ 9.760812] r10: 002610d0 r9 : 01efe920 r8 : 00540560 [ 9.766269] r7 : 02e7ddb0 r6 : 0173eed8 r5 : 00000000 r4 : 004027c0 [ 9.773081] r3 : 0011e4bf r2 : 02e7ddb0 r1 : 0173eed8 r0 : 1d3267b8 [ 9.779911] xPSR: 0100000b [ 9.782719] CPU: 0 PID: 49 Comm: sh Not tainted 5.13.0-rc5 #9 [ 9.788791] Hardware name: STM32 (Device Tree Support) [ 9.794120] [<0000afa1>] (unwind_backtrace) from [<0000a33f>] (show_stack+0xb/0xc) [ 9.802421] [<0000a33f>] (show_stack) from [<0000a8df>] (__invalid_entry+0x4b/0x4c) The `pll_num' field in the post_div_data configuration contained a wrong value which also referenced an uninitialized hardware clock when clk_register_pll_div() was called. 
Fixes: 517633ef630e ("clk: stm32f4: Add post divisor for I2S & SAI PLLs") Signed-off-by: Dario Binacchi Reviewed-by: Gabriel Fernandez Link: https://lore.kernel.org/r/20210725160725.10788-1-dariobin@libero.it Signed-off-by: Stephen Boyd --- drivers/clk/clk-stm32f4.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index 18117ce5ff85..5c75e3d906c2 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -526,7 +526,7 @@ struct stm32f4_pll { struct stm32f4_pll_post_div_data { int idx; - u8 pll_num; + int pll_idx; const char *name; const char *parent; u8 flag; @@ -557,13 +557,13 @@ static const struct clk_div_table post_divr_table[] = { #define MAX_POST_DIV 3 static const struct stm32f4_pll_post_div_data post_div_data[MAX_POST_DIV] = { - { CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q", + { CLK_I2SQ_PDIV, PLL_VCO_I2S, "plli2s-q-div", "plli2s-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL}, - { CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q", + { CLK_SAIQ_PDIV, PLL_VCO_SAI, "pllsai-q-div", "pllsai-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL }, - { NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, + { NO_IDX, PLL_VCO_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table }, }; @@ -1774,7 +1774,7 @@ static void __init stm32f4_rcc_init(struct device_node *np) post_div->width, post_div->flag_div, post_div->div_table, - clks[post_div->pll_num], + clks[post_div->pll_idx], &stm32f4_clk_lock); if (post_div->idx != NO_IDX) From 953a92f0e55f370ec76e7f85e332906f1e898ef4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Jul 2021 21:31:59 -0700 Subject: [PATCH 479/794] clk: hisilicon: hi3559a: select RESET_HISI The clk-hi3559a driver uses functions from reset.c so it should select RESET_HISI to avoid build errors. 
Fixes these build errors: aarch64-linux-ld: drivers/clk/hisilicon/clk-hi3559a.o: in function `hi3559av100_crg_remove': clk-hi3559a.c:(.text+0x158): undefined reference to `hisi_reset_exit' aarch64-linux-ld: drivers/clk/hisilicon/clk-hi3559a.o: in function `hi3559av100_crg_probe': clk-hi3559a.c:(.text+0x1f4): undefined reference to `hisi_reset_init' aarch64-linux-ld: clk-hi3559a.c:(.text+0x238): undefined reference to `hisi_reset_exit' Fixes: 6c81966107dc ("clk: hisilicon: Add clock driver for hi3559A SoC") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Dongjiu Geng Cc: Stephen Boyd Cc: stable@vger.kernel.org Cc: linux-clk@vger.kernel.org Cc: Michael Turquette Link: https://lore.kernel.org/r/20210717043159.12566-1-rdunlap@infradead.org Reviewed-by: Dongjiu Geng Signed-off-by: Stephen Boyd --- drivers/clk/hisilicon/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig index 5ecc37aaa118..c1ec75aa4ccd 100644 --- a/drivers/clk/hisilicon/Kconfig +++ b/drivers/clk/hisilicon/Kconfig @@ -18,6 +18,7 @@ config COMMON_CLK_HI3519 config COMMON_CLK_HI3559A bool "Hi3559A Clock Driver" depends on ARCH_HISI || COMPILE_TEST + select RESET_HISI default ARCH_HISI help Build the clock driver for hi3559a. From f2a26a3cff27dfa456fef386fe5df56dcb4b47b6 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 Jul 2021 18:35:15 -0500 Subject: [PATCH 480/794] SMB3: fix readpage for large swap cache readpage was calculating the offset of the page incorrectly for the case of large swapcaches. loff_t offset = (loff_t)page->index << PAGE_SHIFT; As pointed out by Matthew Wilcox, this needs to use page_file_offset() to calculate the offset instead. Pages coming from the swap cache have page->index set to their index within the swapcache, not within the backing file. For a sufficiently large swapcache, we could have overlapping values of page->index within the same backing file. 
Suggested by: Matthew Wilcox (Oracle) Cc: # v5.7+ Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cd108607a070..0a72840a88f1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4619,7 +4619,7 @@ read_complete: static int cifs_readpage(struct file *file, struct page *page) { - loff_t offset = (loff_t)page->index << PAGE_SHIFT; + loff_t offset = page_file_offset(page); int rc = -EACCES; unsigned int xid; From 5ad4df56cd2158965f73416d41fce37906724822 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 26 Jul 2021 16:22:55 -0500 Subject: [PATCH 481/794] smb3: rc uninitialized in one fallocate path Clang detected a problem with rc possibly being uninitialized (when length is zero) in a recently added fallocate code path. Reported-by: kernel test robot Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 23d6f4d71649..2dfd0d8297eb 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3617,7 +3617,8 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, char *buf) { struct cifs_io_parms io_parms = {0}; - int rc, nbytes; + int nbytes; + int rc = 0; struct kvec iov[2]; io_parms.netfid = cfile->fid.netfid; From 35171fbfc0d94aa31b009bb475d156ad1941ab50 Mon Sep 17 00:00:00 2001 From: Nikos Liolios Date: Tue, 27 Jul 2021 06:05:10 +0300 Subject: [PATCH 482/794] ALSA: hda/realtek: Fix headset mic for Acer SWIFT SF314-56 (ALC256) The issue on Acer SWIFT SF314-56 is that headset microphone doesn't work. The following quirk fixed headset microphone issue. The fixup was found by trial and error. Note that the fixup of SF314-54/55 (ALC256_FIXUP_ACER_HEADSET_MIC) was not successful on my SF314-56.
Signed-off-by: Nikos Liolios Cc: Link: https://lore.kernel.org/r/20210727030510.36292-1-liolios.nk@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index caaf0e8aac11..14e1ab7c7954 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8274,6 +8274,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), From b070f9ca78680486927b799cf6126b128a7c2c1b Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 20 Jul 2021 11:47:10 -0700 Subject: [PATCH 483/794] ARM: omap2+: hwmod: fix potential NULL pointer access omap_hwmod_get_pwrdm() may access a NULL clk_hw pointer in some failure cases. Add a check for the case and bail out gracefully if this happens.
Reported-by: Dan Murphy Signed-off-by: Tero Kristo Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 65934b2924fb..12b26e04686f 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3776,6 +3776,7 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh) struct omap_hwmod_ocp_if *oi; struct clockdomain *clkdm; struct clk_hw_omap *clk; + struct clk_hw *hw; if (!oh) return NULL; @@ -3792,7 +3793,14 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh) c = oi->_clk; } - clk = to_clk_hw_omap(__clk_get_hw(c)); + hw = __clk_get_hw(c); + if (!hw) + return NULL; + + clk = to_clk_hw_omap(hw); + if (!clk) + return NULL; + clkdm = clk->clkdm; if (!clkdm) return NULL; From a6d90e9f22328f07343e49e08a4ca483ae8e8abb Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Tue, 20 Jul 2021 11:27:16 -0700 Subject: [PATCH 484/794] bus: ti-sysc: AM3: RNG is GP only Make the RNG on AM3 GP only. Based on this patch from TI v5.4 tree which is based on hwmod data which are now removed: | ARM: AM43xx: hwmod: Move RNG to a GP only links table | | On non-GP devices the RNG is controlled by the secure-side software, | like in DRA7xx hwmod we should not control this IP when we are not | a GP device. | | Signed-off-by: Andrew F. Davis Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 38cb116ed433..2587ed43ee8a 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -2951,6 +2951,8 @@ static int sysc_init_soc(struct sysc *ddata) case SOC_3430 ... 
SOC_3630: sysc_add_disabled(0x48304000); /* timer12 */ break; + case SOC_AM3: + sysc_add_disabled(0x48310000); /* rng */ default: break; } From 20a6b3fd8e2e2c063b25fbf2ee74d86b898e5087 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 16 Jul 2021 09:07:30 -0700 Subject: [PATCH 485/794] ARM: dts: am43x-epos-evm: Reduce i2c0 bus speed for tps65218 Based on the latest timing specifications for the TPS65218 from the data sheet, http://www.ti.com/lit/ds/symlink/tps65218.pdf, document SLDS206 from November 2014, we must change the i2c bus speed to better fit within the minimum high SCL time required for proper i2c transfer. When running at 400khz, measurements show that SCL spends 0.8125 uS/1.666 uS high/low which violates the requirement for minimum high period of SCL provided in datasheet Table 7.6 which is 1 uS. Switching to 100khz gives us 5 uS/5 uS high/low which both fall above the minimum given values for 100 khz, 4.0 uS/4.7 uS high/low. Without this patch occasionally a voltage set operation from the kernel will appear to have worked but the actual voltage reflected on the PMIC will not have updated, causing problems especially with cpufreq that may update to a higher OPP without actually raising the voltage on DCDC2, leading to a hang. 
Signed-off-by: Dave Gerlach Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am43x-epos-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index aae0af10a5b1..2aa75abf85a9 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -582,7 +582,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins>; - clock-frequency = <400000>; + clock-frequency = <100000>; tps65218: tps65218@24 { reg = <0x24>; From 0162a9964365fd26e34575e121b17d021204c481 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Mon, 26 Jul 2021 15:15:25 +0200 Subject: [PATCH 486/794] ARM: dts: am437x-l4: fix typo in can@0 node Replace clock-name with clock-names. Fixes: 2a4117df9b43 ("ARM: dts: Fix dcan driver probe failed on am437x platform") Signed-off-by: Dario Binacchi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-l4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 40ef3973f2a9..ba58e6b0da1d 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1595,7 +1595,7 @@ compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; clocks = <&dcan1_fck>; - clock-name = "fck"; + clock-names = "fck"; syscon-raminit = <&scm_conf 0x644 1>; interrupts = ; status = "disabled"; From c68ef4ad180e09805fa46965d15e1dfadf09ffa5 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 1 Jul 2021 16:00:22 +0200 Subject: [PATCH 487/794] omap5-board-common: remove not physically existing vdds_1v8_main fixed-regulator This device tree include file describes a fixed-regulator connecting smps7_reg output (1.8V) to some 1.8V rail and consumers (vdds_1v8_main). This regulator does not physically exist. 
I assume it was introduced as a wrapper around smps7_reg to provide a speaking signal name "vdds_1v8_main" as label. This fixed-regulator without real function was not an issue in driver code until Commit 98e48cd9283d ("regulator: core: resolve supply for boot-on/always-on regulators") introduced a new check for regulator initialization which makes Palmas regulator registration fail: [ 5.407712] ldo1: supplied by vsys_cobra [ 5.412748] ldo2: supplied by vsys_cobra [ 5.417603] palmas-pmic 48070000.i2c:palmas@48:palmas_pmic: failed to register 48070000.i2c:palmas@48:palmas_pmic regulator The reason is that the supply-chain of regulators is too long and goes from ldo3 through the virtual vdds_1v8_main regulator and then back to smps7. This adds a cross-dependency of probing Palmas regulators and the fixed-regulator which leads to probe deferral by the new check and is no longer resolved. Since we do not control what device tree files including this one reference (either &vdds_1v8_main or &smps7_reg or both) we keep both labels for smps7 for compatibility. Fixes: 98e48cd9283d ("regulator: core: resolve supply for boot-on/always-on regulators") Signed-off-by: H. 
Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 45435bb88c89..373984c130e0 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -30,14 +30,6 @@ regulator-max-microvolt = <5000000>; }; - vdds_1v8_main: fixedregulator-vdds_1v8_main { - compatible = "regulator-fixed"; - regulator-name = "vdds_1v8_main"; - vin-supply = <&smps7_reg>; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - }; - vmmcsd_fixed: fixedregulator-mmcsd { compatible = "regulator-fixed"; regulator-name = "vmmcsd_fixed"; @@ -487,6 +479,7 @@ regulator-boot-on; }; + vdds_1v8_main: smps7_reg: smps7 { /* VDDS_1v8_OMAP over VDDS_1v8_MAIN */ regulator-name = "smps7"; From 9f59efcd51e332aad01e7fa2b3a97cd22d347ceb Mon Sep 17 00:00:00 2001 From: Michael Zaidman Date: Mon, 10 May 2021 19:34:28 +0300 Subject: [PATCH 488/794] HID: ft260: fix format type warning in ft260_word_show() Fixes: 6a82582d9fa4 ("HID: ft260: add usb hid to i2c host bridge driver") Fix warning reported by static analysis when built with W=1 for arm64 by clang version 13.0.0 >> drivers/hid/hid-ft260.c:794:44: warning: format specifies type 'short' but the argument has type 'int' [-Wformat] return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field)); ~~~ ^~~~~~~~~~~~~~~~~~~ %i include/linux/byteorder/generic.h:91:21: note: expanded from macro 'le16_to_cpu' #define le16_to_cpu __le16_to_cpu ^ include/uapi/linux/byteorder/big_endian.h:36:26: note: expanded from macro '__le16_to_cpu' #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x)) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/uapi/linux/swab.h:105:2: note: expanded from macro '__swab16' (__builtin_constant_p((__u16)(x)) ? 
\ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Any sprintf style use of %h or %hi for a sub-int sized value isn't useful since integer promotion is done on the value anyway. So, use %d instead. https://lore.kernel.org/lkml/CAHk-=wgoxnmsj8GEVFJSvTwdnWm8wVJthefNk2n6+4TC=20e0Q@mail.gmail.com/ Signed-off-by: Michael Zaidman Suggested-by: Joe Perches Reported-by: kernel test robot Signed-off-by: Jiri Kosina --- drivers/hid/hid-ft260.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c index f43a8406cb9a..6f10df2042c4 100644 --- a/drivers/hid/hid-ft260.c +++ b/drivers/hid/hid-ft260.c @@ -785,7 +785,7 @@ static int ft260_byte_show(struct hid_device *hdev, int id, u8 *cfg, int len, if (ret < 0) return ret; - return scnprintf(buf, PAGE_SIZE, "%hi\n", *field); + return scnprintf(buf, PAGE_SIZE, "%d\n", *field); } static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len, @@ -797,7 +797,7 @@ static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len, if (ret < 0) return ret; - return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field)); + return scnprintf(buf, PAGE_SIZE, "%d\n", le16_to_cpu(*field)); } #define FT260_ATTR_SHOW(name, reptype, id, type, func) \ From 4b0556b96e1fe7723629bd40e3813a30cd632faf Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Tue, 27 Jul 2021 12:33:26 +0300 Subject: [PATCH 489/794] ALSA: usb-audio: Add registration quirk for JBL Quantum 600 Apparently JBL Quantum 600 has multiple hardware revisions. Apply registration quirk to another device id as well. 
Signed-off-by: Alexander Tsoy Cc: Link: https://lore.kernel.org/r/20210727093326.1153366-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e7accd87e063..326d1b0ea5e6 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1899,6 +1899,7 @@ static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */ REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */ REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */ + REG_QUIRK_ENTRY(0x0ecb, 0x203c, 2), /* JBL Quantum 600 */ REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */ { 0 } /* terminator */ }; From fcef709c2c4baf758950bd7395e4b10527b81e2c Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Sun, 25 Jul 2021 18:54:52 +0530 Subject: [PATCH 490/794] octeontx2-af: Do NIX_RX_SW_SYNC twice NIX_RX_SW_SYNC ensures all existing transactions are finished and pkts are written to LLC/DRAM, queues should be teared down after successful SW_SYNC. Due to a HW errata, in some rare scenarios an existing transaction might end after SW_SYNC operation. To ensure operation is fully done, do the SW_SYNC twice. Signed-off-by: Sunil Goutham Signed-off-by: David S. 
Miller --- .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 30067668eda7..4bfbbdf38770 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -196,11 +196,22 @@ static void nix_rx_sync(struct rvu *rvu, int blkaddr) { int err; - /*Sync all in flight RX packets to LLC/DRAM */ + /* Sync all in flight RX packets to LLC/DRAM */ rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0)); err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true); if (err) - dev_err(rvu->dev, "NIX RX software sync failed\n"); + dev_err(rvu->dev, "SYNC1: NIX RX software sync failed\n"); + + /* SW_SYNC ensures all existing transactions are finished and pkts + * are written to LLC/DRAM, queues should be teared down after + * successful SW_SYNC. Due to a HW errata, in some rare scenarios + * an existing transaction might end after SW_SYNC operation. To + * ensure operation is fully done, do the SW_SYNC twice. + */ + rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0)); + err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true); + if (err) + dev_err(rvu->dev, "SYNC2: NIX RX software sync failed\n"); } static bool is_valid_txschq(struct rvu *rvu, int blkaddr, From c7c9d2102c9c098916ab9e0ab248006107d00d6c Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 25 Jul 2021 00:11:59 +0300 Subject: [PATCH 491/794] net: llc: fix skb_over_panic Syzbot reported skb_over_panic() in llc_pdu_init_as_xid_cmd(). The problem was in wrong LCC header manipulations. Syzbot's reproducer tries to send XID packet. llc_ui_sendmsg() is doing following steps: 1. skb allocation with size = len + header size len is passed from userpace and header size is 3 since addr->sllc_xid is set. 2. skb_reserve() for header_len = 3 3. 
filling all other space with memcpy_from_msg() Ok, at this moment we have fully loaded skb, only headers need to be filled. Then code comes to llc_sap_action_send_xid_c(). This function pushes 3 bytes for LLC PDU header and initializes it. Then comes llc_pdu_init_as_xid_cmd(). It initializes next 3 bytes *AFTER* LLC PDU header and calls skb_push(skb, 3). This looks wrong for 2 reasons: 1. Bytes right after LLC header are user data, so this function was overwriting payload. 2. skb_push(skb, 3) call can cause skb_over_panic() since all free space was filled in llc_ui_sendmsg(). (This can happen if user passed 686 len: 686 + 14 (eth header) + 3 (LLC header) = 703. SKB_DATA_ALIGN(703) = 704) So, in this patch I added 2 new private constants: LLC_PDU_TYPE_U_XID and LLC_PDU_LEN_U_XID. LLC_PDU_LEN_U_XID is used to correctly reserve header size to handle LLC + XID case. LLC_PDU_TYPE_U_XID is used by llc_pdu_header_init() function to push 6 bytes instead of 3. And finally I removed skb_push() call from llc_pdu_init_as_xid_cmd(). These changes should not affect other parts of LLC, since after all steps we just transmit buffer. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+5e5a981ad7cc54c4b2b4@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: David S.
Miller --- include/net/llc_pdu.h | 31 +++++++++++++++++++++++-------- net/llc/af_llc.c | 10 +++++++++- net/llc/llc_s_ac.c | 2 +- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index c0f0a13ed818..49aa79c7b278 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -15,9 +15,11 @@ #include /* Lengths of frame formats */ -#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ -#define LLC_PDU_LEN_S 4 -#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ +#define LLC_PDU_LEN_S 4 +#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +/* header and 1 control byte and XID info */ +#define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info)) /* Known SAP addresses */ #define LLC_GLOBAL_SAP 0xFF #define LLC_NULL_SAP 0x00 /* not network-layer visible */ @@ -50,9 +52,10 @@ #define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */ #define LLC_PDU_TYPE_MASK 0x03 -#define LLC_PDU_TYPE_I 0 /* first bit */ -#define LLC_PDU_TYPE_S 1 /* first two bits */ -#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_I 0 /* first bit */ +#define LLC_PDU_TYPE_S 1 /* first two bits */ +#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */ #define LLC_PDU_TYPE_IS_I(pdu) \ ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0) @@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, u8 ssap, u8 dsap, u8 cr) { - const int hlen = type == LLC_PDU_TYPE_U ? 
3 : 4; + int hlen = 4; /* default value for I and S types */ struct llc_pdu_un *pdu; + switch (type) { + case LLC_PDU_TYPE_U: + hlen = 3; + break; + case LLC_PDU_TYPE_U_XID: + hlen = 6; + break; + } + skb_push(skb, hlen); skb_reset_network_header(skb); pdu = llc_pdu_un_hdr(skb); @@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ - skb_put(skb, sizeof(struct llc_xid_info)); + + /* no need to push/put since llc_pdu_header_init() has already + * pushed 3 + 3 bytes + */ } /** diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7180979114e4..ac5cadd02cfa 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) { u8 rc = LLC_PDU_LEN_U; - if (addr->sllc_test || addr->sllc_xid) + if (addr->sllc_test) rc = LLC_PDU_LEN_U; + else if (addr->sllc_xid) + /* We need to expand header to sizeof(struct llc_xid_info) + * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header + * as XID PDU. In llc_ui_sendmsg() we reserved header size and then + * filled all other space with user data. 
If we won't reserve this + * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data + */ + rc = LLC_PDU_LEN_U_XID; else if (sk->sk_type == SOCK_STREAM) rc = LLC_PDU_LEN_I; return rc; diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index b554f26c68ee..79d1cef8f15a 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) struct llc_sap_state_ev *ev = llc_sap_ev(skb); int rc; - llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, + llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); From 4d1014c1816c0395eca5d1d480f196a4c63119d0 Mon Sep 17 00:00:00 2001 From: Filip Schauer Date: Tue, 27 Jul 2021 13:23:11 +0200 Subject: [PATCH 492/794] drivers core: Fix oops when driver probe fails dma_range_map is freed too early, which might cause an oops when a driver probe fails. Call trace: is_free_buddy_page+0xe4/0x1d4 __free_pages+0x2c/0x88 dma_free_contiguous+0x64/0x80 dma_direct_free+0x38/0xb4 dma_free_attrs+0x88/0xa0 dmam_release+0x28/0x34 release_nodes+0x78/0x8c devres_release_all+0xa8/0x110 really_probe+0x118/0x2d0 __driver_probe_device+0xc8/0xe0 driver_probe_device+0x54/0xec __driver_attach+0xe0/0xf0 bus_for_each_dev+0x7c/0xc8 driver_attach+0x30/0x3c bus_add_driver+0x17c/0x1c4 driver_register+0xc0/0xf8 __platform_driver_register+0x34/0x40 ... This issue is introduced by commit d0243bbd5dd3 ("drivers core: Free dma_range_map when driver probe failed"). It frees dma_range_map before the call to devres_release_all, which is too early. The solution is to free dma_range_map only after devres_release_all.
Fixes: d0243bbd5dd3 ("drivers core: Free dma_range_map when driver probe failed") Cc: stable Signed-off-by: Filip Schauer Link: https://lore.kernel.org/r/20210727112311.GA7645@DESKTOP-E8BN1B0.localdomain Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index daeb9b5763ae..437cd61343b2 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -653,8 +653,6 @@ dev_groups_failed: else if (drv->remove) drv->remove(dev); probe_failed: - kfree(dev->dma_range_map); - dev->dma_range_map = NULL; if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); @@ -662,6 +660,8 @@ pinctrl_bind_failed: device_links_no_driver(dev); devres_release_all(dev); arch_teardown_dma_ops(dev); + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; driver_sysfs_remove(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); From 55f24c27b6c1a840b62fe297616f1f9ea3576cb7 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 27 Jul 2021 14:47:32 +0900 Subject: [PATCH 493/794] dmaengine: uniphier-xdmac: Use readl_poll_timeout_atomic() in atomic state The function uniphier_xdmac_chan_stop() is only called in atomic state. Should use readl_poll_timeout_atomic() there instead of readl_poll_timeout(). 
Reported-by: Dan Carpenter Fixes: 667b9251440b ("dmaengine: uniphier-xdmac: Add UniPhier external DMA controller driver") Signed-off-by: Kunihiko Hayashi Link: https://lore.kernel.org/r/1627364852-28432-1-git-send-email-hayashi.kunihiko@socionext.com Signed-off-by: Vinod Koul --- drivers/dma/uniphier-xdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c index 16b19654873d..d6b8a202474f 100644 --- a/drivers/dma/uniphier-xdmac.c +++ b/drivers/dma/uniphier-xdmac.c @@ -209,8 +209,8 @@ static int uniphier_xdmac_chan_stop(struct uniphier_xdmac_chan *xc) writel(0, xc->reg_ch_base + XDMAC_TSS); /* wait until transfer is stopped */ - return readl_poll_timeout(xc->reg_ch_base + XDMAC_STAT, val, - !(val & XDMAC_STAT_TENF), 100, 1000); + return readl_poll_timeout_atomic(xc->reg_ch_base + XDMAC_STAT, val, + !(val & XDMAC_STAT_TENF), 100, 1000); } /* xc->vc.lock must be held by caller */ From 801e541c79bbc63af852ca21b713ba87cc97c6ad Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Tue, 27 Jul 2021 20:25:06 +0800 Subject: [PATCH 494/794] nfc: s3fwrn5: fix undefined parameter values in dev_err() In the function s3fwrn5_fw_download(), the 'ret' is not assigned, so the correct value should be given in dev_err function. Fixes: a0302ff5906a ("nfc: s3fwrn5: remove unnecessary label") Signed-off-by: Zhang Shengju Signed-off-by: Tang Bin Signed-off-by: David S. 
Miller --- drivers/nfc/s3fwrn5/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index eb5d7a5beac7..1340fab9565e 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -423,7 +423,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (IS_ERR(tfm)) { ret = PTR_ERR(tfm); dev_err(&fw_info->ndev->nfc_dev->dev, - "Cannot allocate shash (code=%d)\n", ret); + "Cannot allocate shash (code=%ld)\n", PTR_ERR(tfm)); goto out; } From 9be550ee43919b070bcd77f9228bdbbbc073245b Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 21 Jul 2021 22:34:36 +0300 Subject: [PATCH 495/794] staging: rtl8712: get rid of flush_scheduled_work This patch is preparation for following patch for error handling refactoring. flush_scheduled_work() takes (wq_completion)events lock and it can lead to deadlock when r871xu_dev_remove() is called from workqueue. To avoid deadlock situation we can change flush_scheduled_work() call to flush_work() call for all possibly scheduled works in this driver, since next patch adds device_release_driver() in case of fw load failure.
Signed-off-by: Pavel Skripkin Cc: stable Link: https://lore.kernel.org/r/6e028b4c457eeb7156c76c6ea3cdb3cb0207c7e1.1626895918.git.paskripkin@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/rtl8712_led.c | 8 ++++++++ drivers/staging/rtl8712/rtl871x_led.h | 1 + drivers/staging/rtl8712/rtl871x_pwrctrl.c | 8 ++++++++ drivers/staging/rtl8712/rtl871x_pwrctrl.h | 1 + drivers/staging/rtl8712/usb_intf.c | 3 ++- 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8712/rtl8712_led.c b/drivers/staging/rtl8712/rtl8712_led.c index 5901026949f2..d5fc9026b036 100644 --- a/drivers/staging/rtl8712/rtl8712_led.c +++ b/drivers/staging/rtl8712/rtl8712_led.c @@ -1820,3 +1820,11 @@ void LedControl871x(struct _adapter *padapter, enum LED_CTL_MODE LedAction) break; } } + +void r8712_flush_led_works(struct _adapter *padapter) +{ + struct led_priv *pledpriv = &padapter->ledpriv; + + flush_work(&pledpriv->SwLed0.BlinkWorkItem); + flush_work(&pledpriv->SwLed1.BlinkWorkItem); +} diff --git a/drivers/staging/rtl8712/rtl871x_led.h b/drivers/staging/rtl8712/rtl871x_led.h index ee19c873cf01..2f0768132ad8 100644 --- a/drivers/staging/rtl8712/rtl871x_led.h +++ b/drivers/staging/rtl8712/rtl871x_led.h @@ -112,6 +112,7 @@ struct led_priv { void r8712_InitSwLeds(struct _adapter *padapter); void r8712_DeInitSwLeds(struct _adapter *padapter); void LedControl871x(struct _adapter *padapter, enum LED_CTL_MODE LedAction); +void r8712_flush_led_works(struct _adapter *padapter); #endif diff --git a/drivers/staging/rtl8712/rtl871x_pwrctrl.c b/drivers/staging/rtl8712/rtl871x_pwrctrl.c index 23cff43437e2..cd6d9ff0bebc 100644 --- a/drivers/staging/rtl8712/rtl871x_pwrctrl.c +++ b/drivers/staging/rtl8712/rtl871x_pwrctrl.c @@ -224,3 +224,11 @@ void r8712_unregister_cmd_alive(struct _adapter *padapter) } mutex_unlock(&pwrctrl->mutex_lock); } + +void r8712_flush_rwctrl_works(struct _adapter *padapter) +{ + struct pwrctrl_priv *pwrctrl = &padapter->pwrctrlpriv; + + 
flush_work(&pwrctrl->SetPSModeWorkItem); + flush_work(&pwrctrl->rpwm_workitem); +} diff --git a/drivers/staging/rtl8712/rtl871x_pwrctrl.h b/drivers/staging/rtl8712/rtl871x_pwrctrl.h index bf6623cfaf27..b35b9c7920eb 100644 --- a/drivers/staging/rtl8712/rtl871x_pwrctrl.h +++ b/drivers/staging/rtl8712/rtl871x_pwrctrl.h @@ -108,5 +108,6 @@ void r8712_cpwm_int_hdl(struct _adapter *padapter, void r8712_set_ps_mode(struct _adapter *padapter, uint ps_mode, uint smart_ps); void r8712_set_rpwm(struct _adapter *padapter, u8 val8); +void r8712_flush_rwctrl_works(struct _adapter *padapter); #endif /* __RTL871X_PWRCTRL_H_ */ diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index 2434b13c8b12..643f21eb1128 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -606,7 +606,8 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf) padapter->surprise_removed = true; if (pnetdev->reg_state != NETREG_UNINITIALIZED) unregister_netdev(pnetdev); /* will call netdev_close() */ - flush_scheduled_work(); + r8712_flush_rwctrl_works(padapter); + r8712_flush_led_works(padapter); udelay(1); /* Stop driver mlme relation timer */ r8712_stop_drv_timers(padapter); From e9e6aa51b2735d83a67d9fa0119cf11abef80d99 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 21 Jul 2021 22:34:47 +0300 Subject: [PATCH 496/794] staging: rtl8712: error handling refactoring There was strange error handling logic in case of fw load failure. For some reason fw loader callback was doing clean up stuff when fw is not available. I don't see any reason behind doing this. Since this driver doesn't have EEPROM firmware let's just disconnect it in case of fw load failure. Doing clean up stuff in 2 different place which can run concurently is not good idea and syzbot found 2 bugs related to this strange approach. 
So, in this pacth I deleted all clean up code from fw callback and made a call to device_release_driver() under device_lock(parent) in case of fw load failure. This approach is more generic and it defend driver from UAF bugs, since all clean up code is moved to one place. Fixes: e02a3b945816 ("staging: rtl8712: fix memory leak in rtl871x_load_fw_cb") Fixes: 8c213fa59199 ("staging: r8712u: Use asynchronous firmware loading") Cc: stable Reported-and-tested-by: syzbot+5872a520e0ce0a7c7230@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+cc699626e48a6ebaf295@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/d49ecc56e97c4df181d7bd4d240b031f315eacc3.1626895918.git.paskripkin@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/hal_init.c | 30 ++++++++++++------- drivers/staging/rtl8712/usb_intf.c | 48 +++++++++++++----------------- 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/drivers/staging/rtl8712/hal_init.c b/drivers/staging/rtl8712/hal_init.c index 22974277afa0..4eff3fdecdb8 100644 --- a/drivers/staging/rtl8712/hal_init.c +++ b/drivers/staging/rtl8712/hal_init.c @@ -29,21 +29,31 @@ #define FWBUFF_ALIGN_SZ 512 #define MAX_DUMP_FWSZ (48 * 1024) +static void rtl871x_load_fw_fail(struct _adapter *adapter) +{ + struct usb_device *udev = adapter->dvobjpriv.pusbdev; + struct device *dev = &udev->dev; + struct device *parent = dev->parent; + + complete(&adapter->rtl8712_fw_ready); + + dev_err(&udev->dev, "r8712u: Firmware request failed\n"); + + if (parent) + device_lock(parent); + + device_release_driver(dev); + + if (parent) + device_unlock(parent); +} + static void rtl871x_load_fw_cb(const struct firmware *firmware, void *context) { struct _adapter *adapter = context; if (!firmware) { - struct usb_device *udev = adapter->dvobjpriv.pusbdev; - struct usb_interface *usb_intf = adapter->pusb_intf; - - dev_err(&udev->dev, "r8712u: Firmware request failed\n"); - usb_put_dev(udev); - 
usb_set_intfdata(usb_intf, NULL); - r8712_free_drv_sw(adapter); - adapter->dvobj_deinit(adapter); - complete(&adapter->rtl8712_fw_ready); - free_netdev(adapter->pnetdev); + rtl871x_load_fw_fail(adapter); return; } adapter->fw = firmware; diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index 643f21eb1128..505ebeb643dc 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -591,36 +591,30 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf) { struct net_device *pnetdev = usb_get_intfdata(pusb_intf); struct usb_device *udev = interface_to_usbdev(pusb_intf); + struct _adapter *padapter = netdev_priv(pnetdev); - if (pnetdev) { - struct _adapter *padapter = netdev_priv(pnetdev); + /* never exit with a firmware callback pending */ + wait_for_completion(&padapter->rtl8712_fw_ready); + usb_set_intfdata(pusb_intf, NULL); + release_firmware(padapter->fw); + if (drvpriv.drv_registered) + padapter->surprise_removed = true; + if (pnetdev->reg_state != NETREG_UNINITIALIZED) + unregister_netdev(pnetdev); /* will call netdev_close() */ + r8712_flush_rwctrl_works(padapter); + r8712_flush_led_works(padapter); + udelay(1); + /* Stop driver mlme relation timer */ + r8712_stop_drv_timers(padapter); + r871x_dev_unload(padapter); + r8712_free_drv_sw(padapter); + free_netdev(pnetdev); - /* never exit with a firmware callback pending */ - wait_for_completion(&padapter->rtl8712_fw_ready); - pnetdev = usb_get_intfdata(pusb_intf); - usb_set_intfdata(pusb_intf, NULL); - if (!pnetdev) - goto firmware_load_fail; - release_firmware(padapter->fw); - if (drvpriv.drv_registered) - padapter->surprise_removed = true; - if (pnetdev->reg_state != NETREG_UNINITIALIZED) - unregister_netdev(pnetdev); /* will call netdev_close() */ - r8712_flush_rwctrl_works(padapter); - r8712_flush_led_works(padapter); - udelay(1); - /* Stop driver mlme relation timer */ - r8712_stop_drv_timers(padapter); - r871x_dev_unload(padapter); - 
r8712_free_drv_sw(padapter); - free_netdev(pnetdev); + /* decrease the reference count of the usb device structure + * when disconnect + */ + usb_put_dev(udev); - /* decrease the reference count of the usb device structure - * when disconnect - */ - usb_put_dev(udev); - } -firmware_load_fail: /* If we didn't unplug usb dongle and remove/insert module, driver * fails on sitesurvey for the first time when device is up. * Reset usb port for sitesurvey fail issue. From c7b65650c7f41d3946c4e2f0bb56dfdb92cfe127 Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Tue, 27 Jul 2021 07:40:58 +0200 Subject: [PATCH 497/794] staging: mt7621-pci: avoid to re-disable clock for those pcies not in use Clock driver for this SoC is using some gates to properly enabling and disabling the access to peripherals. Those gates that are not in use are properly being automatically disabled by the kernel. Pcie driver is explicitly doing a 'clk_disable_unprepare' call for gates of those pcies that are not used. Since kernel has already disabled them, the following warnings appear: WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:952 clk_core_disable+0xe4/0x100 pcie2 already disabled Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0 #0 Stack : 81661680 80082d00 807c0000 00000004 00000000 80a20000 80860000 80792380 814503d4 80862e83 00000000 1431b70 81454360 00000000 00000000 80792380 81431a08 ffffefff fffffea 00000000 81431a14 0000007b 80868820 ffffffff 80792380 1431c70 803d7a24 00000009 807f3a74 00000001 815df810 00000018 0000000 80a20000 ... 
Call Trace: [<80007ed8>] show_stack+0x28/0xf0 [<80381e40>] dump_stack_lvl+0x60/0x80 [<8002cf90>] __warn+0xcc/0x140 [<8002d090>] warn_slowpath_fmt+0x8c/0xac [<803d7a24>] clk_core_disable+0xe4/0x100 [<803da468>] clk_disable+0x38/0x58 [<804cb730>] mt7621_pci_probe+0x980/0xa50 [<8041e624>] platform_probe+0x50/0xbc [<8041bfe4>] really_probe.part.0+0xa8/0x340 [<8041c3dc>] driver_probe_device+0x4c/0x154 [<8041cb88>] __driver_attach+0xb4/0x1b4 [<80419a38>] bus_for_each_dev+0x68/0xa4 [<8041b1e8>] bus_add_driver+0x134/0x214 [<8041d3bc>] driver_register+0x98/0x154 [<80001648>] do_one_initcall+0x50/0x1a8 [<808ea1fc>] kernel_init_freeable+0x270/0x30c [<806dd9dc>] kernel_init+0x20/0x110 [<80002d98>] ret_from_kernel_thread+0x14/0x1c WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:810 clk_core_unprepare+0xf4/0x194 pcie2 already unprepared Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 5.14.0 #0 Stack : 81661680 80082d00 807c0000 00000004 00000000 00000000 81431bc4 80a20000 80860000 80792380 814503d4 80862e83 00000000 00000001 81431b70 81454360 00000000 00000000 80792380 81431a08 ffffefff 00000000 ffffffea 00000000 81431a14 0000009b 80868820 ffffffff 80792380 00000001 81431c70 803d7764 00000009 807f3a74 00000001 815df810 00000018 8040b36c 00000000 80a20000 ... 
Call Trace: [<80007ed8>] show_stack+0x28/0xf0 [<80381e40>] dump_stack_lvl+0x60/0x80 [<8002cf90>] __warn+0xcc/0x140 [<8002d090>] warn_slowpath_fmt+0x8c/0xac [<803d7764>] clk_core_unprepare+0xf4/0x194 [<803d97c4>] clk_unprepare+0x30/0x48 [<804cb738>] mt7621_pci_probe+0x988/0xa50 [<8041e624>] platform_probe+0x50/0xbc [<8041bfe4>] really_probe.part.0+0xa8/0x340 [<8041c3dc>] driver_probe_device+0x4c/0x154 [<8041cb88>] __driver_attach+0xb4/0x1b4 [<80419a38>] bus_for_each_dev+0x68/0xa4 [<8041b1e8>] bus_add_driver+0x134/0x214 [<8041d3bc>] driver_register+0x98/0x154 [<80001648>] do_one_initcall+0x50/0x1a8 [<808ea1fc>] kernel_init_freeable+0x270/0x30c [<806dd9dc>] kernel_init+0x20/0x110 [<80002d98>] ret_from_kernel_thread+0x14/0x1c Avoid to explicitly disable already disabled pcie gates fixes the problem. Fixes: cc4e864a5ce4 ("staging: mt7621-pci: make use of kernel clock apis") Reported-by: DENG Qingfang Signed-off-by: Sergio Paracuellos Link: https://lore.kernel.org/r/20210727054058.10612-1-sergio.paracuellos@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/mt7621-pci/pci-mt7621.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/mt7621-pci/pci-mt7621.c b/drivers/staging/mt7621-pci/pci-mt7621.c index 691030e1a5ed..f9bdf4e33134 100644 --- a/drivers/staging/mt7621-pci/pci-mt7621.c +++ b/drivers/staging/mt7621-pci/pci-mt7621.c @@ -422,7 +422,6 @@ static void mt7621_pcie_init_ports(struct mt7621_pcie *pcie) dev_err(dev, "pcie%d no card, disable it (RST & CLK)\n", slot); mt7621_control_assert(port); - clk_disable_unprepare(port->clk); port->enabled = false; if (slot == 0) { From 30fad76ce4e98263edfa8f885c81d5426c1bf169 Mon Sep 17 00:00:00 2001 From: "Qiang.zhang" Date: Fri, 23 Jul 2021 08:43:34 +0800 Subject: [PATCH 498/794] USB: usbtmc: Fix RCU stall warning rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 1-...!: (2 ticks this GP) idle=d92/1/0x4000000000000000 softirq=25390/25392 fqs=3 (t=12164 jiffies g=31645 q=43226) rcu: rcu_preempt 
kthread starved for 12162 jiffies! g31645 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0 rcu: Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior. rcu: RCU grace-period kthread stack dump: task:rcu_preempt state:R running task ........... usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: usb_submit_urb failed: -19 The function usbtmc_interrupt() resubmits urbs when the error status of an urb is -EPROTO. In systems using the dummy_hcd usb controller this can result in endless interrupt loops when the usbtmc device is disconnected from the host system. Since host controller drivers already try to recover from transmission errors, there is no need to resubmit the urb or try other solutions to repair the error situation. In case of errors the INT pipe just stops to wait for further packets. 
Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation") Cc: stable@vger.kernel.org Reported-by: syzbot+e2eae5639e7203360018@syzkaller.appspotmail.com Signed-off-by: Qiang.zhang Acked-by: Guido Kiener Link: https://lore.kernel.org/r/20210723004334.458930-1-qiang.zhang@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 74d5a9c5238a..73f419adce61 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -2324,17 +2324,10 @@ static void usbtmc_interrupt(struct urb *urb) dev_err(dev, "overflow with length %d, actual length is %d\n", data->iin_wMaxPacketSize, urb->actual_length); fallthrough; - case -ECONNRESET: - case -ENOENT: - case -ESHUTDOWN: - case -EILSEQ: - case -ETIME: - case -EPIPE: + default: /* urb terminated, clean up */ dev_dbg(dev, "urb terminated, status: %d\n", status); return; - default: - dev_err(dev, "unknown status received: %d\n", status); } exit: rv = usb_submit_urb(urb, GFP_ATOMIC); From fa4a8dcfd51b911f101ebc461dfe22230b74dd64 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 27 Jul 2021 15:31:42 +0800 Subject: [PATCH 499/794] usb: gadget: remove leaked entry from udc driver list The usb_add_gadget_udc will add a new gadget to the udc class driver list. Not calling usb_del_gadget_udc in error branch will result in residual gadget entry in the udc driver list. We fix it by calling usb_del_gadget_udc to clean it when error return. 
Fixes: 48ba02b2e2b1 ("usb: gadget: add udc driver for max3420") Acked-by: Felipe Balbi Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20210727073142.84666-1-zhangqilong3@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/max3420_udc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c index 34f4db554977..d2a2b20cc1ad 100644 --- a/drivers/usb/gadget/udc/max3420_udc.c +++ b/drivers/usb/gadget/udc/max3420_udc.c @@ -1255,12 +1255,14 @@ static int max3420_probe(struct spi_device *spi) err = devm_request_irq(&spi->dev, irq, max3420_irq_handler, 0, "max3420", udc); if (err < 0) - return err; + goto del_gadget; udc->thread_task = kthread_create(max3420_thread, udc, "max3420-thread"); - if (IS_ERR(udc->thread_task)) - return PTR_ERR(udc->thread_task); + if (IS_ERR(udc->thread_task)) { + err = PTR_ERR(udc->thread_task); + goto del_gadget; + } irq = of_irq_get_byname(spi->dev.of_node, "vbus"); if (irq <= 0) { /* no vbus irq implies self-powered design */ @@ -1280,10 +1282,14 @@ static int max3420_probe(struct spi_device *spi) err = devm_request_irq(&spi->dev, irq, max3420_vbus_handler, 0, "vbus", udc); if (err < 0) - return err; + goto del_gadget; } return 0; + +del_gadget: + usb_del_gadget_udc(&udc->gadget); + return err; } static int max3420_remove(struct spi_device *spi) From 2867652e4766360adf14dfda3832455e04964f2a Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Fri, 23 Jul 2021 18:59:30 +0300 Subject: [PATCH 500/794] usb: gadget: f_hid: fixed NULL pointer dereference Disconnecting and reconnecting the USB cable can lead to crashes and a variety of kernel log spam. The problem was found and reproduced on the Raspberry Pi [1] and the original fix was created in Raspberry's own fork [2]. 
Link: https://github.com/raspberrypi/linux/issues/3870 [1] Link: https://github.com/raspberrypi/linux/commit/a6e47d5f4efbd2ea6a0b6565cd2f9b7bb217ded5 [2] Signed-off-by: Maxim Devaev Signed-off-by: Phil Elwell Cc: stable Link: https://lore.kernel.org/r/20210723155928.210019-1-mdevaev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 02683ac0719d..08e73e8127b1 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -338,6 +338,11 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, spin_lock_irqsave(&hidg->write_spinlock, flags); + if (!hidg->req) { + spin_unlock_irqrestore(&hidg->write_spinlock, flags); + return -ESHUTDOWN; + } + #define WRITE_COND (!hidg->write_pending) try_again: /* write queue */ @@ -358,8 +363,14 @@ try_again: count = min_t(unsigned, count, hidg->report_length); spin_unlock_irqrestore(&hidg->write_spinlock, flags); - status = copy_from_user(req->buf, buffer, count); + if (!req) { + ERROR(hidg->func.config->cdev, "hidg->req is NULL\n"); + status = -ESHUTDOWN; + goto release_write_pending; + } + + status = copy_from_user(req->buf, buffer, count); if (status != 0) { ERROR(hidg->func.config->cdev, "copy_from_user error\n"); @@ -387,15 +398,18 @@ try_again: spin_unlock_irqrestore(&hidg->write_spinlock, flags); - status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC); - if (status < 0) { - ERROR(hidg->func.config->cdev, - "usb_ep_queue error on int endpoint %zd\n", status); + if (!hidg->in_ep->enabled) { + ERROR(hidg->func.config->cdev, "in_ep is disabled\n"); + status = -ESHUTDOWN; goto release_write_pending; - } else { - status = count; } + status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC); + if (status < 0) + goto release_write_pending; + else + status = count; + return status; 
release_write_pending: spin_lock_irqsave(&hidg->write_spinlock, flags); From afcff6dc690e24d636a41fd4bee6057e7c70eebd Mon Sep 17 00:00:00 2001 From: Maxim Devaev Date: Wed, 21 Jul 2021 21:03:51 +0300 Subject: [PATCH 501/794] usb: gadget: f_hid: added GET_IDLE and SET_IDLE handlers The USB HID standard declares mandatory support for GET_IDLE and SET_IDLE requests for Boot Keyboard. Most hosts can handle their absence, but others like some old/strange UEFIs and BIOSes consider this a critical error and refuse to work with f_hid. This primitive implementation of saving and returning idle is sufficient to meet the requirements of the standard and these devices. Acked-by: Felipe Balbi Cc: stable Signed-off-by: Maxim Devaev Link: https://lore.kernel.org/r/20210721180351.129450-1-mdevaev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 08e73e8127b1..8d50c8b127fd 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -41,6 +41,7 @@ struct f_hidg { unsigned char bInterfaceSubClass; unsigned char bInterfaceProtocol; unsigned char protocol; + unsigned char idle; unsigned short report_desc_length; char *report_desc; unsigned short report_length; @@ -537,6 +538,14 @@ static int hidg_setup(struct usb_function *f, goto respond; break; + case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 + | HID_REQ_GET_IDLE): + VDBG(cdev, "get_idle\n"); + length = min_t(unsigned int, length, 1); + ((u8 *) req->buf)[0] = hidg->idle; + goto respond; + break; + case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 | HID_REQ_SET_REPORT): VDBG(cdev, "set_report | wLength=%d\n", ctrl->wLength); @@ -560,6 +569,14 @@ static int hidg_setup(struct usb_function *f, goto stall; break; + case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 + | 
HID_REQ_SET_IDLE): + VDBG(cdev, "set_idle\n"); + length = 0; + hidg->idle = value; + goto respond; + break; + case ((USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_INTERFACE) << 8 | USB_REQ_GET_DESCRIPTOR): switch (value >> 8) { @@ -787,6 +804,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) hidg_interface_desc.bInterfaceSubClass = hidg->bInterfaceSubClass; hidg_interface_desc.bInterfaceProtocol = hidg->bInterfaceProtocol; hidg->protocol = HID_REPORT_PROTOCOL; + hidg->idle = 1; hidg_ss_in_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length); hidg_ss_in_comp_desc.wBytesPerInterval = cpu_to_le16(hidg->report_length); From 68d9f95d6fd5399d105eaf2308c243536c5d7664 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 27 Jul 2021 13:41:34 +0300 Subject: [PATCH 502/794] usb: musb: Fix suspend and resume issues for PHYs on I2C and SPI As the USB PHYs typically are on I2C or SPI bus for the 2430 glue layer, we need to configure the PHYs early for suspend. The musb glue layer itself needs to be suspended only after musb_suspend(), in suspend_late.
Fixes: 62d472d8ad88 ("usb: musb: Add missing PM suspend and resume functions for 2430 glue") Reported-by: Andreas Kemnade Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20210727104134.52800-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 43 ++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 640a46f0d118..f086960fe2b5 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -35,6 +35,7 @@ struct omap2430_glue { struct device *control_otghs; unsigned int is_runtime_suspended:1; unsigned int needs_resume:1; + unsigned int phy_suspended:1; }; #define glue_to_musb(g) platform_get_drvdata(g->musb) @@ -458,8 +459,10 @@ static int omap2430_runtime_suspend(struct device *dev) omap2430_low_level_exit(musb); - phy_power_off(musb->phy); - phy_exit(musb->phy); + if (!glue->phy_suspended) { + phy_power_off(musb->phy); + phy_exit(musb->phy); + } glue->is_runtime_suspended = 1; @@ -474,8 +477,10 @@ static int omap2430_runtime_resume(struct device *dev) if (!musb) return 0; - phy_init(musb->phy); - phy_power_on(musb->phy); + if (!glue->phy_suspended) { + phy_init(musb->phy); + phy_power_on(musb->phy); + } omap2430_low_level_init(musb); musb_writel(musb->mregs, OTG_INTERFSEL, @@ -489,7 +494,21 @@ static int omap2430_runtime_resume(struct device *dev) return 0; } +/* I2C and SPI PHYs need to be suspended before the glue layer */ static int omap2430_suspend(struct device *dev) +{ + struct omap2430_glue *glue = dev_get_drvdata(dev); + struct musb *musb = glue_to_musb(glue); + + phy_power_off(musb->phy); + phy_exit(musb->phy); + glue->phy_suspended = 1; + + return 0; +} + +/* Glue layer needs to be suspended after musb_suspend() */ +static int omap2430_suspend_late(struct device *dev) { struct omap2430_glue *glue = dev_get_drvdata(dev); @@ -501,7 +520,7 @@ static int omap2430_suspend(struct device *dev) 
return omap2430_runtime_suspend(dev); } -static int omap2430_resume(struct device *dev) +static int omap2430_resume_early(struct device *dev) { struct omap2430_glue *glue = dev_get_drvdata(dev); @@ -513,10 +532,24 @@ static int omap2430_resume(struct device *dev) return omap2430_runtime_resume(dev); } +static int omap2430_resume(struct device *dev) +{ + struct omap2430_glue *glue = dev_get_drvdata(dev); + struct musb *musb = glue_to_musb(glue); + + phy_init(musb->phy); + phy_power_on(musb->phy); + glue->phy_suspended = 0; + + return 0; +} + static const struct dev_pm_ops omap2430_pm_ops = { .runtime_suspend = omap2430_runtime_suspend, .runtime_resume = omap2430_runtime_resume, .suspend = omap2430_suspend, + .suspend_late = omap2430_suspend_late, + .resume_early = omap2430_resume_early, .resume = omap2430_resume, }; From 00de6a572f30ee93cad7e0704ec4232e5e72bda8 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 21 Jul 2021 16:29:05 +0300 Subject: [PATCH 503/794] usb: host: ohci-at91: suspend/resume ports after/before OHCI accesses On SAMA7G5 suspending ports will cut the access to OHCI registers and any subsequent access to them will lead to CPU being blocked trying to access that memory. Same thing happens on resume: if OHCI memory is accessed before resuming ports the CPU will block on that access. The OHCI memory is accessed on suspend/resume through ohci_suspend()/ohci_resume().
Acked-by: Alan Stern Signed-off-by: Claudiu Beznea Cc: stable Link: https://lore.kernel.org/r/20210721132905.1970713-1-claudiu.beznea@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 9bbd7ddd0003..a24aea3d2759 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -611,8 +611,6 @@ ohci_hcd_at91_drv_suspend(struct device *dev) if (ohci_at91->wakeup) enable_irq_wake(hcd->irq); - ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); - ret = ohci_suspend(hcd, ohci_at91->wakeup); if (ret) { if (ohci_at91->wakeup) @@ -632,7 +630,10 @@ ohci_hcd_at91_drv_suspend(struct device *dev) /* flush the writes */ (void) ohci_readl (ohci, &ohci->regs->control); msleep(1); + ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); at91_stop_clock(ohci_at91); + } else { + ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); } return ret; @@ -644,6 +645,8 @@ ohci_hcd_at91_drv_resume(struct device *dev) struct usb_hcd *hcd = dev_get_drvdata(dev); struct ohci_at91_priv *ohci_at91 = hcd_to_ohci_at91_priv(hcd); + ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); + if (ohci_at91->wakeup) disable_irq_wake(hcd->irq); else @@ -651,8 +654,6 @@ ohci_hcd_at91_drv_resume(struct device *dev) ohci_resume(hcd, false); - ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); - return 0; } From bf88fef0b6f1488abeca594d377991171c00e52a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 17 Jul 2021 21:21:27 +0300 Subject: [PATCH 504/794] usb: otg-fsm: Fix hrtimer list corruption The HNP work can be re-scheduled while it's still in flight. This results in re-initialization of the busy work, resetting the hrtimer's list node of the work and crashing the kernel with a NULL dereference within kernel/timer once the work's timer expires. It's very easy to trigger this problem by re-plugging the USB cable quickly.
Initialize HNP work only once to fix this trouble. Unable to handle kernel NULL pointer dereference at virtual address 00000126) ... PC is at __run_timers.part.0+0x150/0x228 LR is at __next_timer_interrupt+0x51/0x9c ... (__run_timers.part.0) from [] (run_timer_softirq+0x2f/0x50) (run_timer_softirq) from [] (__do_softirq+0xd5/0x2f0) (__do_softirq) from [] (irq_exit+0xab/0xb8) (irq_exit) from [] (handle_domain_irq+0x45/0x60) (handle_domain_irq) from [] (gic_handle_irq+0x6b/0x7c) (gic_handle_irq) from [] (__irq_svc+0x65/0xac) Cc: stable@vger.kernel.org Acked-by: Peter Chen Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210717182134.30262-6-digetx@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/usb-otg-fsm.c | 6 +++++- include/linux/usb/otg-fsm.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index 3740cf95560e..0697fde51d00 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -193,7 +193,11 @@ static void otg_start_hnp_polling(struct otg_fsm *fsm) if (!fsm->host_req_flag) return; - INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work); + if (!fsm->hnp_work_inited) { + INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work); + fsm->hnp_work_inited = true; + } + schedule_delayed_work(&fsm->hnp_polling_work, msecs_to_jiffies(T_HOST_REQ_POLL)); } diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 3aee78dda16d..784659d4dc99 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -196,6 +196,7 @@ struct otg_fsm { struct mutex lock; u8 *host_req_flag; struct delayed_work hnp_polling_work; + bool hnp_work_inited; bool state_changed; }; From 4c4c1257b844ffe5d0933684e612f92c4b78e120 Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Thu, 22 Jul 2021 14:27:36 +0800 Subject: [PATCH 505/794] virt: acrn: Do hcall_destroy_vm() before resource release The ACRN 
hypervisor has scenarios which could run a real-time guest VM. The real-time guest VM occupies dedicated CPU cores, be assigned with dedicated PCI devices. It can run without the Service VM after boot up. hcall_destroy_vm() returns failure when a real-time guest VM refuses. The clearing of flag ACRN_VM_FLAG_DESTROYED causes some kernel resource double-freed in a later acrn_vm_destroy(). Do hcall_destroy_vm() before resource release to drop this chance to destroy the VM if hypercall fails. Fixes: 9c5137aedd11 ("virt: acrn: Introduce VM management interfaces") Cc: stable Signed-off-by: Shuo Liu Signed-off-by: Fei Li Link: https://lore.kernel.org/r/20210722062736.15050-1-fei1.li@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/acrn/vm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/virt/acrn/vm.c b/drivers/virt/acrn/vm.c index 0d002a355a93..fbc9f1042000 100644 --- a/drivers/virt/acrn/vm.c +++ b/drivers/virt/acrn/vm.c @@ -64,6 +64,14 @@ int acrn_vm_destroy(struct acrn_vm *vm) test_and_set_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags)) return 0; + ret = hcall_destroy_vm(vm->vmid); + if (ret < 0) { + dev_err(acrn_dev.this_device, + "Failed to destroy VM %u\n", vm->vmid); + clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags); + return ret; + } + /* Remove from global VM list */ write_lock_bh(&acrn_vm_list_lock); list_del_init(&vm->list); @@ -78,14 +86,6 @@ int acrn_vm_destroy(struct acrn_vm *vm) vm->monitor_page = NULL; } - ret = hcall_destroy_vm(vm->vmid); - if (ret < 0) { - dev_err(acrn_dev.this_device, - "Failed to destroy VM %u\n", vm->vmid); - clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags); - return ret; - } - acrn_vm_all_ram_unmap(vm); dev_dbg(acrn_dev.this_device, "VM %u destroyed.\n", vm->vmid); From b910a0206b59eb90ea8ff76d146f4c3156da61e9 Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Mon, 28 Jun 2021 10:50:33 +0200 Subject: [PATCH 506/794] drm/msm/dpu: Fix sm8250_mdp register length The downstream dts lists this 
value as 0x494, and not 0x45c. Fixes: af776a3e1c30 ("drm/msm/dpu: add SM8250 to hw catalog") Signed-off-by: Robert Foss Reviewed-by: Dmitry Baryshkov Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20210628085033.9905-1-robert.foss@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index d01c4c919504..704dace895cb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -296,7 +296,7 @@ static const struct dpu_mdp_cfg sc7180_mdp[] = { static const struct dpu_mdp_cfg sm8250_mdp[] = { { .name = "top_0", .id = MDP_TOP, - .base = 0x0, .len = 0x45C, + .base = 0x0, .len = 0x494, .features = 0, .highest_bank_bit = 0x3, /* TODO: 2 for LP_DDR4 */ .clk_ctrls[DPU_CLK_CTRL_VIG0] = { From bceddc2cb581dffc94370517f7eedbd9aa16c74b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 7 Jul 2021 11:01:13 -0700 Subject: [PATCH 507/794] drm/msm: Fix display fault handling It turns out that when the display is enabled by the bootloader, we can get some transient iommu faults from the display. Which doesn't go over too well when we install a fault handler that is gpu specific. To avoid this, defer installing the fault handler until we get around to setting up per-process pgtables (which is adreno_smmu specific). 
The arm-smmu fallback error reporting is sufficient for reporting display related faults (and in fact was all we had prior to f8f934c180f629bb927a04fd90d) Reported-by: Dmitry Baryshkov Reported-by: Yassine Oudjana Fixes: 2a574cc05d38 ("drm/msm: Improve the a6xx page fault handler") Signed-off-by: Rob Clark Tested-by: John Stultz Tested-by: Yassine Oudjana Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210707180113.840741-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_iommu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index eed2a762e9dd..bcaddbba564d 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -142,6 +142,9 @@ static const struct iommu_flush_ops null_tlb_ops = { .tlb_add_page = msm_iommu_tlb_add_page, }; +static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, + unsigned long iova, int flags, void *arg); + struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) { struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(parent->dev); @@ -157,6 +160,13 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) if (!ttbr1_cfg) return ERR_PTR(-ENODEV); + /* + * Defer setting the fault handler until we have a valid adreno_smmu + * to avoid accidentially installing a GPU specific fault handler for + * the display's iommu + */ + iommu_set_fault_handler(iommu->domain, msm_fault_handler, iommu); + pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL); if (!pagetable) return ERR_PTR(-ENOMEM); @@ -300,7 +310,6 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) iommu->domain = domain; msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); - iommu_set_fault_handler(domain, msm_fault_handler, iommu); atomic_set(&iommu->pagetables, 0); From 7591c532b818ef4b8e3e635d842547c08b3a32b4 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh 
Date: Tue, 13 Jul 2021 08:54:01 -0700 Subject: [PATCH 508/794] drm/msm/dp: use dp_ctrl_off_link_stream during PHY compliance test run DP cable should always connect to DPU during the entire PHY compliance testing run. Since DP PHY compliance test is executed at irq_hpd event context, dp_ctrl_off_link_stream() should be used instead of dp_ctrl_off(). dp_ctrl_off() is used for the unplug event, which is triggered when the DP cable is disconnected. Changes in V2: -- add fixes statement Fixes: f21c8a276c2d ("drm/msm/dp: handle irq_hpd with sink_count = 0 correctly") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1626191647-13901-2-git-send-email-khsieh@codeaurora.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index ee221d835fa0..eaddfd739885 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1526,7 +1526,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl) * running. Add the global reset just before disabling the * link clocks and core clocks. */ - ret = dp_ctrl_off(&ctrl->dp_ctrl); + ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl); if (ret) { DRM_ERROR("failed to disable DP controller\n"); return ret; From f9a39932fa54b6421e751ada7a285da809146421 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 21 Jul 2021 19:44:34 -0700 Subject: [PATCH 509/794] drm/msm/dp: Initialize the INTF_CONFIG register Some bootloaders set the widebus enable bit in the INTF_CONFIG register, but configuration of widebus isn't yet supported. Ensure that the register has a known value, with widebus disabled.
Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Bjorn Andersson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20210722024434.3313167-1-bjorn.andersson@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_catalog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index ca96e3514790..c0423e76eed7 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -771,6 +771,7 @@ int dp_catalog_panel_timing_cfg(struct dp_catalog *dp_catalog) dp_write_link(catalog, REG_DP_HSYNC_VSYNC_WIDTH_POLARITY, dp_catalog->width_blanking); dp_write_link(catalog, REG_DP_ACTIVE_HOR_VER, dp_catalog->dp_active); + dp_write_p0(catalog, MMSS_DP_INTF_CONFIG, 0); return 0; } From afc9b8b6bab8d3d3a9ae67e1d64093ad626c92a0 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 23 Jul 2021 09:55:39 -0700 Subject: [PATCH 510/794] drm/msm/dp: signal audio plugged change at dp_pm_resume There is a scenario in which the DP cable is unplugged from the DUT while the system is suspended, which causes the audio option state to not match the real connection state. Fix this problem by signaling audio plugged change with the real-time connection status at dp_pm_resume() so that the audio option will be in the correct state after the system has resumed. Changes in V2: -- correct Fixes tag commit id.
Fixes: f591dbb5fb8c ("drm/msm/dp: power off DP phy at suspend") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1627059339-12142-1-git-send-email-khsieh@codeaurora.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 051c1be1de7e..1d204a0fbdd2 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1311,6 +1311,10 @@ static int dp_pm_resume(struct device *dev) else dp->dp_display.is_connected = false; + dp_display_handle_plugged_change(g_dp_display, + dp->dp_display.is_connected); + + mutex_unlock(&dp->event_mutex); return 0; From fc71c9e6f41f9912d22a75dfa76bc10811af7e22 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 14 Jul 2021 11:28:56 -0400 Subject: [PATCH 511/794] drm/msm/dp: Initialize dp->aux->drm_dev before registration Avoids the following WARN: [ 3.009556] ------------[ cut here ]------------ [ 3.014306] WARNING: CPU: 7 PID: 109 at drivers/gpu/drm/drm_dp_helper.c:1796 drm_dp_aux_register+0xa4/0xac [ 3.024209] Modules linked in: [ 3.027351] CPU: 7 PID: 109 Comm: kworker/7:8 Not tainted 5.10.47 #69 [ 3.033958] Hardware name: Google Lazor (rev1 - 2) (DT) [ 3.039323] Workqueue: events deferred_probe_work_func [ 3.044596] pstate: 60c00009 (nZCv daif +PAN +UAO -TCO BTYPE=--) [ 3.050761] pc : drm_dp_aux_register+0xa4/0xac [ 3.055329] lr : dp_aux_register+0x40/0x88 [ 3.059538] sp : ffffffc010ad3920 [ 3.062948] x29: ffffffc010ad3920 x28: ffffffa64196ac70 [ 3.067239] mmc1: Command Queue Engine enabled [ 3.068406] x27: ffffffa64196ac68 x26: 0000000000000001 [ 3.068407] x25: 0000000000000002 x24: 0000000000000060 [ 3.068409] x23: ffffffa642ab3400 x22: ffffffe126c10e5b [ 3.068410] x21: ffffffa641dc3188 x20: ffffffa641963c10 [ 3.068412] x19: ffffffa642aba910 x18: 00000000ffff0a00 [ 3.068414] x17: 000000476f8e002a x16: 00000000000000b8 [ 
3.073008] mmc1: new HS400 Enhanced strobe MMC card at address 0001 [ 3.078448] x15: ffffffffffffffff x14: ffffffffffffffff [ 3.078450] x13: 0000000000000030 x12: 0000000000000030 [ 3.078452] x11: 0101010101010101 x10: ffffffe12647a914 [ 3.078453] x9 : ffffffe12647a8cc x8 : 0000000000000000 [ 3.084452] mmcblk1: mmc1:0001 DA4032 29.1 GiB [ 3.089372] [ 3.089372] x7 : 6c6064717372fefe x6 : ffffffa642b11494 [ 3.089374] x5 : 0000000000000000 x4 : 6d006c657869ffff [ 3.089375] x3 : 000000006c657869 x2 : 000000000000000c [ 3.089376] x1 : ffffffe126c3ae3c x0 : ffffffa642aba910 [ 3.089381] Call trace: [ 3.094931] mmcblk1boot0: mmc1:0001 DA4032 partition 1 4.00 MiB [ 3.100291] drm_dp_aux_register+0xa4/0xac [ 3.100292] dp_aux_register+0x40/0x88 [ 3.100294] dp_display_bind+0x64/0xcc [ 3.100295] component_bind_all+0xdc/0x210 [ 3.100298] msm_drm_bind+0x1e8/0x5d4 [ 3.100301] try_to_bring_up_master+0x168/0x1b0 [ 3.105861] mmcblk1boot1: mmc1:0001 DA4032 partition 2 4.00 MiB [ 3.112282] __component_add+0xa0/0x158 [ 3.112283] component_add+0x1c/0x28 [ 3.112284] dp_display_probe+0x33c/0x380 [ 3.112286] platform_drv_probe+0x9c/0xbc [ 3.112287] really_probe+0x140/0x35c [ 3.112289] driver_probe_device+0x84/0xc0 [ 3.112292] __device_attach_driver+0x94/0xb0 [ 3.117967] mmcblk1rpmb: mmc1:0001 DA4032 partition 3 16.0 MiB, chardev (239:0) [ 3.123201] bus_for_each_drv+0x8c/0xd8 [ 3.123202] __device_attach+0xc4/0x150 [ 3.123204] device_initial_probe+0x1c/0x28 [ 3.123205] bus_probe_device+0x3c/0x9c [ 3.123206] deferred_probe_work_func+0x90/0xcc [ 3.123211] process_one_work+0x218/0x3ec [ 3.131976] mmcblk1: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 p11 p12 [ 3.134123] worker_thread+0x288/0x3e8 [ 3.134124] kthread+0x148/0x1b0 [ 3.134127] ret_from_fork+0x10/0x30 [ 3.134128] ---[ end trace cfb9fce3f70f824d ]--- Signed-off-by: Sean Paul Reviewed-by: Abhinav Kumar Link: https://lore.kernel.org/r/20210714152910.55093-1-sean@poorly.run Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 1 + 1 file 
changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 1d204a0fbdd2..867388a399ad 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -219,6 +219,7 @@ static int dp_display_bind(struct device *dev, struct device *master, goto end; } + dp->aux->drm_dev = drm; rc = dp_aux_register(dp->aux); if (rc) { DRM_ERROR("DRM DP AUX register failed\n"); From 48e8a7b5a551f956002b60d2095bdfb58db96e59 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 24 Jun 2021 17:43:03 +0100 Subject: [PATCH 512/794] perf cs-etm: Split --dump-raw-trace by AUX records Currently --dump-raw-trace skips queueing and splitting buffers because of an early exit condition in cs_etm__process_auxtrace_info(). Once that is removed we can print the split data by using the queues and searching for split buffers with the same reference as the one that is currently being processed. This keeps the same behaviour of dumping in file order when an AUXTRACE event appears, rather than moving trace dump to where AUX records are in the file. There will be a newline and size printout for each fragment. For example this buffer is comprised of two AUX records, but was printed as one: 0 0 0x8098 [0x30]: PERF_RECORD_AUXTRACE size: 0xa0 offset: 0 ref: 0x491a4dfc52fc0e6e idx: 0 t . ... CoreSight ETM Trace data: size 160 bytes Idx:0; ID:10; I_ASYNC : Alignment Synchronisation. Idx:12; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:17; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0x0000000000000000; Idx:80; ID:10; I_ASYNC : Alignment Synchronisation. Idx:92; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:97; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0xFFFFDE2AD3FD76D4; But is now printed as two fragments: 0 0 0x8098 [0x30]: PERF_RECORD_AUXTRACE size: 0xa0 offset: 0 ref: 0x491a4dfc52fc0e6e idx: 0 t . ... 
CoreSight ETM Trace data: size 80 bytes Idx:0; ID:10; I_ASYNC : Alignment Synchronisation. Idx:12; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:17; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0x0000000000000000; . ... CoreSight ETM Trace data: size 80 bytes Idx:80; ID:10; I_ASYNC : Alignment Synchronisation. Idx:92; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:97; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0xFFFFDE2AD3FD76D4; Decoding errors that appeared in problematic files are now not present, for example: Idx:808; ID:1c; I_BAD_SEQUENCE : Invalid Sequence in packet.[I_ASYNC] ... PKTP_ETMV4I_0016 : 0x0014 (OCSD_ERR_INVALID_PCKT_HDR) [Invalid packet header]; TrcIdx=822 Signed-off-by: James Clark Reviewed-by: Mathieu Poirier Tested-by: Leo Yan Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210624164303.28632-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 22f8326547eb..bc1f64873c8f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2434,6 +2434,22 @@ static int cs_etm__process_event(struct perf_session *session, return 0; } +static void dump_queued_data(struct cs_etm_auxtrace *etm, + struct perf_record_auxtrace *event) +{ + struct auxtrace_buffer *buf; + unsigned int i; + /* + * Find all buffers with same reference in the queues and dump them. + * This is because the queues can contain multiple entries of the same + * buffer that were split on aux records. 
+ */ + for (i = 0; i < etm->queues.nr_queues; ++i) + list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) + if (buf->reference == event->reference) + cs_etm__dump_event(etm, buf); +} + static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -2466,7 +2482,8 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, cs_etm__dump_event(etm, buffer); auxtrace_buffer__put_data(buffer); } - } + } else if (dump_trace) + dump_queued_data(etm, &event->auxtrace); return 0; } @@ -3042,7 +3059,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); - return 0; } err = cs_etm__synth_events(etm, session); From 8e3341257e3b5774ec8cd3ef1ba0c0d3fada322b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 27 Jul 2021 17:25:01 +0300 Subject: [PATCH 513/794] Revert "thunderbolt: Hide authorized attribute if router does not support PCIe tunnels" This reverts commit 6f3badead6a078cf3c71f381f9d84ac922984a00. It turns out bolt depends on having authorized attribute visible under each device. Hiding it makes bolt crash as several people have reported on various bug trackers. For this reason revert the commit. 
Link: https://gitlab.freedesktop.org/bolt/bolt/-/issues/174 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1979765 Link: https://bugs.archlinux.org/task/71569 Cc: stable@vger.kernel.org Cc: Christian Kellner Fixes: 6f3badead6a0 ("thunderbolt: Hide authorized attribute if router does not support PCIe tunnels") Signed-off-by: Mika Westerberg Link: https://lore.kernel.org/r/20210727142501.27476-1-mika.westerberg@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 83b1ef3d5d03..10d6b228cc94 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -1875,18 +1875,6 @@ static struct attribute *switch_attrs[] = { NULL, }; -static bool has_port(const struct tb_switch *sw, enum tb_port_type type) -{ - const struct tb_port *port; - - tb_switch_for_each_port(sw, port) { - if (!port->disabled && port->config.type == type) - return true; - } - - return false; -} - static umode_t switch_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1895,8 +1883,7 @@ static umode_t switch_attr_is_visible(struct kobject *kobj, if (attr == &dev_attr_authorized.attr) { if (sw->tb->security_level == TB_SECURITY_NOPCIE || - sw->tb->security_level == TB_SECURITY_DPONLY || - !has_port(sw, TB_TYPE_PCIE_UP)) + sw->tb->security_level == TB_SECURITY_DPONLY) return 0; } else if (attr == &dev_attr_device.attr) { if (!sw->device) From c07d5c9226980ca5ae21c6a2714baa95be2ce164 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 20 Jul 2021 23:10:19 +0800 Subject: [PATCH 514/794] perf pmu: Fix alias matching Commit c47a5599eda324ba ("perf tools: Fix pattern matching for same substring in different PMU type"), may have fixed some alias matching, but has broken some others. 
Firstly it cannot handle the simple scenario of PMU name in form pmu_name{digits} - it can only handle pmu_name_{digits}. Secondly it cannot handle more complex matching in the case where we have multiple tokens. In this scenario, the code failed to realise that we may examine multiple substrings in the PMU name. Fix in two ways: - Change perf_pmu__valid_suffix() to accept a PMU name without '_' in the suffix - Only pay attention to perf_pmu__valid_suffix() for the final token Also add const qualifiers as necessary to avoid casting. Fixes: c47a5599eda324ba ("perf tools: Fix pattern matching for same substring in different PMU type") Signed-off-by: John Garry Tested-by: Jin Yao Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1626793819-79090-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a1bd7007a8b4..fc683bc41715 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -742,9 +742,13 @@ struct pmu_events_map *__weak pmu_events_map__find(void) return perf_pmu__find_map(NULL); } -static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) +/* + * Suffix must be in form tok_{digits}, or tok{digits}, or same as pmu_name + * to be valid. 
+ */ +static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok) { - char *p; + const char *p; if (strncmp(pmu_name, tok, strlen(tok))) return false; @@ -753,12 +757,16 @@ static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) if (*p == 0) return true; - if (*p != '_') - return false; + if (*p == '_') + ++p; - ++p; - if (*p == 0 || !isdigit(*p)) - return false; + /* Ensure we end in a number */ + while (1) { + if (!isdigit(*p)) + return false; + if (*(++p) == 0) + break; + } return true; } @@ -789,12 +797,19 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name) * match "socket" in "socketX_pmunameY" and then "pmuname" in * "pmunameY". */ - for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { + while (1) { + char *next_tok = strtok_r(NULL, ",", &tmp); + name = strstr(name, tok); - if (!name || !perf_pmu__valid_suffix((char *)name, tok)) { + if (!name || + (!next_tok && !perf_pmu__valid_suffix(name, tok))) { res = false; goto out; } + if (!next_tok) + break; + tok = next_tok; + name += strlen(tok); } res = true; From 91e273712ab8dd8c31924ac7714b21e011137e98 Mon Sep 17 00:00:00 2001 From: Pratik Vishwakarma Date: Fri, 23 Jul 2021 18:08:40 +0530 Subject: [PATCH 515/794] drm/amdgpu: Check pmops for desired suspend state [Why] User might change the suspend behaviour from OS. [How] Check with pm for target suspend state and set s0ix flag only for s2idle state. 
v2: User might change default suspend state, use target state v3: squash in build fix Suggested-by: Lijo Lazar Signed-off-by: Pratik Vishwakarma Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 84a1b4bc9bb4..6cc0d4fa4d0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1042,7 +1043,7 @@ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) #if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) - return true; + return pm_suspend_target_state == PM_SUSPEND_TO_IDLE; } #endif return false; From c8f8e96805b54968b4d1d54850f87fc39128a532 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Thu, 8 Jul 2021 14:50:48 -0400 Subject: [PATCH 516/794] drm/amd/display: Guard DST_Y_PREFETCH register overflow in DCN21 [why] DST_Y_PREFETCH can overflow when DestinationLinesForPrefetch values are too large due to the former being limited to 8 bits. [how] Set the maximum value of DestinationLinesForPrefetch to be 255 * refclk period. 
Reviewed-by: Laktyushkin Dmytro Acked-by: Solomon Chiu Signed-off-by: Victor Lu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index d25a7d38d21f..6655bb99fdfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -841,6 +841,9 @@ static bool CalculatePrefetchSchedule( else *DestinationLinesForPrefetch = dst_y_prefetch_equ; + // Limit to prevent overflow in DST_Y_PREFETCH register + *DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75); + dml_print("DML: VStartup: %d\n", VStartup); dml_print("DML: TCalc: %f\n", TCalc); dml_print("DML: TWait: %f\n", TWait); From 8d177577cd9118c29960401a6de9dc4db00f2052 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Tue, 6 Jul 2021 15:45:11 -0400 Subject: [PATCH 517/794] drm/amd/display: Add missing DCN21 IP parameter [why] IP parameter min_meta_chunk_size_bytes is read for bandwidth calculations but it was never defined. [how] Define min_meta_chunk_size_bytes and initialize value to 256. 
Reviewed-by: Laktyushkin Dmytro Acked-by: Solomon Chiu Signed-off-by: Victor Lu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index f3d98e3ba624..bf0a198eae15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -109,6 +109,7 @@ struct _vcs_dpi_ip_params_st dcn2_1_ip = { .max_page_table_levels = 4, .pte_chunk_size_kbytes = 2, .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, .writeback_chunk_size_kbytes = 2, .line_buffer_size_bits = 789504, .is_line_buffer_bpp_fixed = 0, From b53e041d8e4308f7324999398aec092dbcb130f5 Mon Sep 17 00:00:00 2001 From: Dale Zhao Date: Fri, 16 Jul 2021 09:38:17 +0800 Subject: [PATCH 518/794] drm/amd/display: ensure dentist display clock update finished in DCN20 [Why] We don't check DENTIST_DISPCLK_CHG_DONE to ensure dentist display clock is updated to target value. In some scenarios with large display clock margin, it will deliver unfinished display clock and cause issues like display black screen. [How] Check DENTIST_DISPCLK_CHG_DONE to ensure the display clock has been updated to the target value before the driver does other clock related actions.
Reviewed-by: Cyr Aric Acked-by: Solomon Chiu Signed-off-by: Dale Zhao Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 6e0c5c664fdc..a5331b96f551 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -197,7 +197,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); -// REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100); + REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 1000); REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider); REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100); From f2ad3accefc63e72e9932e141c21875cc04beec8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 21 Jul 2021 18:11:51 -0400 Subject: [PATCH 519/794] drm/amdgpu/display: only enable aux backlight control for OLED panels We've gotten a number of reports about backlight control not working on panels which indicate that they use aux backlight control. A recent patch: commit 2d73eabe2984a435737498ab39bb1500a9ffe9a9 Author: Camille Cho Date: Thu Jul 8 18:28:37 2021 +0800 drm/amd/display: Only set default brightness for OLED [Why] We used to unconditionally set backlight path as AUX for panels capable of backlight adjustment via DPCD in set default brightness. [How] This should be limited to OLED panel only since we control backlight via PWM path for SDR mode in LCD HDR panel. 
Reviewed-by: Krunoslav Kovac Acked-by: Rodrigo Siqueira Signed-off-by: Camille Cho Signed-off-by: Alex Deucher Changes some other code to only use aux for backlight control on OLED panels. The commit message seems to indicate that PWM should be used for SDR mode on HDR panels. Do something similar for backlight control in general. This may need to be revisited if and when HDR started to get used. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1438 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=213715 Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3a2a5ff57e9..b53f49a23ddc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2429,9 +2429,9 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll; min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll; - if (caps->ext_caps->bits.oled == 1 || + if (caps->ext_caps->bits.oled == 1 /*|| caps->ext_caps->bits.sdr_aux_backlight_control == 1 || - caps->ext_caps->bits.hdr_aux_backlight_control == 1) + caps->ext_caps->bits.hdr_aux_backlight_control == 1*/) caps->aux_support = true; if (amdgpu_backlight == 0) From ec30ce41f03820b6289513344b4281ca3a1151f4 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Sun, 25 Jul 2021 16:49:01 +0000 Subject: [PATCH 520/794] maintainers: add bugs and chat URLs for amdgpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add links to the issue tracker and the IRC channel for the amdgpu driver. 
Signed-off-by: Simon Ser Cc: Alex Deucher Cc: Christian König Cc: Pan Xinhui Signed-off-by: Alex Deucher --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 19135a9d778e..056966c9aac9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15468,6 +15468,8 @@ M: Pan, Xinhui L: amd-gfx@lists.freedesktop.org S: Supported T: git https://gitlab.freedesktop.org/agd5f/linux.git +B: https://gitlab.freedesktop.org/drm/amd/-/issues +C: irc://irc.oftc.net/radeon F: drivers/gpu/drm/amd/ F: drivers/gpu/drm/radeon/ F: include/uapi/drm/amdgpu_drm.h From 773af69121ecc6c53d192661af8d53bb3db028ae Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jul 2021 10:25:55 -0600 Subject: [PATCH 521/794] io_uring: always reissue from task_work context As a safeguard, if we're going to queue async work, do it from task_work from the original task. This ensures that we can always sanely create threads, regardless of what the reissue context may be. Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a4331deb0427..6ba101cd4661 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2060,6 +2060,12 @@ static void io_req_task_queue(struct io_kiocb *req) io_req_task_work_add(req); } +static void io_req_task_queue_reissue(struct io_kiocb *req) +{ + req->io_task_work.func = io_queue_async_work; + io_req_task_work_add(req); +} + static inline void io_queue_next(struct io_kiocb *req) { struct io_kiocb *nxt = io_req_find_next(req); @@ -2248,7 +2254,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, !(req->flags & REQ_F_DONT_REISSUE)) { req->iopoll_completed = 0; req_ref_get(req); - io_queue_async_work(req); + io_req_task_queue_reissue(req); continue; } @@ -2771,7 +2777,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, req->flags &= ~REQ_F_REISSUE; if (io_resubmit_prep(req)) { req_ref_get(req); -
io_queue_async_work(req); + io_req_task_queue_reissue(req); } else { int cflags = 0; From ec6446d5304b3c3dd692a1e244df7e40bbb5af36 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Tue, 13 Jul 2021 13:12:16 +0530 Subject: [PATCH 522/794] fpga: dfl: fme: Fix cpu hotplug issue in performance reporting The performance reporting driver added cpu hotplug feature but it didn't add pmu migration call in cpu offline function. This can create an issue in case the current designated cpu being used to collect fme pmu data goes offline, as based on current code we are not migrating fme pmu to new target cpu. Because of that perf will still try to fetch data from that offline cpu and hence we will not get counter data. Patch fixed this issue by adding perf_pmu_migrate_context call in fme_perf_offline_cpu function. Fixes: 724142f8c42a ("fpga: dfl: fme: add performance reporting support") Cc: stable@vger.kernel.org Tested-by: Xu Yilun Acked-by: Wu Hao Signed-off-by: Kajol Jain Signed-off-by: Moritz Fischer --- drivers/fpga/dfl-fme-perf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/fpga/dfl-fme-perf.c b/drivers/fpga/dfl-fme-perf.c index 4299145ef347..587c82be12f7 100644 --- a/drivers/fpga/dfl-fme-perf.c +++ b/drivers/fpga/dfl-fme-perf.c @@ -953,6 +953,8 @@ static int fme_perf_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; priv->cpu = target; + perf_pmu_migrate_context(&priv->pmu, cpu, target); + return 0; } From 4ee107c514139960682cc0f3623a24e86fda1a13 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 27 Jul 2021 17:26:13 +0800 Subject: [PATCH 523/794] clk: qcom: smd-rpm: Fix MSM8936 RPM_SMD_PCNOC_A_CLK Commit a0384ecfe2aa ("clk: qcom: smd-rpm: De-duplicate identical entries") introduces the following regression on MSM8936/MSM8939, as RPM_SMD_PCNOC_A_CLK gets pointed to pcnoc_clk by mistake. Fix it by correcting the clock to pcnoc_a_clk.
[ 1.307363] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 1.313593] Mem abort info: [ 1.322512] ESR = 0x96000004 [ 1.325132] EC = 0x25: DABT (current EL), IL = 32 bits [ 1.338872] SET = 0, FnV = 0 [ 1.355483] EA = 0, S1PTW = 0 [ 1.368702] FSC = 0x04: level 0 translation fault [ 1.383294] Data abort info: [ 1.398292] ISV = 0, ISS = 0x00000004 [ 1.398297] CM = 0, WnR = 0 [ 1.398301] [0000000000000000] user address but active_mm is swapper [ 1.404193] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 1.420596] Modules linked in: [ 1.420604] CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 5.14.0-rc3+ #198 [ 1.441010] pc : __clk_register+0x48/0x780 [ 1.446045] lr : __clk_register+0x3c/0x780 [ 1.449953] sp : ffff800010063440 [ 1.454031] x29: ffff800010063440 x28: 0000000000000004 x27: 0000000000000066 [ 1.457423] x26: 0000000000000001 x25: 000000007fffffff x24: ffff800010f9f388 [ 1.464540] x23: ffff00007fc12a90 x22: ffff0000034b2010 x21: 0000000000000000 [ 1.471658] x20: ffff800010f9fff8 x19: ffff00000152a700 x18: 0000000000000001 [ 1.478778] x17: ffff00007fbd40c8 x16: 0000000000000460 x15: 0000000000000465 [ 1.485895] x14: ffffffffffffffff x13: 746e756f635f7265 x12: 696669746f6e5f6b [ 1.493013] x11: 0000000000000006 x10: 0000000000000000 x9 : 0000000000000000 [ 1.500131] x8 : ffff00000152a800 x7 : 0000000000000000 x6 : 000000000000003f [ 1.507249] x5 : 0000000000000040 x4 : 0000000000000000 x3 : 0000000000000004 [ 1.514367] x2 : 0000000000000000 x1 : 0000000000000cc0 x0 : ffff00000152a700 [ 1.521486] Call trace: [ 1.528598] __clk_register+0x48/0x780 [ 1.530855] clk_hw_register+0x20/0x60 [ 1.534674] devm_clk_hw_register+0x50/0xa8 [ 1.538408] rpm_smd_clk_probe+0x1a4/0x260 [ 1.542488] platform_probe+0x68/0xd8 [ 1.546653] really_probe+0x140/0x2f8 [ 1.550386] __driver_probe_device+0x78/0xe0 [ 1.554033] driver_probe_device+0x80/0x110 [ 1.558373] __device_attach_driver+0x90/0xe0 [ 1.562280] bus_for_each_drv+0x78/0xc8 [ 1.566793] 
__device_attach+0xf0/0x150 [ 1.570438] device_initial_probe+0x14/0x20 [ 1.574259] bus_probe_device+0x9c/0xa8 [ 1.578425] device_add+0x378/0x870 [ 1.582243] of_device_add+0x44/0x60 [ 1.585716] of_platform_device_create_pdata+0xc0/0x110 [ 1.589538] of_platform_bus_create+0x17c/0x388 [ 1.594485] of_platform_populate+0x50/0xf0 [ 1.598998] qcom_smd_rpm_probe+0xd4/0x128 [ 1.603164] rpmsg_dev_probe+0xbc/0x1a8 [ 1.607330] really_probe+0x140/0x2f8 [ 1.611063] __driver_probe_device+0x78/0xe0 [ 1.614883] driver_probe_device+0x80/0x110 [ 1.619224] __device_attach_driver+0x90/0xe0 [ 1.623131] bus_for_each_drv+0x78/0xc8 [ 1.627643] __device_attach+0xf0/0x150 [ 1.631289] device_initial_probe+0x14/0x20 [ 1.635109] bus_probe_device+0x9c/0xa8 [ 1.639275] device_add+0x378/0x870 [ 1.643095] device_register+0x20/0x30 [ 1.646567] rpmsg_register_device+0x54/0x90 [ 1.650387] qcom_channel_state_worker+0x168/0x288 [ 1.654814] process_one_work+0x1a0/0x328 [ 1.659415] worker_thread+0x4c/0x420 [ 1.663494] kthread+0x150/0x160 [ 1.667138] ret_from_fork+0x10/0x18 [ 1.670442] Code: 97f56b92 b40034a0 aa0003f3 52819801 (f94002a0) [ 1.674004] ---[ end trace 412fa6f47384cdfe ]--- Fixes: a0384ecfe2aa ("clk: qcom: smd-rpm: De-duplicate identical entries") Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210727092613.23056-1-shawn.guo@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-smd-rpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c index 800b2fef1887..b2c142f3a649 100644 --- a/drivers/clk/qcom/clk-smd-rpm.c +++ b/drivers/clk/qcom/clk-smd-rpm.c @@ -467,7 +467,7 @@ DEFINE_CLK_SMD_RPM(msm8936, sysmmnoc_clk, sysmmnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, static struct clk_smd_rpm *msm8936_clks[] = { [RPM_SMD_PCNOC_CLK] = &msm8916_pcnoc_clk, - [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_clk, + [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_a_clk, [RPM_SMD_SNOC_CLK] = &msm8916_snoc_clk, [RPM_SMD_SNOC_A_CLK] = 
&msm8916_snoc_a_clk, [RPM_SMD_BIMC_CLK] = &msm8916_bimc_clk, From bb7262b295472eb6858b5c49893954794027cd84 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:40:07 +0100 Subject: [PATCH 524/794] timers: Move clearing of base::timer_running under base:: Lock syzbot reported KCSAN data races vs. timer_base::timer_running being set to NULL without holding base::lock in expire_timers(). This looks innocent and most reads are clearly not problematic, but Frederic identified an issue which is: int data = 0; void timer_func(struct timer_list *t) { data = 1; } CPU 0 CPU 1 ------------------------------ -------------------------- base = lock_timer_base(timer, &flags); raw_spin_unlock(&base->lock); if (base->running_timer != timer) call_timer_fn(timer, fn, baseclk); ret = detach_if_pending(timer, base, true); base->running_timer = NULL; raw_spin_unlock_irqrestore(&base->lock, flags); raw_spin_lock(&base->lock); x = data; If the timer has previously executed on CPU 1 and then CPU 0 can observe base->running_timer == NULL and returns, assuming the timer has completed, but it's not guaranteed on all architectures. The comment for del_timer_sync() makes that guarantee. Moving the assignment under base->lock prevents this. For non-RT kernel it's performance wise completely irrelevant whether the store happens before or after taking the lock. For an RT kernel moving the store under the lock requires an extra unlock/lock pair in the case that there is a waiter for the timer, but that's not the end of the world. 
Reported-by: syzbot+aa7c2385d46c5eba0b89@syzkaller.appspotmail.com Reported-by: syzbot+abea4558531bae1ba9fe@syzkaller.appspotmail.com Fixes: 030dcdd197d7 ("timers: Prepare support for PREEMPT_RT") Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/87lfea7gw8.fsf@nanos.tec.linutronix.de Cc: stable@vger.kernel.org --- kernel/time/timer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9eb11c2209e5..e3d2c23c413d 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1265,8 +1265,10 @@ static inline void timer_base_unlock_expiry(struct timer_base *base) static void timer_sync_wait_running(struct timer_base *base) { if (atomic_read(&base->timer_waiters)) { + raw_spin_unlock_irq(&base->lock); spin_unlock(&base->expiry_lock); spin_lock(&base->expiry_lock); + raw_spin_lock_irq(&base->lock); } } @@ -1457,14 +1459,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) if (timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); call_timer_fn(timer, fn, baseclk); - base->running_timer = NULL; raw_spin_lock(&base->lock); + base->running_timer = NULL; } else { raw_spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, baseclk); + raw_spin_lock_irq(&base->lock); base->running_timer = NULL; timer_sync_wait_running(base); - raw_spin_lock_irq(&base->lock); } } } From 8373cd38a8888549ace7c7617163a2e826970a92 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 27 Jul 2021 22:03:50 +0800 Subject: [PATCH 525/794] net: hns3: change the method of obtaining default ptp cycle The ptp cycle is related to the hardware, so it may cause compatibility issues if a fixed value is used in driver. Therefore, the method of obtaining this value is changed to read from the register rather than use a fixed value in driver. 
Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_ptp.c | 36 +++++++++++++++---- .../hisilicon/hns3/hns3pf/hclge_ptp.h | 10 ++++-- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 3b1f84502e36..befa9bcc2f2f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -5,9 +5,27 @@ #include "hclge_main.h" #include "hnae3.h" +static int hclge_ptp_get_cycle(struct hclge_dev *hdev) +{ + struct hclge_ptp *ptp = hdev->ptp; + + ptp->cycle.quo = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG) & + HCLGE_PTP_CYCLE_QUO_MASK; + ptp->cycle.numer = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG); + ptp->cycle.den = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); + + if (ptp->cycle.den == 0) { + dev_err(&hdev->pdev->dev, "invalid ptp cycle denominator!\n"); + return -EINVAL; + } + + return 0; +} + static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) { struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp); + struct hclge_ptp_cycle *cycle = &hdev->ptp->cycle; u64 adj_val, adj_base, diff; unsigned long flags; bool is_neg = false; @@ -18,7 +36,7 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) is_neg = true; } - adj_base = HCLGE_PTP_CYCLE_ADJ_BASE * HCLGE_PTP_CYCLE_ADJ_UNIT; + adj_base = (u64)cycle->quo * (u64)cycle->den + (u64)cycle->numer; adj_val = adj_base * ppb; diff = div_u64(adj_val, 1000000000ULL); @@ -29,16 +47,16 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) /* This clock cycle is defined by three part: quotient, numerator * and denominator. For example, 2.5ns, the quotient is 2, - * denominator is fixed to HCLGE_PTP_CYCLE_ADJ_UNIT, and numerator - * is 0.5 * HCLGE_PTP_CYCLE_ADJ_UNIT. 
+ * denominator is fixed to ptp->cycle.den, and numerator + * is 0.5 * ptp->cycle.den. */ - quo = div_u64_rem(adj_val, HCLGE_PTP_CYCLE_ADJ_UNIT, &numerator); + quo = div_u64_rem(adj_val, cycle->den, &numerator); spin_lock_irqsave(&hdev->ptp->lock, flags); - writel(quo, hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG); + writel(quo & HCLGE_PTP_CYCLE_QUO_MASK, + hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG); writel(numerator, hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG); - writel(HCLGE_PTP_CYCLE_ADJ_UNIT, - hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); + writel(cycle->den, hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); writel(HCLGE_PTP_CYCLE_ADJ_EN, hdev->ptp->io_base + HCLGE_PTP_CYCLE_CFG_REG); spin_unlock_irqrestore(&hdev->ptp->lock, flags); @@ -475,6 +493,10 @@ int hclge_ptp_init(struct hclge_dev *hdev) ret = hclge_ptp_create_clock(hdev); if (ret) return ret; + + ret = hclge_ptp_get_cycle(hdev); + if (ret) + return ret; } ret = hclge_ptp_int_en(hdev, true); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h index 5a202b775471..dbf5f4c08019 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h @@ -29,6 +29,7 @@ #define HCLGE_PTP_TIME_ADJ_REG 0x60 #define HCLGE_PTP_TIME_ADJ_EN BIT(0) #define HCLGE_PTP_CYCLE_QUO_REG 0x64 +#define HCLGE_PTP_CYCLE_QUO_MASK GENMASK(7, 0) #define HCLGE_PTP_CYCLE_DEN_REG 0x68 #define HCLGE_PTP_CYCLE_NUM_REG 0x6C #define HCLGE_PTP_CYCLE_CFG_REG 0x70 @@ -37,9 +38,7 @@ #define HCLGE_PTP_CUR_TIME_SEC_L_REG 0x78 #define HCLGE_PTP_CUR_TIME_NSEC_REG 0x7C -#define HCLGE_PTP_CYCLE_ADJ_BASE 2 #define HCLGE_PTP_CYCLE_ADJ_MAX 500000000 -#define HCLGE_PTP_CYCLE_ADJ_UNIT 100000000 #define HCLGE_PTP_SEC_H_OFFSET 32u #define HCLGE_PTP_SEC_L_MASK GENMASK(31, 0) @@ -47,6 +46,12 @@ #define HCLGE_PTP_FLAG_TX_EN 1 #define HCLGE_PTP_FLAG_RX_EN 2 +struct hclge_ptp_cycle { + u32 quo; + u32 numer; + u32 den; +}; + 
struct hclge_ptp { struct hclge_dev *hdev; struct ptp_clock *clock; @@ -58,6 +63,7 @@ struct hclge_ptp { spinlock_t lock; /* protects ptp registers */ u32 ptp_cfg; u32 last_tx_seqid; + struct hclge_ptp_cycle cycle; unsigned long tx_start; unsigned long tx_cnt; unsigned long tx_skipped; From 76b4f357d0e7d8f6f0013c733e6cba1773c266d3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 1 Jul 2021 17:41:00 +0200 Subject: [PATCH 526/794] x86/kvm: fix vcpu-id indexed array sizes KVM_MAX_VCPU_ID is the maximum vcpu-id of a guest, and not the number of vcpu-ids. Fix array indexed by vcpu-id to have KVM_MAX_VCPU_ID+1 elements. Note that this is currently no real problem, as KVM_MAX_VCPU_ID is an odd number, resulting in always enough padding being available at the end of those arrays. Nevertheless this should be fixed in order to avoid rare problems in case someone is using an even number for KVM_MAX_VCPU_ID. Signed-off-by: Juergen Gross Message-Id: <20210701154105.23215-2-jgross@suse.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 2 +- arch/x86/kvm/ioapic.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 698969e18fe3..ff005fe738a4 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID); + bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 660401700075..11e4065e1617 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -43,13 +43,13 @@ struct kvm_vcpu; struct dest_map { /* vcpu bitmap where IRQ has been sent */ - DECLARE_BITMAP(map, 
KVM_MAX_VCPU_ID); + DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ - u8 vectors[KVM_MAX_VCPU_ID]; + u8 vectors[KVM_MAX_VCPU_ID + 1]; }; From 15b7b737deb30e1f8f116a08e723173b55ebd2f3 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:56 +0000 Subject: [PATCH 527/794] KVM: selftests: Fix missing break in dirty_log_perf_test arg parsing There is a missing break statement which causes a fallthrough to the next statement where optarg will be null and a segmentation fault will be generated. Fixes: 9e965bb75aae ("KVM: selftests: Add backing src parameter to dirty_log_perf_test") Reviewed-by: Ben Gardon Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-6-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 04a2641261be..80cbd3a748c0 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -312,6 +312,7 @@ int main(int argc, char *argv[]) break; case 'o': p.partition_vcpu_memory_access = false; + break; case 's': p.backing_src = parse_backing_src_type(optarg); break; From c33e05d9b067433252b1008d2f37bf64e11151f1 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:57 +0000 Subject: [PATCH 528/794] KVM: selftests: Introduce access_tracking_perf_test This test measures the performance effects of KVM's access tracking. Access tracking is driven by the MMU notifiers test_young, clear_young, and clear_flush_young. These notifiers do not have a direct userspace API, however the clear_young notifier can be triggered by marking pages as idle in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to enable access tracking on guest memory.
To measure performance this test runs a VM with a configurable number of vCPUs that each touch every page in disjoint regions of memory. Performance is measured in the time it takes all vCPUs to finish touching their predefined region. Example invocation: $ ./access_tracking_perf_test -v 8 Testing guest mode: PA-bits:ANY, VA-bits:48, 4K pages guest physical test memory offset: 0xffdfffff000 Populating memory : 1.337752570s Writing to populated memory : 0.010177640s Reading from populated memory : 0.009548239s Mark memory idle : 23.973131748s Writing to idle memory : 0.063584496s Mark memory idle : 24.924652964s Reading from idle memory : 0.062042814s Breaking down the results: * "Populating memory": The time it takes for all vCPUs to perform the first write to every page in their region. * "Writing to populated memory" / "Reading from populated memory": The time it takes for all vCPUs to write and read to every page in their region after it has been populated. This serves as a control for the later results. * "Mark memory idle": The time it takes for every vCPU to mark every page in their region as idle through page_idle. * "Writing to idle memory" / "Reading from idle memory": The time it takes for all vCPUs to write and read to every page in their region after it has been marked idle. This test should be portable across architectures but it is only enabled for x86_64 since that's all I have tested. 
Reviewed-by: Ben Gardon Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-7-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/access_tracking_perf_test.c | 429 ++++++++++++++++++ 3 files changed, 431 insertions(+) create mode 100644 tools/testing/selftests/kvm/access_tracking_perf_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 06a351b4f93b..0709af0144c8 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -38,6 +38,7 @@ /x86_64/xen_vmcall_test /x86_64/xss_msr_test /x86_64/vmx_pmu_msrs_test +/access_tracking_perf_test /demand_paging_test /dirty_log_test /dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index b853be2ae3c6..5832f510a16c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c new file mode 100644 index 000000000000..e2baa187a21e --- /dev/null +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * access_tracking_perf_test + * + * Copyright (C) 2021, Google, Inc. + * + * This test measures the performance effects of KVM's access tracking. + * Access tracking is driven by the MMU notifiers test_young, clear_young, and + * clear_flush_young. 
These notifiers do not have a direct userspace API, + * however the clear_young notifier can be triggered by marking a pages as idle + * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to + * enable access tracking on guest memory. + * + * To measure performance this test runs a VM with a configurable number of + * vCPUs that each touch every page in disjoint regions of memory. Performance + * is measured in the time it takes all vCPUs to finish touching their + * predefined region. + * + * Note that a deterministic correctness test of access tracking is not possible + * by using page_idle as it exists today. This is for a few reasons: + * + * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This + * means subsequent guest accesses are not guaranteed to see page table + * updates made by KVM until some time in the future. + * + * 2. page_idle only operates on LRU pages. Newly allocated pages are not + * immediately allocated to LRU lists. Instead they are held in a "pagevec", + * which is drained to LRU lists some time in the future. There is no + * userspace API to force this drain to occur. + * + * These limitations are worked around in this test by using a large enough + * region of memory for each vCPU such that the number of translations cached in + * the TLB and the number of pages held in pagevecs are a small fraction of the + * overall workload. And if either of those conditions are not true this test + * will fail rather than silently passing. + */ +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" + +/* Global variable used to synchronize all of the vCPU threads. */ +static int iteration = -1; + +/* Defines what vCPU threads should do during a given iteration. */ +static enum { + /* Run the vCPU to access all its memory. */ + ITERATION_ACCESS_MEMORY, + /* Mark the vCPU's memory idle in page_idle. 
*/ + ITERATION_MARK_IDLE, +} iteration_work; + +/* Set to true when vCPU threads should exit. */ +static bool done; + +/* The iteration that was last completed by each vCPU. */ +static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; + +/* Whether to overlap the regions of memory vCPUs access. */ +static bool overlap_memory_access; + +struct test_params { + /* The backing source for the region of memory. */ + enum vm_mem_backing_src_type backing_src; + + /* The amount of memory to allocate for each vCPU. */ + uint64_t vcpu_memory_bytes; + + /* The number of vCPUs to create in the VM. */ + int vcpus; +}; + +static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) +{ + uint64_t value; + off_t offset = index * sizeof(value); + + TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value), + "pread from %s offset 0x%" PRIx64 " failed!", + filename, offset); + + return value; + +} + +#define PAGEMAP_PRESENT (1ULL << 63) +#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1) + +static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) +{ + uint64_t hva = (uint64_t) addr_gva2hva(vm, gva); + uint64_t entry; + uint64_t pfn; + + entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize()); + if (!(entry & PAGEMAP_PRESENT)) + return 0; + + pfn = entry & PAGEMAP_PFN_MASK; + if (!pfn) { + print_skip("Looking up PFNs requires CAP_SYS_ADMIN"); + exit(KSFT_SKIP); + } + + return pfn; +} + +static bool is_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64); + + return !!((bits >> (pfn % 64)) & 1); +} + +static void mark_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = 1ULL << (pfn % 64); + + TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8, + "Set page_idle bits for PFN 0x%" PRIx64, pfn); +} + +static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id) +{ + uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva; + uint64_t pages = 
perf_test_args.vcpu_args[vcpu_id].pages; + uint64_t page; + uint64_t still_idle = 0; + uint64_t no_pfn = 0; + int page_idle_fd; + int pagemap_fd; + + /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */ + if (overlap_memory_access && vcpu_id) + return; + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle."); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); + + for (page = 0; page < pages; page++) { + uint64_t gva = base_gva + page * perf_test_args.guest_page_size; + uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); + + if (!pfn) { + no_pfn++; + continue; + } + + if (is_page_idle(page_idle_fd, pfn)) { + still_idle++; + continue; + } + + mark_page_idle(page_idle_fd, pfn); + } + + /* + * Assumption: Less than 1% of pages are going to be swapped out from + * under us during this test. + */ + TEST_ASSERT(no_pfn < pages / 100, + "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.", + vcpu_id, no_pfn, pages); + + /* + * Test that at least 90% of memory has been marked idle (the rest might + * not be marked idle because the pages have not yet made it to an LRU + * list or the translations are still cached in the TLB). 90% is + * arbitrary; high enough that we ensure most memory access went through + * access tracking but low enough as to not make the test too brittle + * over time and across architectures. 
+ */ + TEST_ASSERT(still_idle < pages / 10, + "vCPU%d: Too many pages still idle (%"PRIu64 " out of %" + PRIu64 ").\n", + vcpu_id, still_idle, pages); + + close(page_idle_fd); + close(pagemap_fd); +} + +static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id, + uint64_t expected_ucall) +{ + struct ucall uc; + uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc); + + TEST_ASSERT(expected_ucall == actual_ucall, + "Guest exited unexpectedly (expected ucall %" PRIu64 + ", got %" PRIu64 ")", + expected_ucall, actual_ucall); +} + +static bool spin_wait_for_next_iteration(int *current_iteration) +{ + int last_iteration = *current_iteration; + + do { + if (READ_ONCE(done)) + return false; + + *current_iteration = READ_ONCE(iteration); + } while (last_iteration == *current_iteration); + + return true; +} + +static void *vcpu_thread_main(void *arg) +{ + struct perf_test_vcpu_args *vcpu_args = arg; + struct kvm_vm *vm = perf_test_args.vm; + int vcpu_id = vcpu_args->vcpu_id; + int current_iteration = -1; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + + while (spin_wait_for_next_iteration(&current_iteration)) { + switch (READ_ONCE(iteration_work)) { + case ITERATION_ACCESS_MEMORY: + vcpu_run(vm, vcpu_id); + assert_ucall(vm, vcpu_id, UCALL_SYNC); + break; + case ITERATION_MARK_IDLE: + mark_vcpu_memory_idle(vm, vcpu_id); + break; + }; + + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + } + + return NULL; +} + +static void spin_wait_for_vcpu(int vcpu_id, int target_iteration) +{ + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != + target_iteration) { + continue; + } +} + +/* The type of memory accesses to perform in the VM. */ +enum access_type { + ACCESS_READ, + ACCESS_WRITE, +}; + +static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) +{ + struct timespec ts_start; + struct timespec ts_elapsed; + int next_iteration; + int vcpu_id; + + /* Kick off the vCPUs by incrementing iteration.
*/ + next_iteration = ++iteration; + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + + /* Wait for all vCPUs to finish the iteration. */ + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) + spin_wait_for_vcpu(vcpu_id, next_iteration); + + ts_elapsed = timespec_elapsed(ts_start); + pr_info("%-30s: %ld.%09lds\n", + description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec); +} + +static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access, + const char *description) +{ + perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1; + sync_global_to_guest(vm, perf_test_args); + iteration_work = ITERATION_ACCESS_MEMORY; + run_iteration(vm, vcpus, description); +} + +static void mark_memory_idle(struct kvm_vm *vm, int vcpus) +{ + /* + * Even though this parallelizes the work across vCPUs, this is still a + * very slow operation because page_idle forces the test to mark one pfn + * at a time and the clear_young notifier serializes on the KVM MMU + * lock. + */ + pr_debug("Marking VM memory idle (slow)...\n"); + iteration_work = ITERATION_MARK_IDLE; + run_iteration(vm, vcpus, "Mark memory idle"); +} + +static pthread_t *create_vcpu_threads(int vcpus) +{ + pthread_t *vcpu_threads; + int i; + + vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0])); + TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads."); + + for (i = 0; i < vcpus; i++) { + vcpu_last_completed_iteration[i] = iteration; + pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main, + &perf_test_args.vcpu_args[i]); + } + + return vcpu_threads; +} + +static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus) +{ + int i; + + /* Set done to signal the vCPU threads to exit */ + done = true; + + for (i = 0; i < vcpus; i++) + pthread_join(vcpu_threads[i], NULL); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *params = arg; + struct kvm_vm *vm; + pthread_t *vcpu_threads; + int vcpus = params->vcpus; + + vm = perf_test_create_vm(mode, vcpus, 
params->vcpu_memory_bytes, + params->backing_src); + + perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes, + !overlap_memory_access); + + vcpu_threads = create_vcpu_threads(vcpus); + + pr_info("\n"); + access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory"); + + /* As a control, read and write to the populated memory first. */ + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory"); + access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory"); + + /* Repeat on memory that has been marked as idle. */ + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory"); + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory"); + + terminate_vcpu_threads(vcpu_threads, vcpus); + free(vcpu_threads); + perf_test_destroy_vm(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n", + name); + puts(""); + printf(" -h: Display this help message."); + guest_modes_help(); + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 
10M or 3G.\n" + " (default: 1G)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + printf(" -o: Overlap guest memory accesses instead of partitioning\n" + " them into a separate region of memory for each vCPU.\n"); + printf(" -s: specify the type of memory that should be used to\n" + " back the guest data region.\n\n"); + backing_src_help(); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct test_params params = { + .backing_src = VM_MEM_SRC_ANONYMOUS, + .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, + .vcpus = 1, + }; + int page_idle_fd; + int opt; + + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 'b': + params.vcpu_memory_bytes = parse_size(optarg); + break; + case 'v': + params.vcpus = atoi(optarg); + break; + case 'o': + overlap_memory_access = true; + break; + case 's': + params.backing_src = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + if (page_idle_fd < 0) { + print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled"); + exit(KSFT_SKIP); + } + close(page_idle_fd); + + for_each_guest_mode(run_test, &params); + + return 0; +} From 3fa5e8fd0a0e4ccc03c91df225be2e9b7100800c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 26 Jul 2021 12:39:01 -0400 Subject: [PATCH 529/794] KVM: SVM: delay svm_vcpu_init_msrpm after svm->vmcb is initialized Right now, svm_hv_vmcb_dirty_nested_enlightenments has an incorrect dereference of vmcb->control.reserved_sw before the vmcb is checked for being non-NULL. The compiler is usually sinking the dereference after the check; instead of doing this ourselves in the source, ensure that svm_hv_vmcb_dirty_nested_enlightenments is only called with a non-NULL VMCB.
Reported-by: Dan Carpenter Cc: Vineeth Pillai Signed-off-by: Paolo Bonzini [Untested for now due to issues with my AMD machine. - Paolo] --- arch/x86/kvm/svm/svm.c | 4 ++-- arch/x86/kvm/svm/svm_onhyperv.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9a6987549e1b..4bcb95bb8ed7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1406,8 +1406,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) goto error_free_vmsa_page; } - svm_vcpu_init_msrpm(vcpu, svm->msrpm); - svm->vmcb01.ptr = page_address(vmcb01_page); svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT); @@ -1419,6 +1417,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm_switch_vmcb(svm, &svm->vmcb01); init_vmcb(vcpu); + svm_vcpu_init_msrpm(vcpu, svm->msrpm); + svm_init_osvw(vcpu); vcpu->arch.microcode_version = 0x01000065; diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index 9b9a55abc29f..c53b8bf8d013 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( * as we mark it dirty unconditionally towards end of vcpu * init phase. */ - if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && + if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && hve->hv_enlightenments_control.msr_bitmap) vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); } From bb000f640e76c4c2402990d0613d4269e9c6dd29 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 26 Jul 2021 17:01:08 +0200 Subject: [PATCH 530/794] KVM: s390: restore old debugfs names commit bc9e9e672df9 ("KVM: debugfs: Reuse binary stats descriptors") did replace the old definitions with the binary ones. While doing that it missed that some files are named differently than the counters. This is especially important for kvm_stat which does have special handling for counters named instruction_*.
Fixes: commit bc9e9e672df9 ("KVM: debugfs: Reuse binary stats descriptors") CC: Jing Zhang Signed-off-by: Christian Borntraeger Message-Id: <20210726150108.5603-1-borntraeger@de.ibm.com> Signed-off-by: Paolo Bonzini --- arch/s390/include/asm/kvm_host.h | 18 +++++++++--------- arch/s390/kvm/diag.c | 18 +++++++++--------- arch/s390/kvm/kvm-s390.c | 18 +++++++++--------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9b4473f76e56..161a9e12bfb8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -445,15 +445,15 @@ struct kvm_vcpu_stat { u64 instruction_sigp_init_cpu_reset; u64 instruction_sigp_cpu_reset; u64 instruction_sigp_unknown; - u64 diagnose_10; - u64 diagnose_44; - u64 diagnose_9c; - u64 diagnose_9c_ignored; - u64 diagnose_9c_forward; - u64 diagnose_258; - u64 diagnose_308; - u64 diagnose_500; - u64 diagnose_other; + u64 instruction_diagnose_10; + u64 instruction_diagnose_44; + u64 instruction_diagnose_9c; + u64 diag_9c_ignored; + u64 diag_9c_forward; + u64 instruction_diagnose_258; + u64 instruction_diagnose_308; + u64 instruction_diagnose_500; + u64 instruction_diagnose_other; u64 pfault_sync; }; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 02c146f9e5cd..807fa9da1e72 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -24,7 +24,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE; - vcpu->stat.diagnose_10++; + vcpu->stat.instruction_diagnose_10++; if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end || start < 2 * PAGE_SIZE) @@ -74,7 +74,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx", vcpu->run->s.regs.gprs[rx]); - vcpu->stat.diagnose_258++; + 
vcpu->stat.instruction_diagnose_258++; if (vcpu->run->s.regs.gprs[rx] & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); @@ -145,7 +145,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); - vcpu->stat.diagnose_44++; + vcpu->stat.instruction_diagnose_44++; kvm_vcpu_on_spin(vcpu, true); return 0; } @@ -169,7 +169,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) int tid; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; - vcpu->stat.diagnose_9c++; + vcpu->stat.instruction_diagnose_9c++; /* yield to self */ if (tid == vcpu->vcpu_id) @@ -192,7 +192,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: yield forwarded", tid); - vcpu->stat.diagnose_9c_forward++; + vcpu->stat.diag_9c_forward++; return 0; } @@ -203,7 +203,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) return 0; no_yield: VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid); - vcpu->stat.diagnose_9c_ignored++; + vcpu->stat.diag_9c_ignored++; return 0; } @@ -213,7 +213,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode); - vcpu->stat.diagnose_308++; + vcpu->stat.instruction_diagnose_308++; switch (subcode) { case 3: vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; @@ -245,7 +245,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) { int ret; - vcpu->stat.diagnose_500++; + vcpu->stat.instruction_diagnose_500++; /* No virtio-ccw notification? Get out quickly. 
*/ if (!vcpu->kvm->arch.css_support || (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) @@ -299,7 +299,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) case 0x500: return __diag_virtio_hypercall(vcpu); default: - vcpu->stat.diagnose_other++; + vcpu->stat.instruction_diagnose_other++; return -EOPNOTSUPP; } } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b655a7d82bf0..4527ac7b5961 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -163,15 +163,15 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset), STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset), STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown), - STATS_DESC_COUNTER(VCPU, diagnose_10), - STATS_DESC_COUNTER(VCPU, diagnose_44), - STATS_DESC_COUNTER(VCPU, diagnose_9c), - STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored), - STATS_DESC_COUNTER(VCPU, diagnose_9c_forward), - STATS_DESC_COUNTER(VCPU, diagnose_258), - STATS_DESC_COUNTER(VCPU, diagnose_308), - STATS_DESC_COUNTER(VCPU, diagnose_500), - STATS_DESC_COUNTER(VCPU, diagnose_other), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_10), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_44), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c), + STATS_DESC_COUNTER(VCPU, diag_9c_ignored), + STATS_DESC_COUNTER(VCPU, diag_9c_forward), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_258), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_500), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), STATS_DESC_COUNTER(VCPU, pfault_sync) }; static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == From f1577ab21442476a1015d09e861c08ca76262c06 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 13 Jul 2021 17:20:16 +0300 Subject: [PATCH 531/794] KVM: SVM: svm_set_vintr don't warn if AVIC is active but is about to be deactivated It is possible for AVIC inhibit and AVIC active state to be mismatched. 
Currently we disable AVIC right away on vCPU which started the AVIC inhibit request thus this warning doesn't trigger but at least in theory, if svm_set_vintr is called at the same time on multiple vCPUs, the warning can happen. Signed-off-by: Maxim Levitsky Message-Id: <20210713142023.106183-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4bcb95bb8ed7..e8ccab50ebf6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1568,8 +1568,11 @@ static void svm_set_vintr(struct vcpu_svm *svm) { struct vmcb_control_area *control; - /* The following fields are ignored when AVIC is enabled */ - WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu)); + /* + * The following fields are ignored when AVIC is enabled + */ + WARN_ON(kvm_apicv_activated(svm->vcpu.kvm)); + svm_set_intercept(svm, INTERCEPT_VINTR); /* From feea01360cb1925dd31a3d38514eb86f61d69468 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 13 Jul 2021 17:20:17 +0300 Subject: [PATCH 532/794] KVM: SVM: tweak warning about enabled AVIC on nested entry It is possible that AVIC was requested to be disabled but not yet disabled, e.g if the nested entry is done right after svm_vcpu_after_set_cpuid. Signed-off-by: Maxim Levitsky Message-Id: <20210713142023.106183-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1c2a0414a88d..61738ff8ef33 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -515,7 +515,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id, * avic_physical_id. 
*/ - WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK); + WARN_ON(kvm_apicv_activated(svm->vcpu.kvm)); /* Copied from vmcb01. msrpm_base can be overwritten later. */ svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl; From 5868b8225ecef4ba3f5b17e65984d60bc5fd6254 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 13 Jul 2021 17:20:18 +0300 Subject: [PATCH 533/794] KVM: SVM: use vmcb01 in svm_refresh_apicv_exec_ctrl Currently when SVM is enabled in guest CPUID, AVIC is inhibited as soon as the guest CPUID is set. AVIC happens to be fully disabled on all vCPUs by the time any guest entry starts (if after migration the entry can be nested). The reason is that currently we disable avic right away on vCPU from which the kvm_request_apicv_update was called and for this case, it happens to be called on all vCPUs (by svm_vcpu_after_set_cpuid). After we stop doing this, AVIC will end up being disabled only when KVM_REQ_APICV_UPDATE is processed which is after we are done switching to the nested guest. Fix this by just using vmcb01 in svm_refresh_apicv_exec_ctrl for avic (which is the right thing to do anyway).
Signed-off-by: Maxim Levitsky Message-Id: <20210713142023.106183-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 1d01da64c333..a8ad78a2faa1 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -646,7 +646,7 @@ out: void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = svm->vmcb01.ptr; bool activated = kvm_vcpu_apicv_active(vcpu); if (!enable_apicv) From 74775654332b2682a5580d6f954e5a9ac81e7477 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 27 Jul 2021 19:12:47 +0800 Subject: [PATCH 534/794] KVM: use cpu_relax when halt polling SMT siblings share caches and other hardware, and busy halt polling will degrade its sibling performance if its sibling is working Sean Christopherson suggested as below: "Rather than disallowing halt-polling entirely, on x86 it should be sufficient to simply have the hardware thread yield to its sibling(s) via PAUSE. It probably won't get back all performance, but I would expect it to be close. This compiles on all KVM architectures, and AFAICT the intended usage of cpu_relax() is identical for all architectures." 
Suggested-by: Sean Christopherson Signed-off-by: Li RongQing Message-Id: <20210727111247.55510-1-lirongqing@baidu.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 986959833d70..0d732813fa80 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3110,6 +3110,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) ++vcpu->stat.generic.halt_poll_invalid; goto out; } + cpu_relax(); poll_end = cur = ktime_get(); } while (kvm_vcpu_can_poll(cur, stop)); } From 8750f9bbda115f3f79bfe43be85551ee5e12b6ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 27 Jul 2021 08:43:10 -0400 Subject: [PATCH 535/794] KVM: add missing compat KVM_CLEAR_DIRTY_LOG The arguments to the KVM_CLEAR_DIRTY_LOG ioctl include a pointer, therefore it needs a compat ioctl implementation. Otherwise, 32-bit userspace fails to invoke it on 64-bit kernels; for x86 it might work fine by chance if the padding is zero, but not on big-endian architectures. 
Reported-by: Thomas Sattler Cc: stable@vger.kernel.org Fixes: 2a31b9db1535 ("kvm: introduce manual dirty log reprotect") Reviewed-by: Peter Xu Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0d732813fa80..d20fba0fc290 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4391,6 +4391,16 @@ struct compat_kvm_dirty_log { }; }; +struct compat_kvm_clear_dirty_log { + __u32 slot; + __u32 num_pages; + __u64 first_page; + union { + compat_uptr_t dirty_bitmap; /* one bit per page */ + __u64 padding2; + }; +}; + static long kvm_vm_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4400,6 +4410,24 @@ static long kvm_vm_compat_ioctl(struct file *filp, if (kvm->mm != current->mm) return -EIO; switch (ioctl) { +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT + case KVM_CLEAR_DIRTY_LOG: { + struct compat_kvm_clear_dirty_log compat_log; + struct kvm_clear_dirty_log log; + + if (copy_from_user(&compat_log, (void __user *)arg, + sizeof(compat_log))) + return -EFAULT; + log.slot = compat_log.slot; + log.num_pages = compat_log.num_pages; + log.first_page = compat_log.first_page; + log.padding2 = compat_log.padding2; + log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); + + r = kvm_vm_ioctl_clear_dirty_log(kvm, &log); + break; + } +#endif case KVM_GET_DIRTY_LOG: { struct compat_kvm_dirty_log compat_log; struct kvm_dirty_log log; From 2bcc025ab9bbd029b1730cde71cb4e4f0ed35d0f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 17 Jul 2021 14:27:42 +0300 Subject: [PATCH 536/794] clk: tegra: Implement disable_unused() of tegra_clk_sdmmc_mux_ops Implement disable_unused() callback of tegra_clk_sdmmc_mux_ops to fix imbalanced disabling of the unused MMC clock on Tegra210 Jetson Nano. 
Fixes: c592c8a28f58 ("clk: tegra: Fix refcounting of gate clocks") Reported-by: Jon Hunter # T210 Nano Tested-by: Jon Hunter # T210 Nano Acked-by: Jon Hunter Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210717112742.7196-1-digetx@gmail.com Signed-off-by: Stephen Boyd --- drivers/clk/tegra/clk-sdmmc-mux.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/clk/tegra/clk-sdmmc-mux.c b/drivers/clk/tegra/clk-sdmmc-mux.c index 316912d3b1a4..4f2c3309eea4 100644 --- a/drivers/clk/tegra/clk-sdmmc-mux.c +++ b/drivers/clk/tegra/clk-sdmmc-mux.c @@ -194,6 +194,15 @@ static void clk_sdmmc_mux_disable(struct clk_hw *hw) gate_ops->disable(gate_hw); } +static void clk_sdmmc_mux_disable_unused(struct clk_hw *hw) +{ + struct tegra_sdmmc_mux *sdmmc_mux = to_clk_sdmmc_mux(hw); + const struct clk_ops *gate_ops = sdmmc_mux->gate_ops; + struct clk_hw *gate_hw = &sdmmc_mux->gate.hw; + + gate_ops->disable_unused(gate_hw); +} + static void clk_sdmmc_mux_restore_context(struct clk_hw *hw) { struct clk_hw *parent = clk_hw_get_parent(hw); @@ -218,6 +227,7 @@ static const struct clk_ops tegra_clk_sdmmc_mux_ops = { .is_enabled = clk_sdmmc_mux_is_enabled, .enable = clk_sdmmc_mux_enable, .disable = clk_sdmmc_mux_disable, + .disable_unused = clk_sdmmc_mux_disable_unused, .restore_context = clk_sdmmc_mux_restore_context, }; From 343597d558e79fe704ba8846b5b2ed24056b89c2 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 27 Jul 2021 09:04:58 -0700 Subject: [PATCH 537/794] bpf, sockmap: Zap ingress queues after stopping strparser We don't want strparser to run and pass skbs into skmsg handlers when the psock is null. We just sk_drop them in this case. When removing a live socket from map it means extra drops that we do not need to incur. Move the zap below strparser close to avoid this condition. This way we stop the stream parser first stopping it from processing packets and then delete the psock. 
Fixes: a136678c0bdbb ("bpf: sk_msg, zap ingress queue on psock down") Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210727160500.1713554-2-john.fastabend@gmail.com --- net/core/skmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 15d71288e741..28115ef742e8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -773,8 +773,6 @@ static void sk_psock_destroy(struct work_struct *work) void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - sk_psock_stop(psock, false); - write_lock_bh(&sk->sk_callback_lock); sk_psock_restore_proto(sk, psock); rcu_assign_sk_user_data(sk, NULL); @@ -784,6 +782,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); + sk_psock_stop(psock, false); + INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); queue_rcu_work(system_wq, &psock->rwork); } From 476d98018f32e68e7c5d4e8456940cf2b6d66f10 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 27 Jul 2021 09:04:59 -0700 Subject: [PATCH 538/794] bpf, sockmap: On cleanup we additionally need to remove cached skb It's possible if a socket is closed and the receive thread is under memory pressure it may have cached a skb. We need to ensure these skbs are free'd along with the normal ingress_skb queue. Before 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") tear down and backlog processing both had sock_lock for the common case of socket close or unhash. So it was not possible to have both running in parallel so all we would need is the kfree in those kernels. But, latest kernels include the commit 799aa7f98d53 and this requires a bit more work.
Without the ingress_lock guarding reading/writing the state->skb case its possible the tear down could run before the state update causing it to leak memory or worse when the backlog reads the state it could potentially run interleaved with the tear down and we might end up free'ing the state->skb from tear down side but already have the reference from backlog side. To resolve such races we wrap accesses in ingress_lock on both sides serializing tear down and backlog case. In both cases this only happens after an EAGAIN error case so having an extra lock in place is likely fine. The normal path will skip the locks. Note, we check state->skb before grabbing lock. This works because we can only enqueue with the mutex we hold already. Avoiding a race on adding state->skb after the check. And if tear down path is running that is also fine if the tear down path then removes state->skb we will simply set skb=NULL and the subsequent goto is skipped. This slight complication avoids locking in normal case. With this fix we no longer see this warning splat from tcp side on socket close when we hit the above case with redirect to ingress self. [224913.935822] WARNING: CPU: 3 PID: 32100 at net/core/stream.c:208 sk_stream_kill_queues+0x212/0x220 [224913.935841] Modules linked in: fuse overlay bpf_preload x86_pkg_temp_thermal intel_uncore wmi_bmof squashfs sch_fq_codel efivarfs ip_tables x_tables uas xhci_pci ixgbe mdio xfrm_algo xhci_hcd wmi [224913.935897] CPU: 3 PID: 32100 Comm: fgs-bench Tainted: G I 5.14.0-rc1alu+ #181 [224913.935908] Hardware name: Dell Inc. 
Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019 [224913.935914] RIP: 0010:sk_stream_kill_queues+0x212/0x220 [224913.935923] Code: 8b 83 20 02 00 00 85 c0 75 20 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 df e8 2b 11 fe ff eb c3 0f 0b e9 7c ff ff ff 0f 0b eb ce <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 90 0f 1f 44 00 00 41 57 41 [224913.935932] RSP: 0018:ffff88816271fd38 EFLAGS: 00010206 [224913.935941] RAX: 0000000000000ae8 RBX: ffff88815acd5240 RCX: dffffc0000000000 [224913.935948] RDX: 0000000000000003 RSI: 0000000000000ae8 RDI: ffff88815acd5460 [224913.935954] RBP: ffff88815acd5460 R08: ffffffff955c0ae8 R09: fffffbfff2e6f543 [224913.935961] R10: ffffffff9737aa17 R11: fffffbfff2e6f542 R12: ffff88815acd5390 [224913.935967] R13: ffff88815acd5480 R14: ffffffff98d0c080 R15: ffffffff96267500 [224913.935974] FS: 00007f86e6bd1700(0000) GS:ffff888451cc0000(0000) knlGS:0000000000000000 [224913.935981] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [224913.935988] CR2: 000000c0008eb000 CR3: 00000001020e0005 CR4: 00000000003706e0 [224913.935994] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [224913.936000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [224913.936007] Call Trace: [224913.936016] inet_csk_destroy_sock+0xba/0x1f0 [224913.936033] __tcp_close+0x620/0x790 [224913.936047] tcp_close+0x20/0x80 [224913.936056] inet_release+0x8f/0xf0 [224913.936070] __sock_release+0x72/0x120 [224913.936083] sock_close+0x14/0x20 Fixes: a136678c0bdbb ("bpf: sk_msg, zap ingress queue on psock down") Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210727160500.1713554-3-john.fastabend@gmail.com --- net/core/skmsg.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 28115ef742e8..036cdb33a94a 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -590,23 
+590,42 @@ static void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } +static void sk_psock_skb_state(struct sk_psock *psock, + struct sk_psock_work_state *state, + struct sk_buff *skb, + int len, int off) +{ + spin_lock_bh(&psock->ingress_lock); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + state->skb = skb; + state->len = len; + state->off = off; + } else { + sock_drop(psock->sk, skb); + } + spin_unlock_bh(&psock->ingress_lock); +} + static void sk_psock_backlog(struct work_struct *work) { struct sk_psock *psock = container_of(work, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; - struct sk_buff *skb; + struct sk_buff *skb = NULL; bool ingress; u32 len, off; int ret; mutex_lock(&psock->work_mutex); - if (state->skb) { + if (unlikely(state->skb)) { + spin_lock_bh(&psock->ingress_lock); skb = state->skb; len = state->len; off = state->off; state->skb = NULL; - goto start; + spin_unlock_bh(&psock->ingress_lock); } + if (skb) + goto start; while ((skb = skb_dequeue(&psock->ingress_skb))) { len = skb->len; @@ -621,9 +640,8 @@ start: len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { - state->skb = skb; - state->len = len; - state->off = off; + sk_psock_skb_state(psock, state, skb, + len, off); goto end; } /* Hard errors break pipe and stop xmit. */ @@ -722,6 +740,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock) skb_bpf_redirect_clear(skb); sock_drop(psock->sk, skb); } + kfree_skb(psock->work_state.skb); + /* We null the skb here to ensure that calls to sk_psock_backlog + * do not pick up the free'd skb. 
+ */ + psock->work_state.skb = NULL; __sk_psock_purge_ingress_msg(psock); } From 9635720b7c88592214562cb72605bdab6708006c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 27 Jul 2021 09:05:00 -0700 Subject: [PATCH 539/794] bpf, sockmap: Fix memleak on ingress msg enqueue If backlog handler is running during a tear down operation we may enqueue data on the ingress msg queue while tear down is trying to free it. sk_psock_backlog() sk_psock_handle_skb() skb_psock_skb_ingress() sk_psock_skb_ingress_enqueue() sk_psock_queue_msg(psock,msg) spin_lock(ingress_lock) sk_psock_zap_ingress() _sk_psock_purge_ingerss_msg() _sk_psock_purge_ingress_msg() -- free ingress_msg list -- spin_unlock(ingress_lock) spin_lock(ingress_lock) list_add_tail(msg,ingress_msg) <- entry on list with no one left to free it. spin_unlock(ingress_lock) To fix we only enqueue from backlog if the ENABLED bit is set. The tear down logic clears the bit with ingress_lock set so we wont enqueue the msg in the last step. Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210727160500.1713554-4-john.fastabend@gmail.com --- include/linux/skmsg.h | 54 ++++++++++++++++++++++++++++--------------- net/core/skmsg.c | 6 ----- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 96f319099744..14ab0c0bc924 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } +static inline void sk_psock_set_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + set_bit(bit, &psock->state); +} + +static inline void sk_psock_clear_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + clear_bit(bit, &psock->state); +} + +static inline bool 
sk_psock_test_state(const struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + return test_bit(bit, &psock->state); +} + +static inline void sock_drop(struct sock *sk, struct sk_buff *skb) +{ + sk_drops_add(sk, skb); + kfree_skb(skb); +} + +static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) +{ + if (msg->skb) + sock_drop(psock->sk, msg->skb); + kfree(msg); +} + static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { spin_lock_bh(&psock->ingress_lock); - list_add_tail(&msg->list, &psock->ingress_msg); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + list_add_tail(&msg->list, &psock->ingress_msg); + else + drop_sk_msg(psock, msg); spin_unlock_bh(&psock->ingress_lock); } @@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk, psock->psock_update_sk_prot(sk, psock, true); } -static inline void sk_psock_set_state(struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - set_bit(bit, &psock->state); -} - -static inline void sk_psock_clear_state(struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - clear_bit(bit, &psock->state); -} - -static inline bool sk_psock_test_state(const struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - return test_bit(bit, &psock->state); -} - static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 036cdb33a94a..2d6249b28928 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -584,12 +584,6 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, return sk_psock_skb_ingress(psock, skb); } -static void sock_drop(struct sock *sk, struct sk_buff *skb) -{ - sk_drops_add(sk, skb); - kfree_skb(skb); -} - static void sk_psock_skb_state(struct sk_psock *psock, struct sk_psock_work_state *state, struct sk_buff *skb, From b93af3055d6f32d3b0361cfdb110c9399c1241ba Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 27 Jul 2021 17:32:53 
+0800 Subject: [PATCH 540/794] blk-mq-sched: Fix blk_mq_sched_alloc_tags() error handling If the blk_mq_sched_alloc_tags() -> blk_mq_alloc_rqs() call fails, then we call blk_mq_sched_free_tags() -> blk_mq_free_rqs(). It is incorrect to do so, as any rqs would have already been freed in the blk_mq_alloc_rqs() call. Fix by calling blk_mq_free_rq_map() only directly. Fixes: 6917ff0b5bd41 ("blk-mq-sched: refactor scheduler initialization") Signed-off-by: John Garry Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/1627378373-148090-1-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c838d81ac058..0f006cabfd91 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, percpu_ref_put(&q->q_usage_counter); } -static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, - struct blk_mq_hw_ctx *hctx, - unsigned int hctx_idx) -{ - if (hctx->sched_tags) { - blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx); - blk_mq_free_rq_map(hctx->sched_tags, set->flags); - hctx->sched_tags = NULL; - } -} - static int blk_mq_sched_alloc_tags(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) @@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, return -ENOMEM; ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); - if (ret) - blk_mq_sched_free_tags(set, hctx, hctx_idx); + if (ret) { + blk_mq_free_rq_map(hctx->sched_tags, set->flags); + hctx->sched_tags = NULL; + } return ret; } From 8b54874ef1617185048029a3083d510569e93751 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 26 Jul 2021 09:20:14 +0300 Subject: [PATCH 541/794] net/mlx5: Fix flow table chaining Fix a bug when flow table is created in priority that already has other flow tables as shown in the 
below diagram. If the new flow table (FT-B) has the lowest level in the priority, we need to connect the flow tables from the previous priority (p0) to this new table. In addition when this flow table is destroyed (FT-B), we need to connect the flow tables from the previous priority (p0) to the next level flow table (FT-C) in the same priority of the destroyed table (if exists). --------- |root_ns| --------- | -------------------------------- | | | ---------- ---------- --------- |p(prio)-x| | p-y | | p-n | ---------- ---------- --------- | | ---------------- ------------------ |ns(e.g bypass)| |ns(e.g. kernel) | ---------------- ------------------ | | | ------- ------ ---- | p0 | | p1 | |p2| ------- ------ ---- | | \ -------- ------- ------ | FT-A | |FT-B | |FT-C| -------- ------- ------ Fixes: f90edfd279f3 ("net/mlx5_core: Connect flow tables") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d7bf0a3e4a52..c0697e1b7118 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1024,17 +1024,19 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { - struct mlx5_flow_table *next_ft; + struct mlx5_flow_table *next_ft, *first_ft; int err = 0; /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ - if (list_empty(&prio->node.children)) { + first_ft = list_first_entry_or_null(&prio->node.children, + struct mlx5_flow_table, node.list); + if (!first_ft || first_ft->level > ft->level) { err = connect_prev_fts(dev, ft, prio); if (err) return err; - next_ft = find_next_chained_ft(prio); + next_ft = first_ft ? 
first_ft : find_next_chained_ft(prio); err = connect_fwd_rules(dev, ft, next_ft); if (err) return err; @@ -2120,7 +2122,7 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft) node.list) == ft)) return 0; - next_ft = find_next_chained_ft(prio); + next_ft = find_next_ft(ft); err = connect_fwd_rules(dev, next_ft, ft); if (err) return err; From 90b22b9bcd242a3ba238f2c6f7eab771799001f8 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 8 Jul 2021 15:24:58 +0300 Subject: [PATCH 542/794] net/mlx5e: Disable Rx ntuple offload for uplink representor Rx ntuple offload is not supported in switchdev mode. Trying to enable it causes a kernel panic. BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 80000001065a5067 P4D 80000001065a5067 PUD 106594067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 7 PID: 1089 Comm: ethtool Not tainted 5.13.0-rc7_for_upstream_min_debug_2021_06_23_16_44 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5e_arfs_enable+0x70/0xd0 [mlx5_core] Code: 44 24 10 00 00 00 00 48 c7 44 24 18 00 00 00 00 49 63 c4 48 89 e2 44 89 e6 48 69 c0 20 08 00 00 48 89 ef 48 03 85 68 ac 00 00 <48> 8b 40 08 48 89 44 24 08 e8 d2 aa fd ff 48 83 05 82 96 18 00 01 RSP: 0018:ffff8881047679e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000004000000000 RCX: 0000004000000000 RDX: ffff8881047679e0 RSI: 0000000000000000 RDI: ffff888115100880 RBP: ffff888115100880 R08: ffffffffa00f6cb0 R09: ffff888104767a18 R10: ffff8881151000a0 R11: ffff888109479540 R12: 0000000000000000 R13: ffff888104767bb8 R14: ffff888115100000 R15: ffff8881151000a0 FS: 00007f41a64ab740(0000) GS:ffff8882f5dc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000104cbc005 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3:
0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: set_feature_arfs+0x1e/0x40 [mlx5_core] mlx5e_handle_feature+0x43/0xa0 [mlx5_core] mlx5e_set_features+0x139/0x1b0 [mlx5_core] __netdev_update_features+0x2b3/0xaf0 ethnl_set_features+0x176/0x3a0 ? __nla_parse+0x22/0x30 genl_family_rcv_msg_doit+0xe2/0x140 genl_rcv_msg+0xde/0x1d0 ? features_reply_size+0xe0/0xe0 ? genl_get_cmd+0xd0/0xd0 netlink_rcv_skb+0x4e/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x1f6/0x2b0 netlink_sendmsg+0x225/0x450 sock_sendmsg+0x33/0x40 __sys_sendto+0xd4/0x120 ? __sys_recvmsg+0x4e/0x90 ? exc_page_fault+0x219/0x740 __x64_sys_sendto+0x25/0x30 do_syscall_64+0x3f/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f41a65b0cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffd8d688358 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000010f42a0 RCX: 00007f41a65b0cba RDX: 0000000000000058 RSI: 00000000010f43b0 RDI: 0000000000000003 RBP: 000000000047ae60 R08: 00007f41a667c000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 00000000010f4340 R13: 00000000010f4350 R14: 00007ffd8d688400 R15: 00000000010f42a0 Modules linked in: mlx5_vdpa vhost_iotlb vdpa xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core overlay mlx5_core ptp pps_core fuse CR2: 0000000000000008 ---[ end trace c66523f2aba94b43 ]--- Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 29 +++++++++++++------ 1 file changed, 20 
insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d09e65557e75..c6f99fc77411 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3829,6 +3829,24 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) return 0; } +static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev, + netdev_features_t features) +{ + features &= ~NETIF_F_HW_TLS_RX; + if (netdev->features & NETIF_F_HW_TLS_RX) + netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_TLS_TX; + if (netdev->features & NETIF_F_HW_TLS_TX) + netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_NTUPLE; + if (netdev->features & NETIF_F_NTUPLE) + netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); + + return features; +} + static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_features_t features) { @@ -3860,15 +3878,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); } - if (mlx5e_is_uplink_rep(priv)) { - features &= ~NETIF_F_HW_TLS_RX; - if (netdev->features & NETIF_F_HW_TLS_RX) - netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); - - features &= ~NETIF_F_HW_TLS_TX; - if (netdev->features & NETIF_F_HW_TLS_TX) - netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); - } + if (mlx5e_is_uplink_rep(priv)) + features = mlx5e_fix_uplink_rep_features(netdev, features); mutex_unlock(&priv->state_lock); From c671972534c6f7fce789ac8156a2bc3bd146f806 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 22 Jun 2021 17:07:02 +0300 Subject: [PATCH 543/794] net/mlx5: E-Switch, Set destination vport vhca id 
only when merged eswitch is supported The destination vport "vhca id is valid" flag is set only when merged eswitch is supported. Change the destination vport vhca id value to also be set only when merged eswitch is supported. Fixes: e4ad91f23f10 ("net/mlx5e: Split offloaded eswitch TC rules for port mirroring") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7579f3402776..b0a2ca9037ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -382,10 +382,11 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f { dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; - dest[dest_idx].vport.vhca_id = - MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + } if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { if (pkt_reformat) { flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; From dd3fddb82780bfa24124834edd90bbc63bd689cc Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 2 Jun 2021 14:17:07 +0300 Subject: [PATCH 544/794] net/mlx5: E-Switch, handle devcom events only for ports on the same device This is the same check that LAG mode uses to decide whether to enable lag. This will fix adding peer miss rules if lag is not supported, and even incorrect rules in socket direct mode. Also fix the incorrect comment on mlx5_get_next_phys_dev(), as flow #1 doesn't exist.
Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 5 +---- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ceebfc20f65e..def2156e50ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -500,10 +500,7 @@ static int next_phys_dev(struct device *dev, const void *data) return 1; } -/* This function is called with two flows: - * 1. During initialization of mlx5_core_dev and we don't need to lock it. - * 2. During LAG configure stage and caller holds &mlx5_intf_mutex. - */ +/* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { struct auxiliary_device *adev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0a2ca9037ac..011e766e4f67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2368,6 +2368,9 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev) + break; + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; From e2351e517068718724f1d3b4010e2a41ec91fa76 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 30 Jun 2021 13:45:05 +0300 Subject: [PATCH 545/794] net/mlx5e: RX, Avoid possible data corruption when relaxed ordering and LRO combined When HW aggregates packets for an LRO session, it writes the payload of two consecutive packets of a flow 
contiguously, so that they usually share a cacheline. The first byte of a packet's payload is written immediately after the last byte of the preceding packet. In this flow, there are two consecutive write requests to the shared cacheline: 1. Regular write for the earlier packet. 2. Read-modify-write for the following packet. In case of relaxed-ordering on, these two writes might be re-ordered. Using the end padding optimization (to avoid partial write for the last cacheline of a packet) becomes problematic if the two writes occur out-of-order, as the padding would overwrite payload that belongs to the following packet, causing data corruption. Avoid this by disabling the end padding optimization when both LRO and relaxed-ordering are enabled. Fixes: 17347d5430c4 ("net/mlx5e: Add support for PCI relaxed ordering") Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 150c8e82c738..2cbf18c967f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -471,6 +471,15 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } +static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) && + MLX5_CAP_GEN(mdev, relaxed_ordering_write); + + return ro && params->lro_en ? 
+ MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN; +} + int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -508,7 +517,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } MLX5_SET(wq, wq, wq_type, params->rq_wq_type); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params)); MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); From 9841d58f3550d11c6181424427e8ad8c9c80f1b6 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 30 Jun 2021 13:33:31 +0300 Subject: [PATCH 546/794] net/mlx5e: Add NETIF_F_HW_TC to hw_features when HTB offload is available If a feature flag is only present in features, but not in hw_features, the user can't reset it. Although hw_features may contain NETIF_F_HW_TC by the point where the driver checks whether HTB offload is supported, this flag is controlled by another condition that may not hold. Set it explicitly to make sure the user can disable it. 
Fixes: 214baf22870c ("net/mlx5e: Support HTB offload") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c6f99fc77411..c5a2e3e6fe4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4870,6 +4870,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (MLX5_CAP_ETH(mdev, scatter_fcs)) netdev->hw_features |= NETIF_F_RXFCS; + if (mlx5_qos_is_supported(mdev)) + netdev->hw_features |= NETIF_F_HW_TC; + netdev->features = netdev->hw_features; /* Defaults */ @@ -4890,8 +4893,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_NTUPLE; #endif } - if (mlx5_qos_is_supported(mdev)) - netdev->features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; From a759f845d1f78634b54744db0fa48524ef6d0e14 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 30 Jun 2021 11:17:12 +0300 Subject: [PATCH 547/794] net/mlx5e: Consider PTP-RQ when setting RX VLAN stripping Add PTP-RQ to the loop when setting rx-vlan-offload feature via ethtool. On PTP-RQ's creation, set rx-vlan-offload into its parameters. 
Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 778e229310a9..07b429b94d93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -482,8 +482,11 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, params->log_sq_size = orig->log_sq_size; mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); } - if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + /* RQ */ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + params->vlan_strip_disable = orig->vlan_strip_disable; mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); + } } static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c5a2e3e6fe4b..37c440837945 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3384,7 +3384,7 @@ static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool en static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) { - int err = 0; + int err; int i; for (i = 0; i < chs->num; i++) { @@ -3392,6 +3392,8 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) if (err) return err; } + if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state)) + return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd); return 0; } From 497008e783452a2ec45c7ec5835cfe6950dcb097 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 21 Jun 2021 18:04:07 +0300 Subject: [PATCH 548/794] 
net/mlx5e: Fix page allocation failure for trap-RQ over SF Set the correct device pointer to the trap-RQ, to allow access to dma_mask and avoid allocation request with the wrong pci-dev. WARNING: CPU: 1 PID: 12005 at kernel/dma/mapping.c:151 dma_map_page_attrs+0x139/0x1c0 ... all Trace: ? __page_pool_alloc_pages_slow+0x5a/0x210 mlx5e_post_rx_wqes+0x258/0x400 [mlx5_core] mlx5e_trap_napi_poll+0x44/0xc0 [mlx5_core] __napi_poll+0x24/0x150 net_rx_action+0x22b/0x280 __do_softirq+0xc7/0x27e do_softirq+0x61/0x80 __local_bh_enable_ip+0x4b/0x50 mlx5e_handle_action_trap+0x2dd/0x4d0 [mlx5_core] blocking_notifier_call_chain+0x5a/0x80 mlx5_devlink_trap_action_set+0x8b/0x100 [mlx5_core] Fixes: 5543e989fe5e ("net/mlx5e: Add trap entity to ETH driver") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 86ab4e864fe6..7f94508594fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -37,7 +37,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params struct mlx5e_priv *priv = t->priv; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = t->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; From 678b1ae1af4aef488fcc42baa663e737b9a531ba Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 22 Jun 2021 10:24:17 +0300 Subject: [PATCH 549/794] net/mlx5e: Fix page allocation failure for ptp-RQ over SF Set the correct pci-device pointer to the ptp-RQ. This allows access to dma_mask and avoids allocation request with wrong pci-device. 
Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 07b429b94d93..efef4adce086 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -497,7 +497,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, int err; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = c->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; From 7f331bf0f060c2727e36d64f9b098b4ee5f3dfad Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 16 Jun 2021 19:11:03 +0300 Subject: [PATCH 550/794] net/mlx5: Unload device upon firmware fatal error When fw_fatal reporter reports an error, the firmware is not responding. Unload the device to ensure that the driver closes all its resources, even if recovery is not due (user disabled auto-recovery or reporter is in grace period). On successful recovery the device is loaded back up.
Fixes: b3bd076f7501 ("net/mlx5: Report devlink health on FW fatal issues") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9ff163c5bcde..9abeb80ffa31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -626,8 +626,16 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) } fw_reporter_ctx.err_synd = health->synd; fw_reporter_ctx.miss_counter = health->miss_counter; - devlink_health_report(health->fw_fatal_reporter, - "FW fatal error reported", &fw_reporter_ctx); + if (devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) { + /* If recovery wasn't performed, due to grace period, + * unload the driver. This ensures that the driver + * closes all its resources and it is not subjected to + * requests from the kernel. + */ + mlx5_core_err(dev, "Driver is in error state. Unloading\n"); + mlx5_unload_one(dev); + } } static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { From b1c2f6312c5005c928a72e668bf305a589d828d4 Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Mon, 26 Apr 2021 15:16:26 +0300 Subject: [PATCH 551/794] net/mlx5e: Fix nullptr in mlx5e_hairpin_get_mdev() The result of __dev_get_by_index() is not checked for NULL and then gets dereferenced immediately. Also, __dev_get_by_index() must be called while holding either RTNL lock or @dev_base_lock, which isn't satisfied by mlx5e_hairpin_get_mdev() or its callers. This makes the underlying hlist_for_each_entry() loop not safe, and can have adverse effects in itself. Fix by using dev_get_by_index() and handling nullptr return value when ifindex device is not found. 
Update mlx5e_hairpin_get_mdev() callers to check for possible PTR_ERR() result. Fixes: 77ab67b7f0f9 ("net/mlx5e: Basic setup of hairpin object") Addresses-Coverity: ("Dereference null return value") Signed-off-by: Dima Chumak Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 629a61e8022f..d273758255c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -452,12 +452,32 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, static struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) { + struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_priv *priv; - netdev = __dev_get_by_index(net, ifindex); + netdev = dev_get_by_index(net, ifindex); + if (!netdev) + return ERR_PTR(-ENODEV); + priv = netdev_priv(netdev); - return priv->mdev; + mdev = priv->mdev; + dev_put(netdev); + + /* Mirred tc action holds a refcount on the ifindex net_device (see + * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev + * after dev_put(netdev), while we're in the context of adding a tc flow. + * + * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then + * stored in a hairpin object, which exists until all flows, that refer to it, get + * removed. + * + * On the other hand, after a hairpin object has been created, the peer net_device may + * be removed/unbound while there are still some hairpin flows that are using it. This + * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to + * NETDEV_UNREGISTER event of the peer net_device. 
+ */ + return mdev; } static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) @@ -666,6 +686,10 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params func_mdev = priv->mdev; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + err = PTR_ERR(peer_mdev); + goto create_pair_err; + } pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); if (IS_ERR(pair)) { @@ -804,6 +828,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, int err; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device"); + return PTR_ERR(peer_mdev); + } + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); return -EOPNOTSUPP; From 740452e09cf5fc489ce60831cf11abef117b5d26 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 26 Apr 2021 11:06:37 +0800 Subject: [PATCH 552/794] net/mlx5: Fix mlx5_vport_tbl_attr chain from u16 to u32 The offending refactor commit uses u16 chain wrongly. Actually, it should be u32. 
Fixes: c620b772152b ("net/mlx5: Refactor tc flow attributes structure") CC: Ariel Levkovich Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 48cac5bf606d..d562edf5b0bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -636,7 +636,7 @@ struct esw_vport_tbl_namespace { }; struct mlx5_vport_tbl_attr { - u16 chain; + u32 chain; u16 prio; u16 vport; const struct esw_vport_tbl_namespace *vport_ns; From 5ab189cf3abbc9994bae3be524c5b88589ed56e2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Jul 2021 14:38:09 -1000 Subject: [PATCH 553/794] blk-iocost: fix operation ordering in iocg_wake_fn() iocg_wake_fn() open-codes wait_queue_entry removal and wakeup because it wants the wq_entry to be always removed whether it ended up waking the task or not. finish_wait() tests whether wq_entry needs removal without grabbing the wait_queue lock and expects the waker to use list_del_init_careful() after all waking operations are complete, which iocg_wake_fn() didn't do. The operation order was wrong and the regular list_del_init() was used. The result is that if a waiter wakes up racing the waker, it can pop the wq_entry off the stack while the waker is still looking at it, which can lead to a backtrace like the following. [7312084.588951] general protection fault, probably for non-canonical address 0x586bf4005b2b88: 0000 [#1] SMP ... [7312084.647079] RIP: 0010:queued_spin_lock_slowpath+0x171/0x1b0 ...
[7312084.858314] Call Trace: [7312084.863548] _raw_spin_lock_irqsave+0x22/0x30 [7312084.872605] try_to_wake_up+0x4c/0x4f0 [7312084.880444] iocg_wake_fn+0x71/0x80 [7312084.887763] __wake_up_common+0x71/0x140 [7312084.895951] iocg_kick_waitq+0xe8/0x2b0 [7312084.903964] ioc_rqos_throttle+0x275/0x650 [7312084.922423] __rq_qos_throttle+0x20/0x30 [7312084.930608] blk_mq_make_request+0x120/0x650 [7312084.939490] generic_make_request+0xca/0x310 [7312084.957600] submit_bio+0x173/0x200 [7312084.981806] swap_readpage+0x15c/0x240 [7312084.989646] read_swap_cache_async+0x58/0x60 [7312084.998527] swap_cluster_readahead+0x201/0x320 [7312085.023432] swapin_readahead+0x2df/0x450 [7312085.040672] do_swap_page+0x52f/0x820 [7312085.058259] handle_mm_fault+0xa16/0x1420 [7312085.066620] do_page_fault+0x2c6/0x5c0 [7312085.074459] page_fault+0x2f/0x40 Fix it by switching to list_del_init_careful() and putting it at the end. Signed-off-by: Tejun Heo Reported-by: Rik van Riel Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Jens Axboe --- block/blk-iocost.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index c2d6bc88d3f1..5fac3757e6e0 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode, return -1; iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost); + wait->committed = true; /* * autoremove_wake_function() removes the wait entry only when it - * actually changed the task state. We want the wait always - * removed. Remove explicitly and use default_wake_function(). + * actually changed the task state. We want the wait always removed. + * Remove explicitly and use default_wake_function(). Note that the + * order of operations is important as finish_wait() tests whether + * @wq_entry is removed without grabbing the lock. 
*/ - list_del_init(&wq_entry->entry); - wait->committed = true; - default_wake_function(wq_entry, mode, flags, key); + list_del_init_careful(&wq_entry->entry); return 0; } From 340e84573878b2b9d63210482af46883366361b9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Jul 2021 09:53:54 +0200 Subject: [PATCH 554/794] block: delay freeing the gendisk blkdev_get_no_open acquires a reference to the block_device through the block device inode and then tries to acquire a device model reference to the gendisk. But at this point the disk might already be freed (although the race is free). Fix this by only freeing the gendisk from the whole device bdevs ->free_inode callback as well. Fixes: 22ae8ce8b892 ("block: simplify bdev/disk lookup in blkdev_get") Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210722075402.983367-2-hch@lst.de Signed-off-by: Jens Axboe --- block/genhd.c | 3 +-- fs/block_dev.c | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index af4d2ab4a633..298ee78c1bda 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); xa_destroy(&disk->part_tbl); - bdput(disk->part0); if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue) blk_put_queue(disk->queue); - kfree(disk); + bdput(disk->part0); /* frees the disk */ } struct class block_class = { .name = "block", diff --git a/fs/block_dev.c b/fs/block_dev.c index ca8bf1869ca8..a38b0f33211c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode) free_percpu(bdev->bd_stats); kfree(bdev->bd_meta_info); + if (!bdev_is_partition(bdev)) + kfree(bdev->bd_disk); kmem_cache_free(bdev_cachep, BDEV_I(inode)); } From fa20bada3f934e3b3e4af4c77e5b518cd5a282e5 Mon Sep 17 00:00:00 2001 From: Maxim Devaev Date: Tue, 27 Jul 2021 21:58:00 +0300
Subject: [PATCH 555/794] usb: gadget: f_hid: idle uses the highest byte for duration SET_IDLE value must be shifted 8 bits to the right to get duration. This is confirmed by the USBCV test. Fixes: afcff6dc690e ("usb: gadget: f_hid: added GET_IDLE and SET_IDLE handlers") Cc: stable Signed-off-by: Maxim Devaev Link: https://lore.kernel.org/r/20210727185800.43796-1-mdevaev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 8d50c8b127fd..bb476e121eae 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -573,7 +573,7 @@ static int hidg_setup(struct usb_function *f, | HID_REQ_SET_IDLE): VDBG(cdev, "set_idle\n"); length = 0; - hidg->idle = value; + hidg->idle = value >> 8; goto respond; break; From d54db74ad6e0dea8c253fb68c689b836657ab914 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 7 Jun 2021 14:46:38 +0800 Subject: [PATCH 556/794] dmaengine: stm32-dma: Fix PM usage counter imbalance in stm32 dma ops pm_runtime_get_sync will increment the pm usage counter even if it fails. Forgetting the put operation will result in a reference leak here. We fix it by replacing it with pm_runtime_resume_and_get to keep usage counter balanced.
Fixes: 48bc73ba14bcd ("dmaengine: stm32-dma: Add PM Runtime support") Fixes: 05f8740a0e6fc ("dmaengine: stm32-dma: add suspend/resume power management support") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20210607064640.121394-2-zhangqilong3@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index f54ecb123a52..7dd1d3d0bf06 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1200,7 +1200,7 @@ static int stm32_dma_alloc_chan_resources(struct dma_chan *c) chan->config_init = false; - ret = pm_runtime_get_sync(dmadev->ddev.dev); + ret = pm_runtime_resume_and_get(dmadev->ddev.dev); if (ret < 0) return ret; @@ -1470,7 +1470,7 @@ static int stm32_dma_suspend(struct device *dev) struct stm32_dma_device *dmadev = dev_get_drvdata(dev); int id, ret, scr; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; From baa16371c9525f24d508508e4d296c031e1de29c Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 7 Jun 2021 14:46:39 +0800 Subject: [PATCH 557/794] dmaengine: stm32-dmamux: Fix PM usage counter unbalance in stm32 dmamux ops pm_runtime_get_sync will increment pm usage counter even it failed. Forgetting to putting operation will result in reference leak here. We fix it by replacing it with pm_runtime_resume_and_get to keep usage counter balanced. 
Fixes: 4f3ceca254e0f ("dmaengine: stm32-dmamux: Add PM Runtime support") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20210607064640.121394-3-zhangqilong3@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dmamux.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c index ef0d0555103d..a42164389ebc 100644 --- a/drivers/dma/stm32-dmamux.c +++ b/drivers/dma/stm32-dmamux.c @@ -137,7 +137,7 @@ static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec, /* Set dma request */ spin_lock_irqsave(&dmamux->lock, flags); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { spin_unlock_irqrestore(&dmamux->lock, flags); goto error; @@ -336,7 +336,7 @@ static int stm32_dmamux_suspend(struct device *dev) struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev); int i, ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; @@ -361,7 +361,7 @@ static int stm32_dmamux_resume(struct device *dev) if (ret < 0) return ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; From eda97cb095f2958bbad55684a6ca3e7d7af0176a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sat, 17 Jul 2021 22:00:21 +0300 Subject: [PATCH 558/794] dmaengine: of-dma: router_xlate to return -EPROBE_DEFER if controller is not yet available If the router_xlate can not find the controller in the available DMA devices then it should return with -EPROBE_DEFER in the same way as the of_dma_request_slave_channel() does. The issue can be reproduced if the event router is registered before the DMA controller itself and a driver would request for a channel before the controller is registered. In of_dma_request_slave_channel(): 1. of_dma_find_controller() would find the dma_router 2. ofdma->of_dma_xlate() would fail and returned NULL 3.
-ENODEV is returned as error code with this patch we would return in this case the correct -EPROBE_DEFER and the client can try to request the channel later. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20210717190021.21897-1-peter.ujfalusi@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/of-dma.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index ec00b20ae8e4..ac61ecda2926 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -67,8 +67,12 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, return NULL; ofdma_target = of_dma_find_controller(&dma_spec_target); - if (!ofdma_target) - return NULL; + if (!ofdma_target) { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + chan = ERR_PTR(-EPROBE_DEFER); + goto err; + } chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); if (IS_ERR_OR_NULL(chan)) { @@ -89,6 +93,7 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, } } +err: /* * Need to put the node back since the ofdma->of_dma_route_allocate * has taken it for generating the new, translated dma_spec From 46573e3ab08fb041d5ba7bf7bf3215a1e724c78c Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Wed, 28 Jul 2021 09:49:25 +0800 Subject: [PATCH 559/794] nfc: s3fwrn5: fix undefined parameter values in dev_err() In the function s3fwrn5_fw_download(), the 'ret' is not assigned, so the correct value should be given in dev_err function. Fixes: a0302ff5906a ("nfc: s3fwrn5: remove unnecessary label") Signed-off-by: Zhang Shengju Signed-off-by: Tang Bin Reviewed-by: Nathan Chancellor Signed-off-by: David S. 
Miller --- drivers/nfc/s3fwrn5/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index 1340fab9565e..e3e72b8a29e3 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -423,7 +423,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (IS_ERR(tfm)) { ret = PTR_ERR(tfm); dev_err(&fw_info->ndev->nfc_dev->dev, - "Cannot allocate shash (code=%ld)\n", PTR_ERR(tfm)); + "Cannot allocate shash (code=%pe)\n", tfm); goto out; } From 557fb5862c9272ad9b21407afe1da8acfd9b53eb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 27 Jul 2021 23:40:54 -0300 Subject: [PATCH 560/794] sctp: fix return value check in __sctp_rcv_asconf_lookup As Ben Hutchings noticed, this check should have been inverted: the call returns true in case of success. Reported-by: Ben Hutchings Fixes: 0c5dc070ff3d ("sctp: validate from_addr_param return") Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index eb3c2a34a31c..5ef86fdb1176 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1203,7 +1203,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( if (unlikely(!af)) return NULL; - if (af->from_addr_param(&paddr, param, peer_port, 0)) + if (!af->from_addr_param(&paddr, param, peer_port, 0)) return NULL; return __sctp_lookup_association(net, laddr, &paddr, transportp); From 76a16be07b209a3f507c72abe823bd3af1c8661a Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 28 Jul 2021 15:43:13 +0800 Subject: [PATCH 561/794] tulip: windbond-840: Fix missing pci_disable_device() in probe and remove Replace pci_enable_device() with pcim_enable_device(), pci_disable_device() and pci_release_regions() will be called in release automatically. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/winbond-840.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index f6ff1f76eacb..1876f15dd827 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -357,7 +357,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) int i, option = find_cnt < MAX_UNITS ? options[find_cnt] : 0; void __iomem *ioaddr; - i = pci_enable_device(pdev); + i = pcim_enable_device(pdev); if (i) return i; pci_set_master(pdev); @@ -379,7 +379,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size); if (!ioaddr) - goto err_out_free_res; + goto err_out_netdev; for (i = 0; i < 3; i++) ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(eeprom_read(ioaddr, i)); @@ -458,8 +458,6 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_cleardev: pci_iounmap(pdev, ioaddr); -err_out_free_res: - pci_release_regions(pdev); err_out_netdev: free_netdev (dev); return -ENODEV; @@ -1526,7 +1524,6 @@ static void w840_remove1(struct pci_dev *pdev) if (dev) { struct netdev_private *np = netdev_priv(dev); unregister_netdev(dev); - pci_release_regions(pdev); pci_iounmap(pdev, np->base_addr); free_netdev(dev); } From 5e7b30d24a5b8cb691c173b45b50e3ca0191be19 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 Jul 2021 08:49:09 +0200 Subject: [PATCH 562/794] nfc: nfcsim: fix use after free during module unload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a use after free memory corruption during module exit: - nfcsim_exit() - nfcsim_device_free(dev0) - nfc_digital_unregister_device() This iterates 
over command queue and frees all commands, - dev->up = false - nfcsim_link_shutdown() - nfcsim_link_recv_wake() This wakes the sleeping thread nfcsim_link_recv_skb(). - nfcsim_link_recv_skb() Wake from wait_event_interruptible_timeout(), call directly the deb->cb callback even though (dev->up == false), - digital_send_cmd_complete() Dereference of "struct digital_cmd" cmd which was freed earlier by nfc_digital_unregister_device(). This causes memory corruption shortly after (with unrelated stack trace): nfc nfc0: NFC: nfcsim_recv_wq: Device is down llcp: nfc_llcp_recv: err -19 nfc nfc1: NFC: nfcsim_recv_wq: Device is down BUG: unable to handle page fault for address: ffffffffffffffed Call Trace: fsnotify+0x54b/0x5c0 __fsnotify_parent+0x1fe/0x300 ? vfs_write+0x27c/0x390 vfs_write+0x27c/0x390 ksys_write+0x63/0xe0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae KASAN report: BUG: KASAN: use-after-free in digital_send_cmd_complete+0x16/0x50 Write of size 8 at addr ffff88800a05f720 by task kworker/0:2/71 Workqueue: events nfcsim_recv_wq [nfcsim] Call Trace:  dump_stack_lvl+0x45/0x59  print_address_description.constprop.0+0x21/0x140  ? digital_send_cmd_complete+0x16/0x50  ? digital_send_cmd_complete+0x16/0x50  kasan_report.cold+0x7f/0x11b  ? digital_send_cmd_complete+0x16/0x50  ? digital_dep_link_down+0x60/0x60  digital_send_cmd_complete+0x16/0x50  nfcsim_recv_wq+0x38f/0x3d5 [nfcsim]  ? nfcsim_in_send_cmd+0x4a/0x4a [nfcsim]  ? lock_is_held_type+0x98/0x110  ? finish_wait+0x110/0x110  ? rcu_read_lock_sched_held+0x9c/0xd0  ? rcu_read_lock_bh_held+0xb0/0xb0  ? lockdep_hardirqs_on_prepare+0x12e/0x1f0 This flow of calling digital_send_cmd_complete() callback on driver exit is specific to nfcsim which implements reading and sending work queues. Since the NFC digital device was unregistered, the callback should not be called. Fixes: 204bddcb508f ("NFC: nfcsim: Make use of the Digital layer") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. 
Miller --- drivers/nfc/nfcsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c index a9864fcdfba6..dd27c85190d3 100644 --- a/drivers/nfc/nfcsim.c +++ b/drivers/nfc/nfcsim.c @@ -192,8 +192,7 @@ static void nfcsim_recv_wq(struct work_struct *work) if (!IS_ERR(skb)) dev_kfree_skb(skb); - - skb = ERR_PTR(-ENODEV); + return; } dev->cb(dev->nfc_digital_dev, dev->arg, skb); From e9c6729acb38bcf027e40a5b50b2e1b0aa4bc170 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 23 Jul 2021 17:08:40 +0200 Subject: [PATCH 563/794] HID: fix typo in Kconfig There is a missing space in "relyingon". Add it. Signed-off-by: Christophe JAILLET Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 160554903ef9..76937f716fbe 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -576,7 +576,7 @@ config HID_LOGITECH_HIDPP depends on HID_LOGITECH select POWER_SUPPLY help - Support for Logitech devices relyingon the HID++ Logitech specification + Support for Logitech devices relying on the HID++ Logitech specification Say Y if you want support for Logitech devices relying on the HID++ specification. Such devices are the various Logitech Touchpads (T650, From ebe0b42a4252333aa4af60fd4d11b69405aa6068 Mon Sep 17 00:00:00 2001 From: Haochen Tong Date: Sun, 18 Jul 2021 01:04:31 +0800 Subject: [PATCH 564/794] HID: apple: Add support for Keychron K1 wireless keyboard The Keychron K1 wireless keyboard has a set of Apple-like function keys and an Fn key that works like on an Apple bluetooth keyboard. It identifies as an Apple Alu RevB ANSI keyboard (05ac:024f) over USB and BT. Use hid-apple for it so the Fn key and function keys work correctly. 
Signed-off-by: Haochen Tong Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 6b8f0d004d34..dc6bd4299c54 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -501,6 +501,8 @@ static const struct hid_device_id apple_devices[] = { APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), .driver_data = APPLE_HAS_FN }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), + .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), .driver_data = APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), From 0818ec1f508fc3b8e957f6c7f77b988c5bc24da7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 19 Jul 2021 11:27:31 +0100 Subject: [PATCH 565/794] HID: Kconfig: Fix spelling mistake "Uninterruptable" -> "Uninterruptible" There is a spelling mistake in the Kconfig text. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig index dcf3a235870f..7c2032f7f44d 100644 --- a/drivers/hid/usbhid/Kconfig +++ b/drivers/hid/usbhid/Kconfig @@ -38,7 +38,7 @@ config USB_HIDDEV help Say Y here if you want to support HID devices (from the USB specification standpoint) that aren't strictly user interface - devices, like monitor controls and Uninterruptable Power Supplies. + devices, like monitor controls and Uninterruptible Power Supplies. This module supports these devices separately using a separate event interface on /dev/usb/hiddevX (char 180:96 to 180:111). 
From 6ca2350e11f09d5d3e53777d1eff8ff6d300ed93 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 19 Jul 2021 13:55:28 -0700 Subject: [PATCH 566/794] HID: wacom: Re-enable touch by default for Cintiq 24HDT / 27QHDT Commit 670e90924bfe ("HID: wacom: support named keys on older devices") added support for sending named events from the soft buttons on the 24HDT and 27QHDT. In the process, however, it inadvertently disabled the touchscreen of the 24HDT and 27QHDT by default. The `wacom_set_shared_values` function would normally enable touch by default but because it checks the state of the non-shared `has_mute_touch_switch` flag and `wacom_setup_touch_input_capabilities` sets the state of the /shared/ version, touch ends up being disabled by default. This patch sets the non-shared flag, letting `wacom_set_shared_values` take care of copying the value over to the shared version and setting the default touch state to "on". Fixes: 670e90924bfe ("HID: wacom: support named keys on older devices") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 81d7d12bcf34..496a000ef862 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -3831,7 +3831,7 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev, wacom_wac->shared->touch->product == 0xF6) { input_dev->evbit[0] |= BIT_MASK(EV_SW); __set_bit(SW_MUTE_DEVICE, input_dev->swbit); - wacom_wac->shared->has_mute_touch_switch = true; + wacom_wac->has_mute_touch_switch = true; } fallthrough; From 7cc8524f65ce1a350042836c7cf837046aaa6e21 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 19 Jul 2021 13:55:31 -0700 Subject: [PATCH 567/794] HID: wacom: Skip processing of touches with negative slot values The `input_mt_get_slot_by_key` function may return a negative value if an error
occurs (e.g. running out of slots). If this occurs we should really avoid reporting any data for the slot. Signed-off-by: Ping Cheng Signed-off-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 496a000ef862..81ba642adcb7 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2548,6 +2548,9 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac, int slot; slot = input_mt_get_slot_by_key(input, hid_data->id); + if (slot < 0) + return; + input_mt_slot(input, slot); input_mt_report_slot_state(input, MT_TOOL_FINGER, prox); } From a59c7b6c6ff6d5437f293709e766f939d7107266 Mon Sep 17 00:00:00 2001 From: Ping Bao Date: Wed, 21 Jul 2021 15:56:15 -0700 Subject: [PATCH 568/794] platform/x86: intel-hid: add Alder Lake ACPI device ID Alder Lake has a new ACPI ID for Intel HID event filter device. Signed-off-by: Ping Bao Link: https://lore.kernel.org/r/20210721225615.20575-1-ping.a.bao@intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel-hid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index 078648a9201b..e5fbe017f8e1 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -25,6 +25,7 @@ static const struct acpi_device_id intel_hid_ids[] = { {"INT33D5", 0}, {"INTC1051", 0}, {"INTC1054", 0}, + {"INTC1070", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); From 2b2c66f607d00d17f879c0d946d44340bfbdc501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 26 Jul 2021 17:36:30 +0200 Subject: [PATCH 569/794] platform/x86: gigabyte-wmi: add support for B550 Aorus Elite V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported as working here: https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/1#issuecomment-879398883 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20210726153630.65213-1-linux@weissschuh.net Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 5529d7b0abea..fbb224a82e34 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -141,6 +141,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), From 1e60cebf82948cfdc9497ea4553bab125587593c Mon Sep 17 00:00:00 2001 From: zhang kai Date: Wed, 28 Jul 2021 18:54:18 +0800 Subject: [PATCH 570/794] net: let flow have same hash in two directions using same source and destination ip/port for flow hash calculation within the two directions. Signed-off-by: zhang kai Signed-off-by: David S. 
Miller --- net/core/flow_dissector.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2aadbfc5193b..4b2415d34873 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1504,7 +1504,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow) } EXPORT_SYMBOL(flow_get_u32_dst); -/* Sort the source and destination IP (and the ports if the IP are the same), +/* Sort the source and destination IP and the ports, * to have consistent hash within the two directions */ static inline void __flow_hash_consistentify(struct flow_keys *keys) @@ -1515,11 +1515,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) case FLOW_DISSECTOR_KEY_IPV4_ADDRS: addr_diff = (__force u32)keys->addrs.v4addrs.dst - (__force u32)keys->addrs.v4addrs.src; - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); + + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; @@ -1527,13 +1527,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) addr_diff = memcmp(&keys->addrs.v6addrs.dst, &keys->addrs.v6addrs.src, sizeof(keys->addrs.v6addrs.dst)); - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) { for (i = 0; i < 4; i++) swap(keys->addrs.v6addrs.src.s6_addr32[i], keys->addrs.v6addrs.dst.s6_addr32[i]); + } + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; From 89fb62fde3b226f99b7015280cf132e2a7438edf Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 28 Jul 2021 20:11:07 +0800 Subject: [PATCH 571/794] sis900: Fix missing pci_disable_device() in probe and remove Replace pci_enable_device() with 
pcim_enable_device(), pci_disable_device() and pci_release_regions() will be called in release automatically. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index ca9c00b7f588..cff87de9178a 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -443,7 +443,7 @@ static int sis900_probe(struct pci_dev *pci_dev, #endif /* setup various bits in PCI command register */ - ret = pci_enable_device(pci_dev); + ret = pcim_enable_device(pci_dev); if(ret) return ret; i = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32)); @@ -469,7 +469,7 @@ static int sis900_probe(struct pci_dev *pci_dev, ioaddr = pci_iomap(pci_dev, 0, 0); if (!ioaddr) { ret = -ENOMEM; - goto err_out_cleardev; + goto err_out; } sis_priv = netdev_priv(net_dev); @@ -581,8 +581,6 @@ err_unmap_tx: sis_priv->tx_ring_dma); err_out_unmap: pci_iounmap(pci_dev, ioaddr); -err_out_cleardev: - pci_release_regions(pci_dev); err_out: free_netdev(net_dev); return ret; @@ -2499,7 +2497,6 @@ static void sis900_remove(struct pci_dev *pci_dev) sis_priv->tx_ring_dma); pci_iounmap(pci_dev, sis_priv->ioaddr); free_netdev(net_dev); - pci_release_regions(pci_dev); } static int __maybe_unused sis900_suspend(struct device *dev) From ef04688871f3386b6d40ade8f5c664290420f819 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jul 2021 10:50:31 -0600 Subject: [PATCH 572/794] io_uring: don't block level reissue off completion path Some setups, like SCSI, can throw spurious -EAGAIN off the softirq completion path. Normally we expect this to happen inline as part of submission, but apparently SCSI has a weird corner case where it can happen as part of normal completions. 
This should be solved by having the -EAGAIN bubble back up the stack as part of submission, but previous attempts at this failed and we're just not quite there yet. Instead we currently use REQ_F_REISSUE to handle this case. For now, catch it in io_rw_should_reissue() and prevent a reissue from a bogus path. Cc: stable@vger.kernel.org Reported-by: Fabian Ebner Tested-by: Fabian Ebner Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6ba101cd4661..83f67d33bf67 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2447,6 +2447,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req) */ if (percpu_ref_is_dying(&ctx->refs)) return false; + /* + * Play it safe and assume not safe to re-import and reissue if we're + * not in the original thread group (or in task context). + */ + if (!same_thread_group(req->task, current) || !in_task()) + return false; return true; } #else From a890d01e4ee016978776e45340e521b3bbbdf41f Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Wed, 28 Jul 2021 11:03:22 +0800 Subject: [PATCH 573/794] io_uring: fix poll requests leaking second poll entries For pure poll requests, it doesn't remove the second poll wait entry when it's done, neither after vfs_poll() nor in the poll completion handler. We should remove the second poll wait entry. And we use io_poll_remove_double() rather than io_poll_remove_waitqs() since the latter has some redundant logic.
Fixes: 88e41cf928a6 ("io_uring: add multishot mode for IORING_OP_POLL_ADD") Cc: stable@vger.kernel.org # 5.13+ Signed-off-by: Hao Xu Link: https://lore.kernel.org/r/20210728030322.12307-1-haoxu@linux.alibaba.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 83f67d33bf67..bf548af0426c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4939,7 +4939,6 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask) if (req->poll.events & EPOLLONESHOT) flags = 0; if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { - io_poll_remove_waitqs(req); req->poll.done = true; flags = 0; } @@ -4962,6 +4961,7 @@ static void io_poll_task_func(struct io_kiocb *req) done = io_poll_complete(req, req->result); if (done) { + io_poll_remove_double(req); hash_del(&req->hash_node); } else { req->result = 0; @@ -5149,7 +5149,7 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, ipt->error = -EINVAL; spin_lock_irq(&ctx->completion_lock); - if (ipt->error) + if (ipt->error || (mask && (poll->events & EPOLLONESHOT))) io_poll_remove_double(req); if (likely(poll->head)) { spin_lock(&poll->head->lock); @@ -5221,7 +5221,6 @@ static int io_arm_poll_handler(struct io_kiocb *req) ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, io_async_wake); if (ret || ipt.error) { - io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); if (ret) return IO_APOLL_READY; From 36c2530ea963884eeb0097169f853fdc36f16ad7 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 27 Jul 2021 18:04:28 +0200 Subject: [PATCH 574/794] spi: imx: mx51-ecspi: Fix CONFIGREG delay comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For (2 * 1000000) / min_speed_hz < 10 to be true in naturals with zero, the min_speed_hz must be above 200000 (i.e. 200001 rounds down to 9, so the condition triggers). Update the comment. No functional change. 
Fixes: 6fd8b8503a0dc ("spi: spi-imx: Fix out-of-order CS/SCLK operation at low speeds") Signed-off-by: Marek Vasut Cc: Uwe Kleine-König Cc: Mark Brown Acked-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20210727160428.7673-1-marex@denx.de Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 2872993550bd..fa68e9817929 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -593,7 +593,7 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, } delay = (2 * 1000000) / min_speed_hz; - if (likely(delay < 10)) /* SCLK is faster than 100 kHz */ + if (likely(delay < 10)) /* SCLK is faster than 200 kHz */ udelay(delay); else /* SCLK is _very_ slow */ usleep_range(delay, delay + 10); From e0eef3690dc66b3ecc6e0f1267f332403eb22bea Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 28 Jul 2021 23:19:58 +0800 Subject: [PATCH 575/794] Revert "ACPI: resources: Add checks for ACPI IRQ override" Commit 0ec4e55e9f57 ("ACPI: resources: Add checks for ACPI IRQ override") introduces a regression on some platforms: on at least two different platforms the UART can't get the correct irq setting, and the kernel can't boot up on these two platforms. This reverts commit 0ec4e55e9f571f08970ed115ec0addc691eda613. Regression-discuss: https://bugzilla.kernel.org/show_bug.cgi?id=213031 Reported-by: PGNd Cc: 5.4+ # 5.4+ Signed-off-by: Hui Wang Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/resource.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index dc01fb550b28..ee78a210c606 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -423,13 +423,6 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi, } } -static bool irq_is_legacy(struct acpi_resource_irq *irq) -{ - return irq->triggering == ACPI_EDGE_SENSITIVE && - irq->polarity == ACPI_ACTIVE_HIGH && - irq->shareable == ACPI_EXCLUSIVE; -} - /** * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information. * @ares: Input ACPI resource object. @@ -468,7 +461,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, } acpi_dev_get_irqresource(res, irq->interrupts[index], irq->triggering, irq->polarity, - irq->shareable, irq_is_legacy(irq)); + irq->shareable, true); break; case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: ext_irq = &ares->data.extended_irq; From 41a8457f3f6f829be1f8f8fa7577a46b9b7223ef Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 27 Jul 2021 09:18:24 -0700 Subject: [PATCH 576/794] ACPI: DPTF: Fix reading of attributes The current assumption that methods to read PCH FIVR attributes will return integer, is not correct. There is no good way to return integer as negative numbers are also valid. These read methods return a package of integers. The first integer returns status, which is 0 on success and any other value for failure. When the returned status is zero, then the second integer returns the actual value. This change fixes this issue by replacing acpi_evaluate_integer() with acpi_evaluate_object() and use acpi_extract_package() to extract results. Fixes: 2ce6324eadb01 ("ACPI: DPTF: Add PCH FIVR participant driver") Signed-off-by: Srinivas Pandruvada Cc: 5.10+ # 5.10+ Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/dptf/dptf_pch_fivr.c | 51 ++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c index 5fca18296bf6..550b9081fcbc 100644 --- a/drivers/acpi/dptf/dptf_pch_fivr.c +++ b/drivers/acpi/dptf/dptf_pch_fivr.c @@ -9,6 +9,42 @@ #include #include +struct pch_fivr_resp { + u64 status; + u64 result; +}; + +static int pch_fivr_read(acpi_handle handle, char *method, struct pch_fivr_resp *fivr_resp) +{ + struct acpi_buffer resp = { sizeof(struct pch_fivr_resp), fivr_resp}; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer format = { sizeof("NN"), "NN" }; + union acpi_object *obj; + acpi_status status; + int ret = -EFAULT; + + status = acpi_evaluate_object(handle, method, NULL, &buffer); + if (ACPI_FAILURE(status)) + return ret; + + obj = buffer.pointer; + if (!obj || obj->type != ACPI_TYPE_PACKAGE) + goto release_buffer; + + status = acpi_extract_package(obj, &format, &resp); + if (ACPI_FAILURE(status)) + goto release_buffer; + + if (fivr_resp->status) + goto release_buffer; + + ret = 0; + +release_buffer: + kfree(buffer.pointer); + return ret; +} + /* * Presentation of attributes which are defined for INT1045 * They are: @@ -23,15 +59,14 @@ static ssize_t name##_show(struct device *dev,\ char *buf)\ {\ struct acpi_device *acpi_dev = dev_get_drvdata(dev);\ - unsigned long long val;\ - acpi_status status;\ + struct pch_fivr_resp fivr_resp;\ + int status;\ \ - status = acpi_evaluate_integer(acpi_dev->handle, #method,\ - NULL, &val);\ - if (ACPI_SUCCESS(status))\ - return sprintf(buf, "%d\n", (int)val);\ - else\ - return -EINVAL;\ + status = pch_fivr_read(acpi_dev->handle, #method, &fivr_resp);\ + if (status)\ + return status;\ +\ + return sprintf(buf, "%llu\n", fivr_resp.result);\ } #define PCH_FIVR_STORE(name, method) \ From 240246f6b913b0c23733cfd2def1d283f8cc9bbe Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues 
Date: Fri, 9 Jul 2021 11:29:22 -0500 Subject: [PATCH 577/794] btrfs: mark compressed range uptodate only if all bio succeed In compression write endio sequence, the range which the compressed_bio writes is marked as uptodate if the last bio of the compressed (sub)bios is completed successfully. There could be previous bio which may have failed which is recorded in cb->errors. Set the writeback range as uptodate only if cb->errors is zero, as opposed to checking only the last bio's status. Backporting notes: in all versions up to 4.4 the last argument is always replaced by "!cb->errors". CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Goldwyn Rodrigues Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9a023ae0f98b..30d82cdf128c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -352,7 +352,7 @@ static void end_compressed_bio_write(struct bio *bio) btrfs_record_physical_zoned(inode, cb->start, bio); btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL, cb->start, cb->start + cb->len - 1, - bio->bi_status == BLK_STS_OK); + !cb->errors); end_compressed_writeback(inode, cb); /* note, our inode could be gone now */ From ecc64fab7d49c678e70bd4c35fe64d2ab3e3d212 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 27 Jul 2021 11:24:43 +0100 Subject: [PATCH 578/794] btrfs: fix lost inode on log replay after mix of fsync, rename and inode eviction When checking if we need to log the new name of a renamed inode, we are checking if the inode and its parent inode have been logged before, and if not we don't log the new name. The check however is buggy, as it directly compares the logged_trans field of the inodes versus the ID of the current transaction. 
The problem is that logged_trans is a transient field, only stored in memory and never persisted in the inode item, so if an inode was logged before, evicted and reloaded, its logged_trans field is set to a value of 0, meaning the check will return false and the new name of the renamed inode is not logged. If the old parent directory was previously fsynced and we deleted the logged directory entries corresponding to the old name, we end up with a log that when replayed will delete the renamed inode. The following example triggers the problem: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ mkdir /mnt/A $ mkdir /mnt/B $ echo -n "hello world" > /mnt/A/foo $ sync # Add some new file to A and fsync directory A. $ touch /mnt/A/bar $ xfs_io -c "fsync" /mnt/A # Now trigger inode eviction. We are only interested in triggering # eviction for the inode of directory A. $ echo 2 > /proc/sys/vm/drop_caches # Move foo from directory A to directory B. # This deletes the directory entries for foo in A from the log, and # does not add the new name for foo in directory B to the log, because # logged_trans of A is 0, which is less than the current transaction ID. $ mv /mnt/A/foo /mnt/B/foo # Now make an fsync to anything except A, B or any file inside them, # like for example create a file at the root directory and fsync this # new file. This syncs the log that contains all the changes done by # previous rename operation. $ touch /mnt/baz $ xfs_io -c "fsync" /mnt/baz # Mount the filesystem and replay the log. $ mount /dev/sdc /mnt # Check the filesystem content. $ ls -1R /mnt /mnt/: A B baz /mnt/A: bar /mnt/B: $ # File foo is gone, it's neither in A/ nor in B/. Fix this by using the inode_logged() helper at btrfs_log_new_name(), which safely checks if an inode was logged before in the current transaction. A test case for fstests will follow soon. 
CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9fd0348be7f5..e6430ac9bbe8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6503,8 +6503,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * if this inode hasn't been logged and directory we're renaming it * from hasn't been logged, we don't need to log it */ - if (inode->logged_trans < trans->transid && - (!old_dir || old_dir->logged_trans < trans->transid)) + if (!inode_logged(trans, inode) && + (!old_dir || !inode_logged(trans, old_dir))) return; /* From b2a616676839e2a6b02c8e40be7f886f882ed194 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Tue, 27 Jul 2021 15:13:03 +0800 Subject: [PATCH 579/794] btrfs: fix rw device counting in __btrfs_free_extra_devids When removing a writeable device in __btrfs_free_extra_devids, the rw device count should be decremented. 
This error was caught by Syzbot which reported a warning in close_fs_devices: WARNING: CPU: 1 PID: 9355 at fs/btrfs/volumes.c:1168 close_fs_devices+0x763/0x880 fs/btrfs/volumes.c:1168 Modules linked in: CPU: 0 PID: 9355 Comm: syz-executor552 Not tainted 5.13.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:close_fs_devices+0x763/0x880 fs/btrfs/volumes.c:1168 RSP: 0018:ffffc9000333f2f0 EFLAGS: 00010293 RAX: ffffffff8365f5c3 RBX: 0000000000000001 RCX: ffff888029afd4c0 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: ffff88802846f508 R08: ffffffff8365f525 R09: ffffed100337d128 R10: ffffed100337d128 R11: 0000000000000000 R12: dffffc0000000000 R13: ffff888019be8868 R14: 1ffff1100337d10d R15: 1ffff1100337d10a FS: 00007f6f53828700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000047c410 CR3: 00000000302a6000 CR4: 00000000001506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_close_devices+0xc9/0x450 fs/btrfs/volumes.c:1180 open_ctree+0x8e1/0x3968 fs/btrfs/disk-io.c:3693 btrfs_fill_super fs/btrfs/super.c:1382 [inline] btrfs_mount_root+0xac5/0xc60 fs/btrfs/super.c:1749 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x86/0x270 fs/super.c:1498 fc_mount fs/namespace.c:993 [inline] vfs_kern_mount+0xc9/0x160 fs/namespace.c:1023 btrfs_mount+0x3d3/0xb50 fs/btrfs/super.c:1809 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x86/0x270 fs/super.c:1498 do_new_mount fs/namespace.c:2905 [inline] path_mount+0x196f/0x2be0 fs/namespace.c:3235 do_mount fs/namespace.c:3248 [inline] __do_sys_mount fs/namespace.c:3456 [inline] __se_sys_mount+0x2f9/0x3b0 fs/namespace.c:3433 do_syscall_64+0x3f/0xb0 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae Because fs_devices->rw_devices was not 0 after 
closing all devices. Here is the call trace that was observed: btrfs_mount_root(): btrfs_scan_one_device(): device_list_add(); <---------------- device added btrfs_open_devices(): open_fs_devices(): btrfs_open_one_device(); <-------- writable device opened, rw device count ++ btrfs_fill_super(): open_ctree(): btrfs_free_extra_devids(): __btrfs_free_extra_devids(); <--- writable device removed, rw device count not decremented fail_tree_roots: btrfs_close_devices(): close_fs_devices(); <------- rw device count off by 1 As a note, prior to commit cf89af146b7e ("btrfs: dev-replace: fail mount if we don't have replace item with target device"), rw_devices was decremented on removing a writable device in __btrfs_free_extra_devids only if the BTRFS_DEV_STATE_REPLACE_TGT bit was not set for the device. However, this check does not need to be reinstated as it is now redundant and incorrect. In __btrfs_free_extra_devids, we skip removing the device if it is the target for replacement. This is done by checking whether device->devid == BTRFS_DEV_REPLACE_DEVID. Since BTRFS_DEV_STATE_REPLACE_TGT is set only on the device with devid BTRFS_DEV_REPLACE_DEVID, no devices should have the BTRFS_DEV_STATE_REPLACE_TGT bit set after the check, and so it's redundant to test for that bit. Additionally, following commit 82372bc816d7 ("Btrfs: make the logic of source device removing more clear"), rw_devices is incremented whenever a writeable device is added to the alloc list (including the target device in btrfs_dev_replace_finishing), so all removals of writable devices from the alloc list should also be accompanied by a decrement to rw_devices. 
Reported-by: syzbot+a70e2ad0879f160b9217@syzkaller.appspotmail.com Fixes: cf89af146b7e ("btrfs: dev-replace: fail mount if we don't have replace item with target device") CC: stable@vger.kernel.org # 5.10+ Tested-by: syzbot+a70e2ad0879f160b9217@syzkaller.appspotmail.com Reviewed-by: Anand Jain Signed-off-by: Desmond Cheong Zhi Xi Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c6c14315b1c9..4c83256ae37f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1078,6 +1078,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { list_del_init(&device->dev_alloc_list); clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); + fs_devices->rw_devices--; } list_del_init(&device->dev_list); fs_devices->num_devices--; From cbcf01128d0a92e131bd09f1688fe032480b65ca Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 28 Jul 2021 14:47:20 +0200 Subject: [PATCH 580/794] af_unix: fix garbage collect vs MSG_PEEK unix_gc() assumes that candidate sockets can never gain an external reference (i.e. be installed into an fd) while the unix_gc_lock is held. Except for MSG_PEEK this is guaranteed by modifying inflight count under the unix_gc_lock. MSG_PEEK does not touch any variable protected by unix_gc_lock (file count is not), yet it needs to be serialized with garbage collection. Do this by locking/unlocking unix_gc_lock: 1) increment file count 2) lock/unlock barrier to make sure incremented file count is visible to garbage collection 3) install file into fd This is a lock barrier (unlike smp_mb()) that ensures that garbage collection is run completely before or completely after the barrier. 
Cc: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Miklos Szeredi Signed-off-by: Linus Torvalds --- net/unix/af_unix.c | 51 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 23c92ad15c61..ba7ced947e51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1526,6 +1526,53 @@ out: return err; } +static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + scm->fp = scm_fp_dup(UNIXCB(skb).fp); + + /* + * Garbage collection of unix sockets starts by selecting a set of + * candidate sockets which have reference only from being in flight + * (total_refs == inflight_refs). This condition is checked once during + * the candidate collection phase, and candidates are marked as such, so + * that non-candidates can later be ignored. While inflight_refs is + * protected by unix_gc_lock, total_refs (file count) is not, hence this + * is an instantaneous decision. + * + * Once a candidate, however, the socket must not be reinstalled into a + * file descriptor while the garbage collection is in progress. + * + * If the above conditions are met, then the directed graph of + * candidates (*) does not change while unix_gc_lock is held. + * + * Any operations that changes the file count through file descriptors + * (dup, close, sendmsg) does not change the graph since candidates are + * not installed in fds. + * + * Dequeing a candidate via recvmsg would install it into an fd, but + * that takes unix_gc_lock to decrement the inflight count, so it's + * serialized with garbage collection. + * + * MSG_PEEK is special in that it does not change the inflight count, + * yet does install the socket into an fd. The following lock/unlock + * pair is to ensure serialization with garbage collection. It must be + * done between incrementing the file count and installing the file into + * an fd. 
+ * + * If garbage collection starts after the barrier provided by the + * lock/unlock, then it will see the elevated refcount and not mark this + * as a candidate. If a garbage collection is already in progress + * before the file count was incremented, then the lock/unlock pair will + * ensure that garbage collection is finished before progressing to + * installing the fd. + * + * (*) A -> B where B is on the queue of A or B is on the queue of C + * which is on the queue of listening socket A. + */ + spin_lock(&unix_gc_lock); + spin_unlock(&unix_gc_lock); +} + static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; @@ -2175,7 +2222,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; @@ -2418,7 +2465,7 @@ unlock: /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); From 25905f602fdb0cfa147017056636768a7aa1ff6f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 21 Jul 2021 12:25:20 -0700 Subject: [PATCH 581/794] dmaengine: idxd: Change license on idxd.h to LGPL This file was given GPL-2.0 license. But LGPL-2.1 makes more sense as it needs to be used by libraries outside of the kernel source tree. Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- include/uapi/linux/idxd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e33997b4d750..edc346a77c91 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ /* Copyright(c) 2019 Intel Corporation. All rights rsvd. 
*/ #ifndef _USR_IDXD_H_ #define _USR_IDXD_H_ From 345daff2e994ee844d6a609c37f085695fbb4c4d Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Tue, 27 Jul 2021 17:24:18 +0200 Subject: [PATCH 582/794] ucounts: Fix race condition between alloc_ucounts and put_ucounts The race happens because put_ucounts() doesn't use spinlock and get_ucounts is not under spinlock: CPU0 CPU1 ---- ---- alloc_ucounts() put_ucounts() spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); atomic_dec_and_test(&ucounts->count)) spin_unlock_irq(&ucounts_lock); spin_lock_irqsave(&ucounts_lock, flags); hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); ucounts = get_ucounts(ucounts); ================================================================== BUG: KASAN: use-after-free in instrument_atomic_read_write include/linux/instrumented.h:101 [inline] BUG: KASAN: use-after-free in atomic_add_negative include/asm-generic/atomic-instrumented.h:556 [inline] BUG: KASAN: use-after-free in get_ucounts kernel/ucount.c:152 [inline] BUG: KASAN: use-after-free in get_ucounts kernel/ucount.c:150 [inline] BUG: KASAN: use-after-free in alloc_ucounts+0x19b/0x5b0 kernel/ucount.c:188 Write of size 4 at addr ffff88802821e41c by task syz-executor.4/16785 CPU: 1 PID: 16785 Comm: syz-executor.4 Not tainted 5.14.0-rc1-next-20210712-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:105 print_address_description.constprop.0.cold+0x6c/0x309 mm/kasan/report.c:233 __kasan_report mm/kasan/report.c:419 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:436 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189 instrument_atomic_read_write include/linux/instrumented.h:101 [inline] atomic_add_negative include/asm-generic/atomic-instrumented.h:556 [inline] get_ucounts 
kernel/ucount.c:152 [inline] get_ucounts kernel/ucount.c:150 [inline] alloc_ucounts+0x19b/0x5b0 kernel/ucount.c:188 set_cred_ucounts+0x171/0x3a0 kernel/cred.c:684 __sys_setuid+0x285/0x400 kernel/sys.c:623 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x4665d9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fde54097188 EFLAGS: 00000246 ORIG_RAX: 0000000000000069 RAX: ffffffffffffffda RBX: 000000000056bf80 RCX: 00000000004665d9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000000000ff RBP: 00000000004bfcb9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000056bf80 R13: 00007ffc8655740f R14: 00007fde54097300 R15: 0000000000022000 Allocated by task 16784: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] ____kasan_kmalloc mm/kasan/common.c:513 [inline] ____kasan_kmalloc mm/kasan/common.c:472 [inline] __kasan_kmalloc+0x9b/0xd0 mm/kasan/common.c:522 kmalloc include/linux/slab.h:591 [inline] kzalloc include/linux/slab.h:721 [inline] alloc_ucounts+0x23d/0x5b0 kernel/ucount.c:169 set_cred_ucounts+0x171/0x3a0 kernel/cred.c:684 __sys_setuid+0x285/0x400 kernel/sys.c:623 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 16785: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_set_track+0x1c/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:360 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xfb/0x130 mm/kasan/common.c:374 kasan_slab_free 
include/linux/kasan.h:229 [inline] slab_free_hook mm/slub.c:1650 [inline] slab_free_freelist_hook+0xdf/0x240 mm/slub.c:1675 slab_free mm/slub.c:3235 [inline] kfree+0xeb/0x650 mm/slub.c:4295 put_ucounts kernel/ucount.c:200 [inline] put_ucounts+0x117/0x150 kernel/ucount.c:192 put_cred_rcu+0x27a/0x520 kernel/cred.c:124 rcu_do_batch kernel/rcu/tree.c:2550 [inline] rcu_core+0x7ab/0x1380 kernel/rcu/tree.c:2785 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Last potentially related work creation: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_record_aux_stack+0xe5/0x110 mm/kasan/generic.c:348 insert_work+0x48/0x370 kernel/workqueue.c:1332 __queue_work+0x5c1/0xed0 kernel/workqueue.c:1498 queue_work_on+0xee/0x110 kernel/workqueue.c:1525 queue_work include/linux/workqueue.h:507 [inline] call_usermodehelper_exec+0x1f0/0x4c0 kernel/umh.c:435 kobject_uevent_env+0xf8f/0x1650 lib/kobject_uevent.c:618 netdev_queue_add_kobject net/core/net-sysfs.c:1621 [inline] netdev_queue_update_kobjects+0x374/0x450 net/core/net-sysfs.c:1655 register_queue_kobjects net/core/net-sysfs.c:1716 [inline] netdev_register_kobject+0x35a/0x430 net/core/net-sysfs.c:1959 register_netdevice+0xd33/0x1500 net/core/dev.c:10331 nsim_init_netdevsim drivers/net/netdevsim/netdev.c:317 [inline] nsim_create+0x381/0x4d0 drivers/net/netdevsim/netdev.c:364 __nsim_dev_port_add+0x32e/0x830 drivers/net/netdevsim/dev.c:1295 nsim_dev_port_add_all+0x53/0x150 drivers/net/netdevsim/dev.c:1355 nsim_dev_probe+0xcb5/0x1190 drivers/net/netdevsim/dev.c:1496 call_driver_probe drivers/base/dd.c:517 [inline] really_probe+0x23c/0xcd0 drivers/base/dd.c:595 __driver_probe_device+0x338/0x4d0 drivers/base/dd.c:747 driver_probe_device+0x4c/0x1a0 drivers/base/dd.c:777 __device_attach_driver+0x20b/0x2f0 drivers/base/dd.c:894 bus_for_each_drv+0x15f/0x1e0 drivers/base/bus.c:427 __device_attach+0x228/0x4a0 drivers/base/dd.c:965 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:487 device_add+0xc2f/0x2180 drivers/base/core.c:3356 
nsim_bus_dev_new drivers/net/netdevsim/bus.c:431 [inline] new_device_store+0x436/0x710 drivers/net/netdevsim/bus.c:298 bus_attr_store+0x72/0xa0 drivers/base/bus.c:122 sysfs_kf_write+0x110/0x160 fs/sysfs/file.c:139 kernfs_fop_write_iter+0x342/0x500 fs/kernfs/file.c:296 call_write_iter include/linux/fs.h:2152 [inline] new_sync_write+0x426/0x650 fs/read_write.c:518 vfs_write+0x75a/0xa40 fs/read_write.c:605 ksys_write+0x12d/0x250 fs/read_write.c:658 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Second to last potentially related work creation: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_record_aux_stack+0xe5/0x110 mm/kasan/generic.c:348 insert_work+0x48/0x370 kernel/workqueue.c:1332 __queue_work+0x5c1/0xed0 kernel/workqueue.c:1498 queue_work_on+0xee/0x110 kernel/workqueue.c:1525 queue_work include/linux/workqueue.h:507 [inline] call_usermodehelper_exec+0x1f0/0x4c0 kernel/umh.c:435 kobject_uevent_env+0xf8f/0x1650 lib/kobject_uevent.c:618 kobject_synth_uevent+0x701/0x850 lib/kobject_uevent.c:208 uevent_store+0x20/0x50 drivers/base/core.c:2371 dev_attr_store+0x50/0x80 drivers/base/core.c:2072 sysfs_kf_write+0x110/0x160 fs/sysfs/file.c:139 kernfs_fop_write_iter+0x342/0x500 fs/kernfs/file.c:296 call_write_iter include/linux/fs.h:2152 [inline] new_sync_write+0x426/0x650 fs/read_write.c:518 vfs_write+0x75a/0xa40 fs/read_write.c:605 ksys_write+0x12d/0x250 fs/read_write.c:658 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88802821e400 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 28 bytes inside of 192-byte region [ffff88802821e400, ffff88802821e4c0) The buggy address belongs to the page: page:ffffea0000a08780 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2821e flags: 
0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 dead000000000100 dead000000000122 ffff888010841a00 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 1, ts 12874702440, free_ts 12637793385 prep_new_page mm/page_alloc.c:2433 [inline] get_page_from_freelist+0xa72/0x2f80 mm/page_alloc.c:4166 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5374 alloc_page_interleave+0x1e/0x200 mm/mempolicy.c:2119 alloc_pages+0x238/0x2a0 mm/mempolicy.c:2242 alloc_slab_page mm/slub.c:1713 [inline] allocate_slab+0x32b/0x4c0 mm/slub.c:1853 new_slab mm/slub.c:1916 [inline] new_slab_objects mm/slub.c:2662 [inline] ___slab_alloc+0x4ba/0x820 mm/slub.c:2825 __slab_alloc.constprop.0+0xa7/0xf0 mm/slub.c:2865 slab_alloc_node mm/slub.c:2947 [inline] slab_alloc mm/slub.c:2989 [inline] __kmalloc+0x312/0x330 mm/slub.c:4133 kmalloc include/linux/slab.h:596 [inline] kzalloc include/linux/slab.h:721 [inline] __register_sysctl_table+0x112/0x1090 fs/proc/proc_sysctl.c:1318 rds_tcp_init_net+0x1db/0x4f0 net/rds/tcp.c:551 ops_init+0xaf/0x470 net/core/net_namespace.c:140 __register_pernet_operations net/core/net_namespace.c:1137 [inline] register_pernet_operations+0x35a/0x850 net/core/net_namespace.c:1214 register_pernet_device+0x26/0x70 net/core/net_namespace.c:1301 rds_tcp_init+0x77/0xe0 net/rds/tcp.c:717 do_one_initcall+0x103/0x650 init/main.c:1285 do_initcall_level init/main.c:1360 [inline] do_initcalls init/main.c:1376 [inline] do_basic_setup init/main.c:1396 [inline] kernel_init_freeable+0x6b8/0x741 init/main.c:1598 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1343 [inline] free_pcp_prepare+0x312/0x7d0 mm/page_alloc.c:1394 free_unref_page_prepare mm/page_alloc.c:3329 [inline] 
free_unref_page+0x19/0x690 mm/page_alloc.c:3408 __vunmap+0x783/0xb70 mm/vmalloc.c:2587 free_work+0x58/0x70 mm/vmalloc.c:82 process_one_work+0x98d/0x1630 kernel/workqueue.c:2276 worker_thread+0x658/0x11f0 kernel/workqueue.c:2422 kthread+0x3e5/0x4d0 kernel/kthread.c:319 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Memory state around the buggy address: ffff88802821e300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88802821e380: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc >ffff88802821e400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88802821e480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff88802821e500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== - The race fix has two parts. * Changing the code to guarantee that ucounts->count is only decremented when ucounts_lock is held. This guarantees that find_ucounts will never find a structure with a zero reference count. * Changing alloc_ucounts to increment ucounts->count while ucounts_lock is held. This guarantees the reference count on the found data structure will not be decremented to zero (and the data structure freed) before the reference count is incremented. -- Eric Biederman Reported-by: syzbot+01985d7909f9468f013c@syzkaller.appspotmail.com Reported-by: syzbot+59dd63761094a80ad06d@syzkaller.appspotmail.com Reported-by: syzbot+6cd79f45bb8fa1c9eeae@syzkaller.appspotmail.com Reported-by: syzbot+b6e65bd125a05f803d6b@syzkaller.appspotmail.com Fixes: b6c336528926 ("Use atomic_t for ucounts reference counting") Cc: Hillf Danton Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/7b2ace1759b281cdd2d66101d6b305deef722efb.1627397820.git.legion@kernel.org Signed-off-by: Eric W. 
Biederman --- kernel/ucount.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 87799e2379bd..77be3bbe3cc4 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -160,6 +160,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; + long overflow; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -184,8 +185,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) return new; } } + overflow = atomic_add_negative(1, &ucounts->count); spin_unlock_irq(&ucounts_lock); - ucounts = get_ucounts(ucounts); + if (overflow) { + put_ucounts(ucounts); + return NULL; + } return ucounts; } @@ -193,8 +198,7 @@ void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); From b946dbcfa4df80ec81b442964e07ad37000cc059 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 28 Jul 2021 16:38:29 +1000 Subject: [PATCH 583/794] cifs: add missing parsing of backupuid We lost parsing of backupuid in the switch to new mount API. Add it back. 
Signed-off-by: Ronnie Sahlberg Reviewed-by: Shyam Prasad N Cc: # v5.11+ Reported-by: Xiaoli Feng Signed-off-by: Steve French --- fs/cifs/fs_context.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 9a59d7ff9a11..eed59bc1d913 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -925,6 +925,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->cred_uid = uid; ctx->cruid_specified = true; break; + case Opt_backupuid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + goto cifs_parse_mount_err; + ctx->backupuid = uid; + ctx->backupuid_specified = true; + break; case Opt_backupgid: gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) From f5e81d1117501546b7be050c5fbafa6efd2c722c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 13 Jul 2021 08:18:31 +0000 Subject: [PATCH 584/794] bpf: Introduce BPF nospec instruction for mitigating Spectre v4 In case of JITs, each of the JIT backends compiles the BPF nospec instruction /either/ to a machine instruction which emits a speculation barrier /or/ to /no/ machine instruction in case the underlying architecture is not affected by Speculative Store Bypass or has different mitigations in place already. This covers both x86 and (implicitly) arm64: In case of x86, we use 'lfence' instruction for mitigation. In case of arm64, we rely on the firmware mitigation as controlled via the ssbd kernel parameter. Whenever the mitigation is enabled, it works for all of the kernel code with no need to provide any additional instructions here (hence only comment in arm64 JIT). Other archs can follow as needed. The BPF nospec instruction is specifically targeting Spectre v4 since i) we don't use a serialization barrier for the Spectre v1 case, and ii) mitigation instructions for v1 and v4 might be different on some archs. 
The BPF nospec is required for a future commit, where the BPF verifier does annotate intermediate BPF programs with speculation barriers. Co-developed-by: Piotr Krysiuk Co-developed-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Signed-off-by: Piotr Krysiuk Signed-off-by: Benedict Schlueter Acked-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 3 +++ arch/arm64/net/bpf_jit_comp.c | 13 +++++++++++++ arch/mips/net/ebpf_jit.c | 3 +++ arch/powerpc/net/bpf_jit_comp32.c | 6 ++++++ arch/powerpc/net/bpf_jit_comp64.c | 6 ++++++ arch/riscv/net/bpf_jit_comp32.c | 4 ++++ arch/riscv/net/bpf_jit_comp64.c | 4 ++++ arch/s390/net/bpf_jit_comp.c | 5 +++++ arch/sparc/net/bpf_jit_comp_64.c | 3 +++ arch/x86/net/bpf_jit_comp.c | 7 +++++++ arch/x86/net/bpf_jit_comp32.c | 6 ++++++ include/linux/filter.h | 15 +++++++++++++++ kernel/bpf/core.c | 19 ++++++++++++++++++- kernel/bpf/disasm.c | 16 +++++++++------- 14 files changed, 102 insertions(+), 8 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 897634d0a67c..a951276f0547 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1602,6 +1602,9 @@ exit: rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dccf98a37283..41c23f474ea6 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -823,6 +823,19 @@ emit_cond_jmp: return ret; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + /* + * Nothing required here. + * + * In case of arm64, we rely on the firmware mitigation of + * Speculative Store Bypass as controlled via the ssbd kernel + * parameter. 
Whenever the mitigation is enabled, it works + * for all of the kernel code with no need to provide any + * additional instructions. + */ + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 939dd06764bc..3a73e9375712 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1355,6 +1355,9 @@ jeq_common: } break; + case BPF_ST | BPF_NOSPEC: /* speculation barrier */ + break; + case BPF_ST | BPF_B | BPF_MEM: case BPF_ST | BPF_H | BPF_MEM: case BPF_ST | BPF_W | BPF_MEM: diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 34bb1583fc0c..beb12cbc8c29 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -737,6 +737,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } break; + /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + /* * BPF_ST(X) */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index de8595880fee..b87a63dba9c8 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -627,6 +627,12 @@ emit_clear: } break; + /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + /* * BPF_ST(X) */ diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index 81de865f4c7c..e6497424cbf6 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -1251,6 +1251,10 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, return -1; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_W: diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 87e3bf5b9086..3af4131c22c7 100644 --- 
a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -939,6 +939,10 @@ out_be: emit_ld(rd, 0, RV_REG_T1, ctx); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: emit_imm(RV_REG_T1, imm, ctx); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 2ae419f5115a..88419263a89a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1153,6 +1153,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, break; } break; + /* + * BPF_NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; /* * BPF_ST(X) */ diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 4b8d3c65d266..9a2f20cbd48b 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) return 1; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4b951458c9fc..16d76f814e9b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1219,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; + /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: if (is_ereg(dst_reg)) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 3da88ded6ee3..3bfda5f502cb 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, i++; break; } + 
/* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: diff --git a/include/linux/filter.h b/include/linux/filter.h index 472f97074da0..83b896044e79 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -73,6 +73,11 @@ struct ctl_table_header; /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 +/* unused opcode to mark speculation barrier for mitigating + * Speculative Store Bypass + */ +#define BPF_NOSPEC 0xc0 + /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. @@ -390,6 +395,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = 0, \ .imm = 0 }) +/* Speculation barrier */ + +#define BPF_ST_NOSPEC() \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_NOSPEC, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + /* Internal classic blocks for direct assignment */ #define __BPF_STMT(CODE, K) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9b1577498373..b1a5fc04492b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,6 +32,8 @@ #include #include #include + +#include #include /* Registers */ @@ -1377,6 +1379,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) /* Non-UAPI available opcodes. 
*/ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, + [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC, [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B, [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H, [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W, @@ -1621,7 +1624,21 @@ out: COND_JMP(s, JSGE, >=) COND_JMP(s, JSLE, <=) #undef COND_JMP - /* STX and ST and LDX*/ + /* ST, STX and LDX*/ + ST_NOSPEC: + /* Speculation barrier for mitigating Speculative Store Bypass. + * In case of arm64, we rely on the firmware mitigation as + * controlled via the ssbd kernel parameter. Whenever the + * mitigation is enabled, it works for all of the kernel code + * with no need to provide any additional instructions here. + * In case of x86, we use 'lfence' insn for mitigation. We + * reuse preexisting logic from Spectre v1 mitigation that + * happens to produce the required code on x86 for v4 as well. + */ +#ifdef CONFIG_X86 + barrier_nospec(); +#endif + CONT; #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index bbfc6bb79240..ca3cd9aaa6ce 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -206,15 +206,17 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, verbose(cbs->private_data, "BUG_%02x\n", insn->code); } } else if (class == BPF_ST) { - if (BPF_MODE(insn->code) != BPF_MEM) { + if (BPF_MODE(insn->code) == BPF_MEM) { + verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, + insn->off, insn->imm); + } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) { + verbose(cbs->private_data, "(%02x) nospec\n", insn->code); + } else { verbose(cbs->private_data, "BUG_st_%02x\n", insn->code); - return; } - verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - 
insn->dst_reg, - insn->off, insn->imm); } else if (class == BPF_LDX) { if (BPF_MODE(insn->code) != BPF_MEM) { verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code); From 2039f26f3aca5b0e419b98f65dd36481337b86ee Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 13 Jul 2021 08:18:31 +0000 Subject: [PATCH 585/794] bpf: Fix leakage due to insufficient speculative store bypass mitigation Spectre v4 gadgets make use of memory disambiguation, which is a set of techniques that execute memory access instructions, that is, loads and stores, out of program order; Intel's optimization manual, section 2.4.4.5: A load instruction micro-op may depend on a preceding store. Many microarchitectures block loads until all preceding store addresses are known. The memory disambiguator predicts which loads will not depend on any previous stores. When the disambiguator predicts that a load does not have such a dependency, the load takes its data from the L1 data cache. Eventually, the prediction is verified. If an actual conflict is detected, the load and all succeeding instructions are re-executed. af86ca4e3088 ("bpf: Prevent memory disambiguation attack") tried to mitigate this attack by sanitizing the memory locations through preemptive "fast" (low latency) stores of zero prior to the actual "slow" (high latency) store of a pointer value such that upon dependency misprediction the CPU then speculatively executes the load of the pointer value and retrieves the zero value instead of the attacker controlled scalar value previously stored at that location, meaning, subsequent access in the speculative domain is then redirected to the "zero page". The sanitized preemptive store of zero prior to the actual "slow" store is done through a simple ST instruction based on r10 (frame pointer) with relative offset to the stack location that the verifier has been tracking on the original used register for STX, which does not have to be r10. 
Thus, there are no memory dependencies for this store, since it's only using r10 and immediate constant of zero; hence af86ca4e3088 /assumed/ a low latency operation. However, a recent attack demonstrated that this mitigation is not sufficient since the preemptive store of zero could also be turned into a "slow" store and is thus bypassed as well: [...] // r2 = oob address (e.g. scalar) // r7 = pointer to map value 31: (7b) *(u64 *)(r10 -16) = r2 // r9 will remain "fast" register, r10 will become "slow" register below 32: (bf) r9 = r10 // JIT maps BPF reg to x86 reg: // r9 -> r15 (callee saved) // r10 -> rbp // train store forward prediction to break dependency link between both r9 // and r10 by evicting them from the predictor's LRU table. 33: (61) r0 = *(u32 *)(r7 +24576) 34: (63) *(u32 *)(r7 +29696) = r0 35: (61) r0 = *(u32 *)(r7 +24580) 36: (63) *(u32 *)(r7 +29700) = r0 37: (61) r0 = *(u32 *)(r7 +24584) 38: (63) *(u32 *)(r7 +29704) = r0 39: (61) r0 = *(u32 *)(r7 +24588) 40: (63) *(u32 *)(r7 +29708) = r0 [...] 543: (61) r0 = *(u32 *)(r7 +25596) 544: (63) *(u32 *)(r7 +30716) = r0 // prepare call to bpf_ringbuf_output() helper. the latter will cause rbp // to spill to stack memory while r13/r14/r15 (all callee saved regs) remain // in hardware registers. rbp becomes slow due to push/pop latency. below is // disasm of bpf_ringbuf_output() helper for better visual context: // // ffffffff8117ee20: 41 54 push r12 // ffffffff8117ee22: 55 push rbp // ffffffff8117ee23: 53 push rbx // ffffffff8117ee24: 48 f7 c1 fc ff ff ff test rcx,0xfffffffffffffffc // ffffffff8117ee2b: 0f 85 af 00 00 00 jne ffffffff8117eee0 <-- jump taken // [...] 
// ffffffff8117eee0: 49 c7 c4 ea ff ff ff mov r12,0xffffffffffffffea // ffffffff8117eee7: 5b pop rbx // ffffffff8117eee8: 5d pop rbp // ffffffff8117eee9: 4c 89 e0 mov rax,r12 // ffffffff8117eeec: 41 5c pop r12 // ffffffff8117eeee: c3 ret 545: (18) r1 = map[id:4] 547: (bf) r2 = r7 548: (b7) r3 = 0 549: (b7) r4 = 4 550: (85) call bpf_ringbuf_output#194288 // instruction 551 inserted by verifier \ 551: (7a) *(u64 *)(r10 -16) = 0 | /both/ are now slow stores here // storing map value pointer r7 at fp-16 | since value of r10 is "slow". 552: (7b) *(u64 *)(r10 -16) = r7 / // following "fast" read to the same memory location, but due to dependency // misprediction it will speculatively execute before insn 551/552 completes. 553: (79) r2 = *(u64 *)(r9 -16) // in speculative domain contains attacker controlled r2. in non-speculative // domain this contains r7, and thus accesses r7 +0 below. 554: (71) r3 = *(u8 *)(r2 +0) // leak r3 As can be seen, the current speculative store bypass mitigation which the verifier inserts at line 551 is insufficient since /both/, the write of the zero sanitation as well as the map value pointer, are high latency instructions due to prior memory access via push/pop of r10 (rbp), in contrast to the low latency read in line 553 via r9 (r15), which stays in hardware registers. Thus, architecturally, fp-16 is r7; however, microarchitecturally, fp-16 can still be r2. The initial thought to address this issue was to track spilled pointer loads from stack and enforce their load via LDX through r10 as well so that /both/ the preemptive store of zero /as well as/ the load use the /same/ register such that a dependency is created between the store and load. However, this option is not sufficient either since it can be bypassed as well under speculation. An updated attack with pointer spill/fills now _all_ based on r10 would look as follows: [...] // r2 = oob address (e.g. scalar) // r7 = pointer to map value [...]
// longer store forward prediction training sequence than before. 2062: (61) r0 = *(u32 *)(r7 +25588) 2063: (63) *(u32 *)(r7 +30708) = r0 2064: (61) r0 = *(u32 *)(r7 +25592) 2065: (63) *(u32 *)(r7 +30712) = r0 2066: (61) r0 = *(u32 *)(r7 +25596) 2067: (63) *(u32 *)(r7 +30716) = r0 // store the speculative load address (scalar) this time after the store // forward prediction training. 2068: (7b) *(u64 *)(r10 -16) = r2 // preoccupy the CPU store port by running sequence of dummy stores. 2069: (63) *(u32 *)(r7 +29696) = r0 2070: (63) *(u32 *)(r7 +29700) = r0 2071: (63) *(u32 *)(r7 +29704) = r0 2072: (63) *(u32 *)(r7 +29708) = r0 2073: (63) *(u32 *)(r7 +29712) = r0 2074: (63) *(u32 *)(r7 +29716) = r0 2075: (63) *(u32 *)(r7 +29720) = r0 2076: (63) *(u32 *)(r7 +29724) = r0 2077: (63) *(u32 *)(r7 +29728) = r0 2078: (63) *(u32 *)(r7 +29732) = r0 2079: (63) *(u32 *)(r7 +29736) = r0 2080: (63) *(u32 *)(r7 +29740) = r0 2081: (63) *(u32 *)(r7 +29744) = r0 2082: (63) *(u32 *)(r7 +29748) = r0 2083: (63) *(u32 *)(r7 +29752) = r0 2084: (63) *(u32 *)(r7 +29756) = r0 2085: (63) *(u32 *)(r7 +29760) = r0 2086: (63) *(u32 *)(r7 +29764) = r0 2087: (63) *(u32 *)(r7 +29768) = r0 2088: (63) *(u32 *)(r7 +29772) = r0 2089: (63) *(u32 *)(r7 +29776) = r0 2090: (63) *(u32 *)(r7 +29780) = r0 2091: (63) *(u32 *)(r7 +29784) = r0 2092: (63) *(u32 *)(r7 +29788) = r0 2093: (63) *(u32 *)(r7 +29792) = r0 2094: (63) *(u32 *)(r7 +29796) = r0 2095: (63) *(u32 *)(r7 +29800) = r0 2096: (63) *(u32 *)(r7 +29804) = r0 2097: (63) *(u32 *)(r7 +29808) = r0 2098: (63) *(u32 *)(r7 +29812) = r0 // overwrite scalar with dummy pointer; same as before, also including the // sanitation store with 0 from the current mitigation by the verifier. 2099: (7a) *(u64 *)(r10 -16) = 0 | /both/ are now slow stores here 2100: (7b) *(u64 *)(r10 -16) = r7 | since store unit is still busy. // load from stack intended to bypass stores. 2101: (79) r2 = *(u64 *)(r10 -16) 2102: (71) r3 = *(u8 *)(r2 +0) // leak r3 [...] 
Looking at the CPU microarchitecture, the scheduler might issue loads (such as seen in line 2101) before stores (lines 2099, 2100) because the load execution units become available while the store execution unit is still busy with the sequence of dummy stores (lines 2069-2098). And so the load may use the prior stored scalar from r2 at address r10 -16 for speculation. The updated attack may work less reliably on CPU microarchitectures where loads and stores share execution resources. This concludes that the sanitizing with zero stores from af86ca4e3088 ("bpf: Prevent memory disambiguation attack") is insufficient. Moreover, the detection of stack reuse from af86ca4e3088 where previously data (STACK_MISC) has been written to a given stack slot where a pointer value is now to be stored does not have sufficient coverage as precondition for the mitigation either; for several reasons outlined as follows: 1) Stack content from prior program runs could still be preserved and is therefore not "random", best example is to split a speculative store bypass attack between tail calls, program A would prepare and store the oob address at a given stack slot and then tail call into program B which does the "slow" store of a pointer to the stack with subsequent "fast" read. From program B PoV such stack slot type is STACK_INVALID, and therefore also must be subject to mitigation. 2) The STACK_SPILL must not be coupled to register_is_const(&stack->spilled_ptr) condition, for example, the previous content of that memory location could also be a pointer to map or map value. Without the fix, a speculative store bypass is not mitigated in such precondition and can then lead to a type confusion in the speculative domain leaking kernel memory near these pointer types. While brainstorming on various alternative mitigation possibilities, we also stumbled upon a retrospective from Chrome developers [0]: [...]
For variant 4, we implemented a mitigation to zero the unused memory of the heap prior to allocation, which cost about 1% when done concurrently and 4% for scavenging. Variant 4 defeats everything we could think of. We explored more mitigations for variant 4 but the threat proved to be more pervasive and dangerous than we anticipated. For example, stack slots used by the register allocator in the optimizing compiler could be subject to type confusion, leading to pointer crafting. Mitigating type confusion for stack slots alone would have required a complete redesign of the backend of the optimizing compiler, perhaps man years of work, without a guarantee of completeness. [...] From BPF side, the problem space is reduced, however, options are rather limited. One idea that has been explored was to xor-obfuscate pointer spills to the BPF stack: [...] // preoccupy the CPU store port by running sequence of dummy stores. [...] 2106: (63) *(u32 *)(r7 +29796) = r0 2107: (63) *(u32 *)(r7 +29800) = r0 2108: (63) *(u32 *)(r7 +29804) = r0 2109: (63) *(u32 *)(r7 +29808) = r0 2110: (63) *(u32 *)(r7 +29812) = r0 // overwrite scalar with dummy pointer; xored with random 'secret' value // of 943576462 before store ... 2111: (b4) w11 = 943576462 2112: (af) r11 ^= r7 2113: (7b) *(u64 *)(r10 -16) = r11 2114: (79) r11 = *(u64 *)(r10 -16) 2115: (b4) w2 = 943576462 2116: (af) r2 ^= r11 // ... and restored with the same 'secret' value with the help of AX reg. 2117: (71) r3 = *(u8 *)(r2 +0) [...] While the above would not prevent speculation, it would make data leakage infeasible by directing it to random locations. In order to be effective and prevent type confusion under speculation, such random secret would have to be regenerated for each store. The additional complexity involved for a tracking mechanism that prevents jumps such that restoring spilled pointers would not get corrupted is not worth the gain for unprivileged. 
Hence, the fix in here eventually opted for emitting a non-public BPF_ST | BPF_NOSPEC instruction which the x86 JIT translates into a lfence opcode. Inserting the latter in between the store and load instruction is one of the mitigations options [1]. The x86 instruction manual notes: [...] An LFENCE that follows an instruction that stores to memory might complete before the data being stored have become globally visible. [...] The latter meaning that the preceding store instruction finished execution and the store is at minimum guaranteed to be in the CPU's store queue, but it's not guaranteed to be in that CPU's L1 cache at that point (globally visible). The latter would only be guaranteed via sfence. So the load which is guaranteed to execute after the lfence for that local CPU would have to rely on store-to-load forwarding. [2], in section 2.3 on store buffers says: [...] For every store operation that is added to the ROB, an entry is allocated in the store buffer. This entry requires both the virtual and physical address of the target. Only if there is no free entry in the store buffer, the frontend stalls until there is an empty slot available in the store buffer again. Otherwise, the CPU can immediately continue adding subsequent instructions to the ROB and execute them out of order. On Intel CPUs, the store buffer has up to 56 entries. [...] One small upside on the fix is that it lifts constraints from af86ca4e3088 where the sanitize_stack_off relative to r10 must be the same when coming from different paths. The BPF_ST | BPF_NOSPEC gets emitted after a BPF_STX or BPF_ST instruction. This happens either when we store a pointer or data value to the BPF stack for the first time, or upon later pointer spills. The former needs to be enforced since otherwise stale stack data could be leaked under speculation as outlined earlier. 
For non-x86 JITs the BPF_ST | BPF_NOSPEC mapping is currently optimized away, but others could emit a speculation barrier as well if necessary. For real-world unprivileged programs e.g. generated by LLVM, pointer spill/fill is only generated upon register pressure and LLVM only tries to do that for pointers which are not used often. The program main impact will be the initial BPF_ST | BPF_NOSPEC sanitation for the STACK_INVALID case when the first write to a stack slot occurs e.g. upon map lookup. In future we might refine ways to mitigate the latter cost. [0] https://arxiv.org/pdf/1902.05178.pdf [1] https://msrc-blog.microsoft.com/2018/05/21/analysis-and-mitigation-of-speculative-store-bypass-cve-2018-3639/ [2] https://arxiv.org/pdf/1905.05725.pdf Fixes: af86ca4e3088 ("bpf: Prevent memory disambiguation attack") Fixes: f7cf25b2026d ("bpf: track spill/fill of constants") Co-developed-by: Piotr Krysiuk Co-developed-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Signed-off-by: Piotr Krysiuk Signed-off-by: Benedict Schlueter Acked-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- kernel/bpf/verifier.c | 87 +++++++++++++----------------------- 2 files changed, 33 insertions(+), 56 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7ba7e800d472..828d08afeee0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -340,8 +340,8 @@ struct bpf_insn_aux_data { }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ - int sanitize_stack_off; /* stack slot to be cleared */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ + bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ u8 alu_state; /* used in combination with alu_limit */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 657062cb4d85..f9bda5476ea5 100644 
--- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2610,6 +2610,19 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, cur = env->cur_state->frame[env->cur_state->curframe]; if (value_regno >= 0) reg = &cur->regs[value_regno]; + if (!env->bypass_spec_v4) { + bool sanitize = reg && is_spillable_regtype(reg->type); + + for (i = 0; i < size; i++) { + if (state->stack[spi].slot_type[i] == STACK_INVALID) { + sanitize = true; + break; + } + } + + if (sanitize) + env->insn_aux_data[insn_idx].sanitize_stack_spill = true; + } if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { @@ -2632,47 +2645,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, verbose(env, "invalid size of register spill\n"); return -EACCES; } - if (state != cur && reg->type == PTR_TO_STACK) { verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); return -EINVAL; } - - if (!env->bypass_spec_v4) { - bool sanitize = false; - - if (state->stack[spi].slot_type[0] == STACK_SPILL && - register_is_const(&state->stack[spi].spilled_ptr)) - sanitize = true; - for (i = 0; i < BPF_REG_SIZE; i++) - if (state->stack[spi].slot_type[i] == STACK_MISC) { - sanitize = true; - break; - } - if (sanitize) { - int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; - int soff = (-spi - 1) * BPF_REG_SIZE; - - /* detected reuse of integer stack slot with a pointer - * which means either llvm is reusing stack slot or - * an attacker is trying to exploit CVE-2018-3639 - * (speculative store bypass) - * Have to sanitize that slot with preemptive - * store of zero. 
- */ - if (*poff && *poff != soff) { - /* disallow programs where single insn stores - * into two different stack slots, since verifier - * cannot sanitize them - */ - verbose(env, - "insn %d cannot access two stack slots fp%d and fp%d", - insn_idx, *poff, soff); - return -EINVAL; - } - *poff = soff; - } - } save_register_state(state, spi, reg); } else { u8 type = STACK_MISC; @@ -11913,35 +11889,33 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { bpf_convert_ctx_access_t convert_ctx_access; + bool ctx_access; if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || - insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) { type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || - insn->code == (BPF_STX | BPF_MEM | BPF_H) || - insn->code == (BPF_STX | BPF_MEM | BPF_W) || - insn->code == (BPF_STX | BPF_MEM | BPF_DW)) + ctx_access = true; + } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || + insn->code == (BPF_STX | BPF_MEM | BPF_H) || + insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW) || + insn->code == (BPF_ST | BPF_MEM | BPF_B) || + insn->code == (BPF_ST | BPF_MEM | BPF_H) || + insn->code == (BPF_ST | BPF_MEM | BPF_W) || + insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - else + ctx_access = BPF_CLASS(insn->code) == BPF_STX; + } else { continue; + } if (type == BPF_WRITE && - env->insn_aux_data[i + delta].sanitize_stack_off) { + env->insn_aux_data[i + delta].sanitize_stack_spill) { struct bpf_insn patch[] = { - /* Sanitize suspicious stack slot with zero. 
- * There are no memory dependencies for this store, - * since it's only using frame pointer and immediate - * constant of zero - */ - BPF_ST_MEM(BPF_DW, BPF_REG_FP, - env->insn_aux_data[i + delta].sanitize_stack_off, - 0), - /* the original STX instruction will immediately - * overwrite the same stack slot with appropriate value - */ *insn, + BPF_ST_NOSPEC(), }; cnt = ARRAY_SIZE(patch); @@ -11955,6 +11929,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) continue; } + if (!ctx_access) + continue; + switch (env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) From d712d3fb484b7fa8d1d57e9ca6f134bb9d8c18b1 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Wed, 7 Jul 2021 11:59:45 -0700 Subject: [PATCH 586/794] scsi: pm80xx: Fix TMF task completion race condition The TMF timeout timer may trigger at the same time when the response from a controller is being handled. When this happens the SAS task may get freed before the response processing is finished. Fix this by calling complete() only when SAS_TASK_STATE_DONE is not set. A similar race condition was fixed in commit b90cd6f2b905 ("scsi: libsas: fix a race condition when smp task timeout") Link: https://lore.kernel.org/r/20210707185945.35559-1-ipylypiv@google.com Reviewed-by: Vishakha Channapattan Acked-by: Jack Wang Signed-off-by: Igor Pylypiv Signed-off-by: Martin K. 
Petersen --- drivers/scsi/pm8001/pm8001_sas.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 48548a95327b..32e60f0c3b14 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -684,8 +684,7 @@ int pm8001_dev_found(struct domain_device *dev) void pm8001_task_done(struct sas_task *task) { - if (!del_timer(&task->slow_task->timer)) - return; + del_timer(&task->slow_task->timer); complete(&task->slow_task->completion); } @@ -693,9 +692,14 @@ static void pm8001_tmf_timedout(struct timer_list *t) { struct sas_task_slow *slow = from_timer(slow, t, timer); struct sas_task *task = slow->task; + unsigned long flags; - task->task_state_flags |= SAS_TASK_STATE_ABORTED; - complete(&task->slow_task->completion); + spin_lock_irqsave(&task->task_state_lock, flags); + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + complete(&task->slow_task->completion); + } + spin_unlock_irqrestore(&task->task_state_lock, flags); } #define PM8001_TASK_TIMEOUT 20 @@ -748,13 +752,10 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev, } res = -TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task[%x]timeout.\n", - tmf->tmf); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n", + tmf->tmf); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && @@ -834,12 +835,9 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha, wait_for_completion(&task->slow_task->completion); res = TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. 
*/ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task timeout.\n"); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n"); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && From 77541f78eadfe9fdb018a7b8b69f0f2af2cf4b82 Mon Sep 17 00:00:00 2001 From: Harshvardhan Jha Date: Thu, 8 Jul 2021 13:16:42 +0530 Subject: [PATCH 587/794] scsi: megaraid_mm: Fix end of loop tests for list_for_each_entry() The list_for_each_entry() iterator, "adapter" in this code, can never be NULL. If we exit the loop without finding the correct adapter then "adapter" points invalid memory that is an offset from the list head. This will eventually lead to memory corruption and presumably a kernel crash. Link: https://lore.kernel.org/r/20210708074642.23599-1-harshvardhan.jha@oracle.com Acked-by: Sumit Saxena Signed-off-by: Harshvardhan Jha Signed-off-by: Martin K. 
Petersen --- drivers/scsi/megaraid/megaraid_mm.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index abf7b401f5b9..c509440bd161 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -238,7 +238,7 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) mimd_t mimd; uint32_t adapno; int iterator; - + bool is_found; if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) { *rval = -EFAULT; @@ -254,12 +254,16 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) adapter = NULL; iterator = 0; + is_found = false; list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } - if (!adapter) { + if (!is_found) { *rval = -ENODEV; return NULL; } @@ -725,6 +729,7 @@ ioctl_done(uioc_t *kioc) uint32_t adapno; int iterator; mraid_mmadp_t* adapter; + bool is_found; /* * When the kioc returns from driver, make sure it still doesn't @@ -747,19 +752,23 @@ ioctl_done(uioc_t *kioc) iterator = 0; adapter = NULL; adapno = kioc->adapno; + is_found = false; con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed " "ioctl that was timedout before\n")); list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } kioc->timedout = 0; - if (adapter) { + if (is_found) mraid_mm_dealloc_kioc( adapter, kioc ); - } + } else { wake_up(&wait_q); From 640b7ea5f888b521dcf28e2564ce75d08a783fd7 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 27 Jul 2021 23:38:24 +0300 Subject: [PATCH 588/794] alpha: register early reserved memory in memblock The memory reserved by console/PALcode or non-volatile memory is not added to memblock.memory. 
Since commit fa3354e4ea39 (mm: free_area_init: use maximal zone PFNs rather than zone sizes) the initialization of the memory map relies on the accuracy of memblock.memory to properly calculate zone sizes. The holes in memblock.memory caused by absent regions reserved by the firmware cause incorrect initialization of struct pages which leads to BUG() during the initial page freeing: BUG: Bad page state in process swapper pfn:2ffc53 page:fffffc000ecf14c0 refcount:0 mapcount:1 mapping:0000000000000000 index:0x0 flags: 0x0() raw: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 raw: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.7.0-03841-gfa3354e4ea39-dirty #26 fffffc0001b5bd68 fffffc0001b5be80 fffffc00011cd148 fffffc000ecf14c0 fffffc00019803df fffffc0001b5be80 fffffc00011ce340 fffffc000ecf14c0 0000000000000000 fffffc0001b5be80 fffffc0001b482c0 fffffc00027d6618 fffffc00027da7d0 00000000002ff97a 0000000000000000 fffffc0001b5be80 fffffc00011d1abc fffffc000ecf14c0 fffffc0002d00000 fffffc0001b5be80 fffffc0001b2350c 0000000000300000 fffffc0001b48298 fffffc0001b482c0 Trace: [] bad_page+0x168/0x1b0 [] free_pcp_prepare+0x1e0/0x290 [] free_unref_page+0x2c/0xa0 [] cmp_ex_sort+0x0/0x30 [] cmp_ex_sort+0x0/0x30 [] _stext+0x1c/0x20 Fix this by registering the reserved ranges in memblock.memory. 
Link: https://lore.kernel.org/lkml/20210726192311.uffqnanxw3ac5wwi@ivybridge Fixes: fa3354e4ea39 ("mm: free_area_init: use maximal zone PFNs rather than zone sizes") Reported-by: Matt Turner Cc: Signed-off-by: Mike Rapoport Signed-off-by: Matt Turner --- arch/alpha/kernel/setup.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 7d56c217b235..b4fbbba30aa2 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -319,18 +319,19 @@ setup_memory(void *kernel_end) i, cluster->usage, cluster->start_pfn, cluster->start_pfn + cluster->numpages); - /* Bit 0 is console/PALcode reserved. Bit 1 is - non-volatile memory -- we might want to mark - this for later. */ - if (cluster->usage & 3) - continue; - end = cluster->start_pfn + cluster->numpages; if (end > max_low_pfn) max_low_pfn = end; memblock_add(PFN_PHYS(cluster->start_pfn), cluster->numpages << PAGE_SHIFT); + + /* Bit 0 is console/PALcode reserved. Bit 1 is + non-volatile memory -- we might want to mark + this for later. */ + if (cluster->usage & 3) + memblock_reserve(PFN_PHYS(cluster->start_pfn), + cluster->numpages << PAGE_SHIFT); } /* From aa35772f61752d4c636d46be51a4f7ca6c029ee6 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 23 Jun 2021 09:02:47 +0200 Subject: [PATCH 589/794] usb: cdns3: Fixed incorrect gadget state For delayed status phase, the usb_gadget->state was set to USB_STATE_ADDRESS and it has never been updated to USB_STATE_CONFIGURED. Patch updates the gadget state to correct USB_STATE_CONFIGURED. As a result of this bug the controller was not able to enter to Test Mode while using MSC function. 
Cc: Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210623070247.46151-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdns3-ep0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/cdns3/cdns3-ep0.c b/drivers/usb/cdns3/cdns3-ep0.c index 02ec7ab4bb48..e29989d57bef 100644 --- a/drivers/usb/cdns3/cdns3-ep0.c +++ b/drivers/usb/cdns3/cdns3-ep0.c @@ -731,6 +731,7 @@ static int cdns3_gadget_ep0_queue(struct usb_ep *ep, request->actual = 0; priv_dev->status_completion_no_call = true; priv_dev->pending_status_request = request; + usb_gadget_set_state(&priv_dev->gadget, USB_STATE_CONFIGURED); spin_unlock_irqrestore(&priv_dev->lock, flags); /* From aa82f94e869edd72f4fadb08c6ffca8927e4934e Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 25 Jun 2021 12:25:02 +0200 Subject: [PATCH 590/794] usb: cdnsp: Fix incorrect supported maximum speed The driver hardcoded the maximum supported speed to USB_SPEED_SUPER_PLUS during initialization, but it should use the speed returned by the usb_get_maximum_speed() function.
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210625102502.26336-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index c23f53e9b1ef..27df0c697897 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1882,7 +1882,7 @@ static int __cdnsp_gadget_init(struct cdns *cdns) pdev->gadget.name = "cdnsp-gadget"; pdev->gadget.speed = USB_SPEED_UNKNOWN; pdev->gadget.sg_supported = 1; - pdev->gadget.max_speed = USB_SPEED_SUPER_PLUS; + pdev->gadget.max_speed = max_speed; pdev->gadget.lpm_capable = 1; pdev->setup_buf = kzalloc(CDNSP_EP0_SETUP_SIZE, GFP_KERNEL); From e913aada06830338633fb8524733b0ad3d38a7c1 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 23 Jun 2021 09:27:28 +0200 Subject: [PATCH 591/794] usb: cdnsp: Fixed issue with ZLP The condition "if (need_zero_pkt && zero_len_trb)" was always false and it caused that TRB for ZLP was not prepared. Fix causes that after preparing last TRB in TD, the driver prepares additional TD with ZLP when a ZLP is required. 
Cc: Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210623072728.41275-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-ring.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 68972746e363..1b1438457fb0 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1932,15 +1932,13 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) } if (enqd_len + trb_buff_len >= full_len) { - if (need_zero_pkt && zero_len_trb) { - zero_len_trb = true; - } else { - field &= ~TRB_CHAIN; - field |= TRB_IOC; - more_trbs_coming = false; - need_zero_pkt = false; - preq->td.last_trb = ring->enqueue; - } + if (need_zero_pkt) + zero_len_trb = !zero_len_trb; + + field &= ~TRB_CHAIN; + field |= TRB_IOC; + more_trbs_coming = false; + preq->td.last_trb = ring->enqueue; } /* Only set interrupt on short packet for OUT endpoints. */ @@ -1955,7 +1953,7 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); - cdnsp_queue_trb(pdev, ring, more_trbs_coming | need_zero_pkt, + cdnsp_queue_trb(pdev, ring, more_trbs_coming | zero_len_trb, lower_32_bits(send_addr), upper_32_bits(send_addr), length_field, From 5df09c15bab98463203c83ecab88b9321466e626 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 22 Jun 2021 21:37:48 +0200 Subject: [PATCH 592/794] usb: cdnsp: Fix the IMAN_IE_SET and IMAN_IE_CLEAR macro IMAN_IE is BIT(1), so these macros are respectively equivalent to BIT(1) and 0, whatever the value of 'p'. The purpose was to set and reset a single bit in 'p'. Fix these macros to do that correctly.
Acked-by: Pawel Laszczak Fixes: e93e58d27402 ("usb: cdnsp: Device side header file for CDNSP driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/d12bfcc9cbffb89e27b120668821b3c4f09b6755.1624390584.git.christophe.jaillet@wanadoo.fr Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 783ca8ffde00..f740fa6089d8 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -383,8 +383,8 @@ struct cdnsp_intr_reg { #define IMAN_IE BIT(1) #define IMAN_IP BIT(0) /* bits 2:31 need to be preserved */ -#define IMAN_IE_SET(p) (((p) & IMAN_IE) | 0x2) -#define IMAN_IE_CLEAR(p) (((p) & IMAN_IE) & ~(0x2)) +#define IMAN_IE_SET(p) ((p) | IMAN_IE) +#define IMAN_IE_CLEAR(p) ((p) & ~IMAN_IE) /* IMOD - Interrupter Moderation Register - irq_control bitmasks. */ /* From 0d4867a185460397af56b9afe3e2243d3e610e37 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Wed, 21 Jul 2021 20:01:41 +0300 Subject: [PATCH 593/794] ALSA: hda/realtek: add mic quirk for Acer SF314-42 The Acer Swift SF314-42 laptop is using Realtek ALC255 codec. Add a quirk so microphone in a headset connected via the right-hand side jack is usable. 
Signed-off-by: Alexander Monakov Cc: Link: https://lore.kernel.org/r/20210721170141.24807-1-amonakov@ispras.ru Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 14e1ab7c7954..21c521596c9d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8278,6 +8278,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), From db8d3a21275c807a4047a21bde3b57d49ca55d82 Mon Sep 17 00:00:00 2001 From: Michael Zaidman Date: Thu, 29 Jul 2021 13:26:03 +0300 Subject: [PATCH 594/794] HID: ft260: fix device removal due to USB disconnect This commit fixes a functional regression introduced by the commit 82f09a637dd3 ("HID: ft260: improve error handling of ft260_hid_feature_report_get()") when upon USB disconnect, the FTDI FT260 i2c device is still available within the /dev folder. In my company's product, where the host USB to FT260 USB connection is hard-wired in the PCB, the issue is not reproducible. To reproduce it, I used the VirtualBox Ubuntu 20.04 VM and the UMFT260EV1A development module for the FTDI FT260 chip: Plug the UMFT260EV1A module into a USB port and attach it to VM. 
The VM shows 2 i2c devices under the /dev: michael@michael-VirtualBox:~$ ls /dev/i2c-* /dev/i2c-0 /dev/i2c-1 The i2c-0 is not related to the FTDI FT260: michael@michael-VirtualBox:~$ cat /sys/bus/i2c/devices/i2c-0/name SMBus PIIX4 adapter at 4100 The i2c-1 is created by hid-ft260.ko: michael@michael-VirtualBox:~$ cat /sys/bus/i2c/devices/i2c-1/name FT260 usb-i2c bridge on hidraw1 Now, detach the FTDI FT260 USB device from VM. We expect the /dev/i2c-1 to disappear, but it's still here: michael@michael-VirtualBox:~$ ls /dev/i2c-* /dev/i2c-0 /dev/i2c-1 And the kernel log shows: [ +0.001202] usb 2-2: USB disconnect, device number 3 [ +0.000109] ft260 0003:0403:6030.0002: failed to retrieve system status [ +0.000316] ft260 0003:0403:6030.0003: failed to retrieve system status It happens because the commit 82f09a637dd3 changed the ft260_get_system_config() return logic. This caused the ft260_is_interface_enabled() to exit with error upon the FT260 device USB disconnect, which in turn, aborted the ft260_remove() before deleting the FT260 i2c device and cleaning its sysfs stuff. This commit restores the FT260 USB removal functionality and improves the ft260_is_interface_enabled() code to handle correctly all chip modes defined by the device interface configuration pins DCNF0 and DCNF1. 
Signed-off-by: Michael Zaidman Acked-by: Aaron Jones (FTDI-UK) Signed-off-by: Jiri Kosina --- drivers/hid/hid-ft260.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c index 6f10df2042c4..4ef1c3b8094e 100644 --- a/drivers/hid/hid-ft260.c +++ b/drivers/hid/hid-ft260.c @@ -742,7 +742,7 @@ static int ft260_is_interface_enabled(struct hid_device *hdev) int ret; ret = ft260_get_system_config(hdev, &cfg); - if (ret) + if (ret < 0) return ret; ft260_dbg("interface: 0x%02x\n", interface); @@ -754,23 +754,16 @@ static int ft260_is_interface_enabled(struct hid_device *hdev) switch (cfg.chip_mode) { case FT260_MODE_ALL: case FT260_MODE_BOTH: - if (interface == 1) { + if (interface == 1) hid_info(hdev, "uart interface is not supported\n"); - return 0; - } - ret = 1; + else + ret = 1; break; case FT260_MODE_UART: - if (interface == 0) { - hid_info(hdev, "uart is unsupported on interface 0\n"); - ret = 0; - } + hid_info(hdev, "uart interface is not supported\n"); break; case FT260_MODE_I2C: - if (interface == 1) { - hid_info(hdev, "i2c is unsupported on interface 1\n"); - ret = 0; - } + ret = 1; break; } return ret; @@ -1004,11 +997,9 @@ err_hid_stop: static void ft260_remove(struct hid_device *hdev) { - int ret; struct ft260_device *dev = hid_get_drvdata(hdev); - ret = ft260_is_interface_enabled(hdev); - if (ret <= 0) + if (!dev) return; sysfs_remove_group(&hdev->dev.kobj, &ft260_attr_group); From 7280305eb57dd32735f795ed4ee679bf9854f9d0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 28 Jul 2021 18:00:24 +0200 Subject: [PATCH 595/794] btrfs: calculate number of eb pages properly in csum_tree_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building with -Warray-bounds on systems with 64K pages there's a warning: fs/btrfs/disk-io.c: In function ‘csum_tree_block’: fs/btrfs/disk-io.c:226:34: warning: array subscript 1 is above array 
bounds of ‘struct page *[1]’ [-Warray-bounds] 226 | kaddr = page_address(buf->pages[i]); | ~~~~~~~~~~^~~ ./include/linux/mm.h:1630:48: note: in definition of macro ‘page_address’ 1630 | #define page_address(page) lowmem_page_address(page) | ^~~~ In file included from fs/btrfs/ctree.h:32, from fs/btrfs/disk-io.c:23: fs/btrfs/extent_io.h:98:15: note: while referencing ‘pages’ 98 | struct page *pages[1]; | ^~~~~ The compiler has no way to know that in that case the nodesize is exactly PAGE_SIZE, so the resulting number of pages will be correct (1). Let's use num_extent_pages that makes the case nodesize == PAGE_SIZE explicitly 1. Reported-by: Gustavo A. R. Silva Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b117dd3b8172..a59ab7b9aea0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -209,7 +209,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, static void csum_tree_block(struct extent_buffer *buf, u8 *result) { struct btrfs_fs_info *fs_info = buf->fs_info; - const int num_pages = fs_info->nodesize >> PAGE_SHIFT; + const int num_pages = num_extent_pages(buf); const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); char *kaddr; From 3c18e9baee0ef97510dcda78c82285f52626764b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 24 Jul 2021 17:27:39 +0200 Subject: [PATCH 596/794] USB: serial: ch341: fix character loss at high transfer rates The chip supports high transfer rates, but with the small default buffers (64 bytes read), some entire blocks are regularly lost. This typically happens at 1.5 Mbps (which is the default speed on Rockchip devices) when used as a console to access U-Boot where the output of the "help" command misses many lines and where "printenv" mangles the environment. 
The FTDI driver doesn't suffer at all from this. One difference is that it uses 512 bytes rx buffers and 256 bytes tx buffers. Adopting these values completely resolved the issue, even the output of "dmesg" is reliable. I preferred to leave the Tx value unchanged as it is not involved in this issue, while a change could increase the risk of triggering the same issue with other devices having too small buffers. I verified that it backports well (and works) at least to 5.4. It's of low importance enough to be dropped where it doesn't trivially apply anymore. Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau Link: https://lore.kernel.org/r/20210724152739.18726-1-w@1wt.eu Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 2db917eab799..8a521b5ea769 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -851,6 +851,7 @@ static struct usb_serial_driver ch341_device = { .owner = THIS_MODULE, .name = "ch341-uart", }, + .bulk_in_size = 512, .id_table = id_table, .num_ports = 1, .open = ch341_open, From 333cf507465fbebb3727f5b53e77538467df312a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 29 Jul 2021 11:34:49 +0530 Subject: [PATCH 597/794] powerpc/pseries: Fix regression while building external modules With commit c9f3401313a5 ("powerpc: Always enable queued spinlocks for 64s, disable for others") CONFIG_PPC_QUEUED_SPINLOCKS is always enabled on ppc64le, external modules that use spinlock APIs are failing. ERROR: modpost: GPL-incompatible module XXX.ko uses GPL-only symbol 'shared_processor' Before the above commit, modules were able to build without any issues. Also this problem is not seen on other architectures. This problem can be worked around if CONFIG_UNINLINE_SPIN_UNLOCK is enabled in the config.
However CONFIG_UNINLINE_SPIN_UNLOCK is not enabled by default and only enabled in certain conditions like CONFIG_DEBUG_SPINLOCKS is set in the kernel config. #include spinlock_t spLock; static int __init spinlock_test_init(void) { spin_lock_init(&spLock); spin_lock(&spLock); spin_unlock(&spLock); return 0; } static void __exit spinlock_test_exit(void) { printk("spinlock_test unloaded\n"); } module_init(spinlock_test_init); module_exit(spinlock_test_exit); MODULE_DESCRIPTION ("spinlock_test"); MODULE_LICENSE ("non-GPL"); MODULE_AUTHOR ("Srikar Dronamraju"); Given that spin locks are one of the basic facilities for module code, this effectively makes it impossible to build/load almost any non GPL modules on ppc64le. This was first reported at https://github.com/openzfs/zfs/issues/11172 Currently shared_processor is exported as GPL only symbol. Fix this for parity with other architectures by exposing shared_processor to non-GPL modules too. Fixes: 14c73bd344da ("powerpc/vcpu: Assume dedicated processors as non-preempt") Cc: stable@vger.kernel.org # v5.5+ Reported-by: marc.c.dionne@gmail.com Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729060449.292780-1-srikar@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 631a0d57b6cd..6b0886668465 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -77,7 +77,7 @@ #include "../../../../drivers/pci/pci.h" DEFINE_STATIC_KEY_FALSE(shared_processor); -EXPORT_SYMBOL_GPL(shared_processor); +EXPORT_SYMBOL(shared_processor); int CMO_PrPSP = -1; int CMO_SecPSP = -1; From a88603f4b92ecef9e2359e40bcb99ad399d85dd7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 29 Jul 2021 22:56:36 +1000 Subject: [PATCH 598/794] powerpc/vdso: Don't use r30 to avoid breaking Go lang 
The Go runtime uses r30 for some special value called 'g'. It assumes that value will remain unchanged even when calling VDSO functions. Although r30 is non-volatile across function calls, the callee is free to use it, as long as the callee saves the value and restores it before returning. It used to be true by accident that the VDSO didn't use r30, because the VDSO was hand-written asm. When we switched to building the VDSO from C the compiler started using r30, at least in some builds, leading to crashes in Go. eg: ~/go/src$ ./all.bash Building Go cmd/dist using /usr/lib/go-1.16. (go1.16.2 linux/ppc64le) Building Go toolchain1 using /usr/lib/go-1.16. go build os/exec: /usr/lib/go-1.16/pkg/tool/linux_ppc64le/compile: signal: segmentation fault go build reflect: /usr/lib/go-1.16/pkg/tool/linux_ppc64le/compile: signal: segmentation fault go tool dist: FAILED: /usr/lib/go-1.16/bin/go install -gcflags=-l -tags=math_big_pure_go compiler_bootstrap bootstrap/cmd/...: exit status 1 There are patches in flight to fix Go[1], but until they are released and widely deployed we can workaround it in the VDSO by avoiding use of r30. Note this only works with GCC, clang does not support -ffixed-rN. 
1: https://go-review.googlesource.com/c/go/+/328110 Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.") Cc: stable@vger.kernel.org # v5.11+ Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729131244.2595519-1-mpe@ellerman.id.au --- arch/powerpc/kernel/vdso64/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 2813e3f98db6..3c5baaa6f1e7 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -27,6 +27,13 @@ KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both + +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. +ccflags-y += $(call cc-option, -ffixed-r30) + asflags-y := -D__VDSO64__ -s targets += vdso64.lds From 06e91df16f3e1ca1a1886968fb22d4258f3b6b6f Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Thu, 29 Jul 2021 16:31:09 +0800 Subject: [PATCH 599/794] tty: serial: fsl_lpuart: fix the wrong return value in lpuart32_get_mctrl Patch e60c2991f18b makes the lpuart32_get_mctrl always return 0, actually this will break the functions of device which use flow control such as Bluetooth. For lpuart32 platform, the hardware can handle the CTS automatically. So we should set TIOCM_CTS active. Also need to set CAR and DSR active. Patch has been tested on lpuart32 platforms such as imx8qm and imx8ulp.
Fixes: e60c2991f18b ("serial: fsl_lpuart: remove RTSCTS handling from get_mctrl()") Cc: stable Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20210729083109.31541-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 508128ddfa01..f0e5da77ed6d 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1415,7 +1415,7 @@ static unsigned int lpuart_get_mctrl(struct uart_port *port) static unsigned int lpuart32_get_mctrl(struct uart_port *port) { - unsigned int mctrl = 0; + unsigned int mctrl = TIOCM_CAR | TIOCM_DSR | TIOCM_CTS; u32 reg; reg = lpuart32_read(port, UARTCTRL); From 7c4a509d3815a260c423c0633bd73695250ac26d Mon Sep 17 00:00:00 2001 From: Zhiyong Tao Date: Thu, 29 Jul 2021 16:46:40 +0800 Subject: [PATCH 600/794] serial: 8250_mtk: fix uart corruption issue when rx power off Fix uart corruption issue when rx power off. Add spin lock in mtk8250_dma_rx_complete function in APDMA mode. when uart is used as a communication port with external device(GPS). when external device(GPS) power off, the power of rx pin is also from 1.8v to 0v. Even if there is not any data in rx. But uart rx pin can capture the data "0". If uart doesn't receive any data in specified cycle, uart will generate a BI(Break interrupt) interrupt. If external device(GPS) power off, we found that BI interrupt appeared continuously and very frequently. When uart interrupt type is BI, uart IRQ handler(8250 framework API:serial8250_handle_irq) will push data to tty buffer. mtk8250_dma_rx_complete is a task of mtk_uart_apdma_rx_handler. mtk8250_dma_rx_complete priority is lower than uart irq handler(serial8250_handle_irq).
if we are in process of mtk8250_dma_rx_complete, uart appear BI interrupt:1)serial8250_handle_irq will priority execution.2)it may cause write tty buffer conflict in mtk8250_dma_rx_complete. So the spin lock protect the rx receive data process is not break. Signed-off-by: Zhiyong Tao Cc: stable Link: https://lore.kernel.org/r/20210729084640.17613-2-zhiyong.tao@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index f7d3023f860f..fb65dc601b23 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -93,10 +93,13 @@ static void mtk8250_dma_rx_complete(void *param) struct dma_tx_state state; int copied, total, cnt; unsigned char *ptr; + unsigned long flags; if (data->rx_status == DMA_RX_SHUTDOWN) return; + spin_lock_irqsave(&up->port.lock, flags); + dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state); total = dma->rx_size - state.residue; cnt = total; @@ -120,6 +123,8 @@ static void mtk8250_dma_rx_complete(void *param) tty_flip_buffer_push(tty_port); mtk8250_rx_dma(up); + + spin_unlock_irqrestore(&up->port.lock, flags); } static void mtk8250_rx_dma(struct uart_8250_port *up) From 0d6434e10b5377a006f6dd995c8fc5e2d82acddc Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Wed, 28 Jul 2021 14:21:06 +0530 Subject: [PATCH 601/794] firmware_loader: use -ETIMEDOUT instead of -EAGAIN in fw_load_sysfs_fallback The only motivation for using -EAGAIN in commit 0542ad88fbdd81bb ("firmware loader: Fix _request_firmware_load() return val for fw load abort") was to distinguish the error from -ENOMEM, and so there is no real reason in keeping it. -EAGAIN is typically used to tell the userspace to try something again and in this case re-using the sysfs loading interface cannot be retried when a timeout happens, so the return value is also bogus. 
-ETIMEDOUT is received when the wait times out and returning that is much more telling of what the reason for the failure was. So, just propagate that instead of returning -EAGAIN. Suggested-by: Luis Chamberlain Reviewed-by: Shuah Khan Acked-by: Luis Chamberlain Signed-off-by: Anirudh Rayabharam Cc: stable Link: https://lore.kernel.org/r/20210728085107.4141-2-mail@anirudhrb.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/fallback.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index 91899d185e31..1a48be0a030e 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -535,8 +535,6 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) if (fw_state_is_aborted(fw_priv)) { if (retval == -ERESTARTSYS) retval = -EINTR; - else - retval = -EAGAIN; } else if (fw_priv->is_paged_buf && !fw_priv->data) retval = -ENOMEM; From 75d95e2e39b27f733f21e6668af1c9893a97de5e Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Wed, 28 Jul 2021 14:21:07 +0530 Subject: [PATCH 602/794] firmware_loader: fix use-after-free in firmware_fallback_sysfs This use-after-free happens when a fw_priv object has been freed but hasn't been removed from the pending list (pending_fw_head). The next time fw_load_sysfs_fallback tries to insert into the list, it ends up accessing the pending_list member of the previously freed fw_priv. The root cause here is that all code paths that abort the fw load don't delete it from the pending list. For example: _request_firmware() -> fw_abort_batch_reqs() -> fw_state_aborted() To fix this, delete the fw_priv from the list in __fw_set_state() if the new state is DONE or ABORTED. This way, all aborts will remove the fw_priv from the list. Accordingly, remove calls to list_del_init that were being made before calling fw_state_(aborted|done). 
Also, in fw_load_sysfs_fallback, don't add the fw_priv to the pending list if it is already aborted. Instead, just jump out and return early. Fixes: bcfbd3523f3c ("firmware: fix a double abort case with fw_load_sysfs_fallback") Cc: stable Reported-by: syzbot+de271708674e2093097b@syzkaller.appspotmail.com Tested-by: syzbot+de271708674e2093097b@syzkaller.appspotmail.com Reviewed-by: Shuah Khan Acked-by: Luis Chamberlain Signed-off-by: Anirudh Rayabharam Link: https://lore.kernel.org/r/20210728085107.4141-3-mail@anirudhrb.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/fallback.c | 12 ++++++++---- drivers/base/firmware_loader/firmware.h | 10 +++++++++- drivers/base/firmware_loader/main.c | 2 ++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index 1a48be0a030e..d7d63c1aa993 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -89,12 +89,11 @@ static void __fw_load_abort(struct fw_priv *fw_priv) { /* * There is a small window in which user can write to 'loading' - * between loading done and disappearance of 'loading' + * between loading done/aborted and disappearance of 'loading' */ - if (fw_sysfs_done(fw_priv)) + if (fw_state_is_aborted(fw_priv) || fw_sysfs_done(fw_priv)) return; - list_del_init(&fw_priv->pending_list); fw_state_aborted(fw_priv); } @@ -280,7 +279,6 @@ static ssize_t firmware_loading_store(struct device *dev, * Same logic as fw_load_abort, only the DONE bit * is ignored and we set ABORT only on failure. 
*/ - list_del_init(&fw_priv->pending_list); if (rc) { fw_state_aborted(fw_priv); written = rc; @@ -513,6 +511,11 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) } mutex_lock(&fw_lock); + if (fw_state_is_aborted(fw_priv)) { + mutex_unlock(&fw_lock); + retval = -EINTR; + goto out; + } list_add(&fw_priv->pending_list, &pending_fw_head); mutex_unlock(&fw_lock); @@ -538,6 +541,7 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) } else if (fw_priv->is_paged_buf && !fw_priv->data) retval = -ENOMEM; +out: device_del(f_dev); err_put_dev: put_device(f_dev); diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h index 63bd29fdcb9c..a3014e9e2c85 100644 --- a/drivers/base/firmware_loader/firmware.h +++ b/drivers/base/firmware_loader/firmware.h @@ -117,8 +117,16 @@ static inline void __fw_state_set(struct fw_priv *fw_priv, WRITE_ONCE(fw_st->status, status); - if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) + if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) { +#ifdef CONFIG_FW_LOADER_USER_HELPER + /* + * Doing this here ensures that the fw_priv is deleted from + * the pending list in all abort/done paths. 
+ */ + list_del_init(&fw_priv->pending_list); +#endif complete_all(&fw_st->completion); + } } static inline void fw_state_aborted(struct fw_priv *fw_priv) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 4fdb8219cd08..68c549d71230 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -783,8 +783,10 @@ static void fw_abort_batch_reqs(struct firmware *fw) return; fw_priv = fw->priv; + mutex_lock(&fw_lock); if (!fw_state_is_aborted(fw_priv)) fw_state_aborted(fw_priv); + mutex_unlock(&fw_lock); } /* called from request_firmware() and request_firmware_work_func() */ From b1e27239b9169f07edba0ca0e52805645a1768ba Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:46 -0700 Subject: [PATCH 603/794] xfs: flush data dev on external log write We incorrectly flush the log device instead of the data device when trying to ensure metadata is correctly on disk before writing the unmount record. Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 36fa2650b081..96434cc4df6e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -833,7 +833,7 @@ xlog_write_unmount_record( * stamp the tail LSN into the unmount record. 
*/ if (log->l_targ != log->l_mp->m_ddev_targp) - blkdev_issue_flush(log->l_targ->bt_bdev); + blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS); } From b5d721eaae47eaa4b4c2754699dadacc4cbca2e0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:47 -0700 Subject: [PATCH 604/794] xfs: external logs need to flush data device The recent journal flush/FUA changes replaced the flushing of the data device on every iclog write with an up-front async data device cache flush. Unfortunately, the assumption of which this was based on has been proven incorrect by the flush vs log tail update ordering issue. As the fix for that issue uses the XLOG_ICL_NEED_FLUSH flag to indicate that data device needs a cache flush, we now need to (once again) ensure that an iclog write to external logs that need a cache flush to be issued actually issue a cache flush to the data device as well as the log device. Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions") Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 96434cc4df6e..a3c4d48195d9 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -827,13 +827,6 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(ulf); - /* - * For external log devices, we need to flush the data device cache - * first to ensure all metadata writeback is on stable storage before we - * stamp the tail LSN into the unmount record. - */ - if (log->l_targ != log->l_mp->m_ddev_targp) - blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS); } @@ -1796,10 +1789,20 @@ xlog_write_iclog( * metadata writeback and causing priority inversions. 
*/ iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE; - if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) + if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) { iclog->ic_bio.bi_opf |= REQ_PREFLUSH; + /* + * For external log devices, we also need to flush the data + * device cache first to ensure all metadata writeback covered + * by the LSN in this iclog is on stable storage. This is slow, + * but it *must* complete before we issue the external log IO. + */ + if (log->l_targ != log->l_mp->m_ddev_targp) + blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); + } if (iclog->ic_flags & XLOG_ICL_NEED_FUA) iclog->ic_bio.bi_opf |= REQ_FUA; + iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { From 9d3920644081edf311878b56e0c1e1477991a195 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:47 -0700 Subject: [PATCH 605/794] xfs: fold __xlog_state_release_iclog into xlog_state_release_iclog Fold __xlog_state_release_iclog into its only caller to prepare make an upcoming fix easier. Signed-off-by: Dave Chinner [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_log.c | 45 +++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a3c4d48195d9..82f5996d3889 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -487,29 +487,6 @@ out_error: return error; } -static bool -__xlog_state_release_iclog( - struct xlog *log, - struct xlog_in_core *iclog) -{ - lockdep_assert_held(&log->l_icloglock); - - if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { - /* update tail before writing to iclog */ - xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); - - iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog, tail_lsn); - /* cycle incremented when incrementing curr_block */ - trace_xlog_iclog_syncing(iclog, _RET_IP_); - return true; - } - - ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); - return false; -} - /* * Flush iclog to disk if this is the last reference to the given iclog and the * it is in the WANT_SYNC state. 
@@ -519,19 +496,31 @@ xlog_state_release_iclog( struct xlog *log, struct xlog_in_core *iclog) { + xfs_lsn_t tail_lsn; lockdep_assert_held(&log->l_icloglock); trace_xlog_iclog_release(iclog, _RET_IP_); if (iclog->ic_state == XLOG_STATE_IOERROR) return -EIO; - if (atomic_dec_and_test(&iclog->ic_refcnt) && - __xlog_state_release_iclog(log, iclog)) { - spin_unlock(&log->l_icloglock); - xlog_sync(log, iclog); - spin_lock(&log->l_icloglock); + if (!atomic_dec_and_test(&iclog->ic_refcnt)) + return 0; + + if (iclog->ic_state != XLOG_STATE_WANT_SYNC) { + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); + return 0; } + /* update tail before writing to iclog */ + tail_lsn = xlog_assign_tail_lsn(log->l_mp); + iclog->ic_state = XLOG_STATE_SYNCING; + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog, tail_lsn); + trace_xlog_iclog_syncing(iclog, _RET_IP_); + + spin_unlock(&log->l_icloglock); + xlog_sync(log, iclog); + spin_lock(&log->l_icloglock); return 0; } From 0dc8f7f139f07aaca1afcec0ade5718c4ebba91e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:48 -0700 Subject: [PATCH 606/794] xfs: fix ordering violation between cache flushes and tail updates There is a race between the new CIL async data device metadata IO completion cache flush and the log tail in the iclog the flush covers being updated. This can be seen by repeating generic/482 in a loop and eventually log recovery fails with a failures such as this: XFS (dm-3): Starting recovery (logdev: internal) XFS (dm-3): bad inode magic/vsn daddr 228352 #0 (magic=0) XFS (dm-3): Metadata corruption detected at xfs_inode_buf_verify+0x180/0x190, xfs_inode block 0x37c00 xfs_inode_buf_verify XFS (dm-3): Unmount and run xfs_repair XFS (dm-3): First 128 bytes of corrupted metadata buffer: 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ XFS (dm-3): metadata I/O error in "xlog_recover_items_pass2+0x55/0xc0" at daddr 0x37c00 len 32 error 117 Analysis of the logwrite replay shows that there were no writes to the data device between the FUA @ write 124 and the FUA at write @ 125, but log recovery @ 125 failed. The difference was the one log write @ 125 moved the tail of the log forwards from (1,8) to (1,32) and so the inode create intent in (1,8) was not replayed and so the inode cluster was zero on disk when replay of the first inode item in (1,32) was attempted. What this meant was that the journal write that occurred at @ 125 did not ensure that metadata completed before the iclog was written was correctly on stable storage. The tail of the log moved forward, so IO must have been completed between the two iclog writes. This means that there is a race condition between the unconditional async cache flush in the CIL push work and the tail LSN that is written to the iclog. This happens like so: CIL push work AIL push work ------------- ------------- Add to committing list start async data dev cache flush ..... xlog_write .... push inode create buffer ..... xlog_write(commit record) .... log tail moves xlog_assign_tail_lsn() start_lsn == commit_lsn xlog_state_release_iclog __xlog_state_release_iclog() xlog_sync() .... submit_bio() Essentially, this can only occur if the commit iclog is issued without a cache flush. 
If the iclog bio is submitted with REQ_PREFLUSH, then it will guarantee that all the completed IO is on stable storage before the iclog bio with the new tail LSN in it is written to the log. IOWs, the tail lsn that is written to the iclog needs to be sampled *before* we issue the cache flush that guarantees all IO up to that LSN has been completed. To fix this without giving up the performance advantage of the flush/FUA optimisations (e.g. g/482 runtime halves with 5.14-rc1 compared to 5.13), we need to ensure that we always issue a cache flush if the tail LSN changes between the initial async flush and the commit record being written. This requires sampling the tail_lsn before we start the flush, and then passing the sampled tail LSN to xlog_state_release_iclog() so it can determine if the tail LSN has changed while writing the checkpoint. If the tail LSN has changed, then it needs to set the NEED_FLUSH flag on the iclog and we'll issue another cache flush before writing the iclog. Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions") Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 36 ++++++++++++++++++++++++++---------- fs/xfs/xfs_log_cil.c | 13 +++++++++++-- fs/xfs/xfs_log_priv.h | 3 ++- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 82f5996d3889..e8c6c96d4f7c 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -489,12 +489,17 @@ out_error: /* * Flush iclog to disk if this is the last reference to the given iclog and the - * it is in the WANT_SYNC state. + * it is in the WANT_SYNC state. If the caller passes in a non-zero + * @old_tail_lsn and the current log tail does not match, there may be metadata + * on disk that must be persisted before this iclog is written.
To satisfy that + * requirement, set the XLOG_ICL_NEED_FLUSH flag as a condition for writing this + * iclog with the new log tail value. */ int xlog_state_release_iclog( struct xlog *log, - struct xlog_in_core *iclog) + struct xlog_in_core *iclog, + xfs_lsn_t old_tail_lsn) { xfs_lsn_t tail_lsn; lockdep_assert_held(&log->l_icloglock); @@ -503,6 +508,19 @@ xlog_state_release_iclog( if (iclog->ic_state == XLOG_STATE_IOERROR) return -EIO; + /* + * Grabbing the current log tail needs to be atomic w.r.t. the writing + * of the tail LSN into the iclog so we guarantee that the log tail does + * not move between deciding if a cache flush is required and writing + * the LSN into the iclog below. + */ + if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) { + tail_lsn = xlog_assign_tail_lsn(log->l_mp); + + if (old_tail_lsn && tail_lsn != old_tail_lsn) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + } + if (!atomic_dec_and_test(&iclog->ic_refcnt)) return 0; @@ -511,8 +529,6 @@ xlog_state_release_iclog( return 0; } - /* update tail before writing to iclog */ - tail_lsn = xlog_assign_tail_lsn(log->l_mp); iclog->ic_state = XLOG_STATE_SYNCING; iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); xlog_verify_tail_lsn(log, iclog, tail_lsn); @@ -858,7 +874,7 @@ out_err: * iclog containing the unmount record is written. 
*/ iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); xlog_wait_on_iclog(iclog); if (tic) { @@ -2302,7 +2318,7 @@ xlog_write_copy_finish( return 0; release_iclog: - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); return error; } @@ -2521,7 +2537,7 @@ next_lv: ASSERT(optype & XLOG_COMMIT_TRANS); *commit_iclog = iclog; } else { - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); } spin_unlock(&log->l_icloglock); @@ -2959,7 +2975,7 @@ restart: * reference to the iclog. */ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); if (error) return error; @@ -3195,7 +3211,7 @@ xfs_log_force( atomic_inc(&iclog->ic_refcnt); lsn = be64_to_cpu(iclog->ic_header.h_lsn); xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + if (xlog_state_release_iclog(log, iclog, 0)) goto out_error; if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) @@ -3275,7 +3291,7 @@ xlog_force_lsn( } atomic_inc(&iclog->ic_refcnt); xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + if (xlog_state_release_iclog(log, iclog, 0)) goto out_error; if (log_flushed) *log_flushed = 1; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b128aaa9b870..4c44bc3786c0 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -654,8 +654,9 @@ xlog_cil_push_work( struct xfs_trans_header thdr; struct xfs_log_iovec lhdr; struct xfs_log_vec lvhdr = { NULL }; + xfs_lsn_t preflush_tail_lsn; xfs_lsn_t commit_lsn; - xfs_lsn_t push_seq; + xfs_csn_t push_seq; struct bio bio; DECLARE_COMPLETION_ONSTACK(bdev_flush); @@ -730,7 +731,15 @@ xlog_cil_push_work( * because we hold the flush lock 
exclusively. Hence we can now issue * a cache flush to ensure all the completed metadata in the journal we * are about to overwrite is on stable storage. + * + * Because we are issuing this cache flush before we've written the + * tail lsn to the iclog, we can have metadata IO completions move the + * tail forwards between the completion of this flush and the iclog + * being written. In this case, we need to re-issue the cache flush + * before the iclog write. To detect whether the log tail moves, sample + * the tail LSN *before* we issue the flush. */ + preflush_tail_lsn = atomic64_read(&log->l_tail_lsn); xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev, &bdev_flush); @@ -941,7 +950,7 @@ restart: * storage. */ commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; - xlog_state_release_iclog(log, commit_iclog); + xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn); spin_unlock(&log->l_icloglock); return; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 4c41bbfa33b0..7cbde0b4f990 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -497,7 +497,8 @@ int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); -int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog); +int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, + xfs_lsn_t log_tail_lsn); /* * When we crack an atomic LSN, we sample it first so that the value will not From 45eddb414047c366744cc60dd6cef7c7e58c6ab9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:48 -0700 Subject: [PATCH 607/794] xfs: factor out forced iclog flushes We force iclogs in several places - we need them all to have the same cache flush semantics, so start by factoring out the iclog force into a common helper. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e8c6c96d4f7c..184c68ea62bb 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -778,6 +778,20 @@ xfs_log_mount_cancel( xfs_log_unmount(mp); } +/* + * Flush out the iclog to disk ensuring that device caches are flushed and + * the iclog hits stable storage before any completion waiters are woken. + */ +static inline int +xlog_force_iclog( + struct xlog_in_core *iclog) +{ + atomic_inc(&iclog->ic_refcnt); + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); + return xlog_state_release_iclog(iclog->ic_log, iclog, 0); +} + /* * Wait for the iclog and all prior iclogs to be written disk as required by the * log force state machine. Waiting on ic_force_wait ensures iclog completions @@ -863,18 +877,8 @@ out_err: spin_lock(&log->l_icloglock); iclog = log->l_iclog; - atomic_inc(&iclog->ic_refcnt); - if (iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(log, iclog, 0); - else - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR); - /* - * Ensure the journal is fully flushed and on stable storage once the - * iclog containing the unmount record is written. - */ iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); - error = xlog_state_release_iclog(log, iclog, 0); + error = xlog_force_iclog(iclog); xlog_wait_on_iclog(iclog); if (tic) { @@ -3201,17 +3205,9 @@ xfs_log_force( iclog = iclog->ic_prev; } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { if (atomic_read(&iclog->ic_refcnt) == 0) { - /* - * We are the only one with access to this iclog. - * - * Flush it out now. 
There should be a roundoff of zero - * to show that someone has already taken care of the - * roundoff from the previous sync. - */ - atomic_inc(&iclog->ic_refcnt); + /* We have exclusive access to this iclog. */ lsn = be64_to_cpu(iclog->ic_header.h_lsn); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog, 0)) + if (xlog_force_iclog(iclog)) goto out_error; if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) @@ -3289,9 +3285,7 @@ xlog_force_lsn( &log->l_icloglock); return -EAGAIN; } - atomic_inc(&iclog->ic_refcnt); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog, 0)) + if (xlog_force_iclog(iclog)) goto out_error; if (log_flushed) *log_flushed = 1; From 2bf1ec0ff067ff8f692d261b29c713f3583f7e2a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:49 -0700 Subject: [PATCH 608/794] xfs: log forces imply data device cache flushes After fixing the tail_lsn vs cache flush race, generic/482 continued to fail in a similar way where cache flushes were missing before iclog FUA writes. Tracing of iclog state changes during the fsstress workload portion of the test (via xlog_iclog* events) indicated that iclog writes were coming from two sources - CIL pushes and log forces (due to fsync/O_SYNC operations). All of the cases where a recovery problem was triggered indicated that the log force was the source of the iclog write that was not preceeded by a cache flush. This was an oversight in the modifications made in commit eef983ffeae7 ("xfs: journal IO cache flush reductions"). Log forces for fsync imply a data device cache flush has been issued if an iclog was flushed to disk and is indicated to the caller via the log_flushed parameter so they can elide the device cache flush if the journal issued one. The change in eef983ffeae7 results in iclogs only issuing a cache flush if XLOG_ICL_NEED_FLUSH is set on the iclog, but this was not added to the iclogs that the log force code flushes to disk. 
Hence log forces are no longer guaranteeing that a cache flush is issued, opening up a potential on-disk ordering failure. Log forces should also set XLOG_ICL_NEED_FUA to ensure that the actual iclogs they force to the journal are also on stable storage before they return to the caller. This patch introduces the xlog_force_iclog() helper function to encapsulate the process of taking a reference to an iclog, switching it to WANT_SYNC if it is still ACTIVE and flushing it to stable storage correctly. Both xfs_log_force() and xfs_log_force_lsn() are converted to use it, as is xlog_unmount_write() which has an elaborate method of doing exactly the same "write this iclog to stable storage" operation. Further, if the log force code needs to wait on an iclog in the WANT_SYNC state, it needs to ensure that iclog also results in a cache flush being issued. This covers the case where the iclog contains the commit record of the CIL flush that the log force triggered, but it hasn't been written yet because there is still an active reference to the iclog. Note: this whole cache flush whack-a-mole patch is a result of log forces still being iclog state centric rather than being CIL sequence centric. Most of this nasty code will go away in future when log forces are converted to wait on CIL sequence push completion rather than iclog completion. With the CIL push algorithm guaranteeing that the CIL checkpoint is fully on stable storage when it completes, we no longer need to iterate iclogs and push them to ensure a CIL sequence push has completed and so all this nasty iclog iteration and flushing code will go away. Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions") Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong --- fs/xfs/xfs_log.c | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 184c68ea62bb..160b8bb7ee60 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -787,6 +787,7 @@ xlog_force_iclog( struct xlog_in_core *iclog) { atomic_inc(&iclog->ic_refcnt); + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; if (iclog->ic_state == XLOG_STATE_ACTIVE) xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); return xlog_state_release_iclog(iclog->ic_log, iclog, 0); @@ -877,7 +878,6 @@ out_err: spin_lock(&log->l_icloglock); iclog = log->l_iclog; - iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); error = xlog_force_iclog(iclog); xlog_wait_on_iclog(iclog); @@ -3214,22 +3214,23 @@ xfs_log_force( goto out_unlock; } else { /* - * Someone else is writing to this iclog. - * - * Use its call to flush out the data. However, the - * other thread may not force out this LR, so we mark - * it WANT_SYNC. + * Someone else is still writing to this iclog, so we + * need to ensure that when they release the iclog it + * gets synced immediately as we may be waiting on it. */ xlog_state_switch_iclogs(log, iclog, 0); } - } else { - /* - * If the head iclog is not active nor dirty, we just attach - * ourselves to the head and go to sleep if necessary. - */ - ; } + /* + * The iclog we are about to wait on may contain the checkpoint pushed + * by the above xlog_cil_force() call, but it may not have been pushed + * to disk yet. Like the ACTIVE case above, we need to make sure caches + * are flushed when this iclog is written. 
+ */ + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + if (flags & XFS_LOG_SYNC) return xlog_wait_on_iclog(iclog); out_unlock: @@ -3262,7 +3263,8 @@ xlog_force_lsn( goto out_unlock; } - if (iclog->ic_state == XLOG_STATE_ACTIVE) { + switch (iclog->ic_state) { + case XLOG_STATE_ACTIVE: /* * We sleep here if we haven't already slept (e.g. this is the * first time we've looked at the correct iclog buf) and the @@ -3289,6 +3291,25 @@ xlog_force_lsn( goto out_error; if (log_flushed) *log_flushed = 1; + break; + case XLOG_STATE_WANT_SYNC: + /* + * This iclog may contain the checkpoint pushed by the + * xlog_cil_force_seq() call, but there are other writers still + * accessing it so it hasn't been pushed to disk yet. Like the + * ACTIVE case above, we need to make sure caches are flushed + * when this iclog is written. + */ + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + break; + default: + /* + * The entire checkpoint was written by the CIL force and is on + * its way to disk already. It will be stable when it + * completes, so we don't need to manipulate caches here at all. + * We just need to wait for completion if necessary. + */ + break; } if (flags & XFS_LOG_SYNC) From 8191d8222c514c69a8e1ac46bd9812b9e0aab7d0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:49 -0700 Subject: [PATCH 609/794] xfs: avoid unnecessary waits in xfs_log_force_lsn() Before waiting on a iclog in xfs_log_force_lsn(), we don't check to see if the iclog has already been completed and the contents on stable storage. We check for completed iclogs in xfs_log_force(), so we should do the same thing for xfs_log_force_lsn(). This fixed some random up-to-30s pauses seen in unmounting filesystems in some tests. 
A log force ends up waiting on completed iclog, and that doesn't then get flushed (and hence the log force get completed) until the background log worker issues a log force that flushes the iclog in question. Then the unmount unblocks and continues. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 160b8bb7ee60..1c328efdca66 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3143,6 +3143,35 @@ xlog_state_switch_iclogs( log->l_iclog = iclog->ic_next; } +/* + * Force the iclog to disk and check if the iclog has been completed before + * xlog_force_iclog() returns. This can happen on synchronous (e.g. + * pmem) or fast async storage because we drop the icloglock to issue the IO. + * If completion has already occurred, tell the caller so that it can avoid an + * unnecessary wait on the iclog. + */ +static int +xlog_force_and_check_iclog( + struct xlog_in_core *iclog, + bool *completed) +{ + xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); + int error; + + *completed = false; + error = xlog_force_iclog(iclog); + if (error) + return error; + + /* + * If the iclog has already been completed and reused the header LSN + * will have been rewritten by completion + */ + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + *completed = true; + return 0; +} + /* * Write out all data in the in-core log as of this exact moment in time. * @@ -3177,7 +3206,6 @@ xfs_log_force( { struct xlog *log = mp->m_log; struct xlog_in_core *iclog; - xfs_lsn_t lsn; XFS_STATS_INC(mp, xs_log_force); trace_xfs_log_force(mp, 0, _RET_IP_); @@ -3206,11 +3234,12 @@ xfs_log_force( } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { if (atomic_read(&iclog->ic_refcnt) == 0) { /* We have exclusive access to this iclog. 
*/ - lsn = be64_to_cpu(iclog->ic_header.h_lsn); - if (xlog_force_iclog(iclog)) + bool completed; + + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + if (completed) goto out_unlock; } else { /* @@ -3250,6 +3279,7 @@ xlog_force_lsn( bool already_slept) { struct xlog_in_core *iclog; + bool completed; spin_lock(&log->l_icloglock); iclog = log->l_iclog; @@ -3287,10 +3317,12 @@ xlog_force_lsn( &log->l_icloglock); return -EAGAIN; } - if (xlog_force_iclog(iclog)) + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; if (log_flushed) *log_flushed = 1; + if (completed) + goto out_unlock; break; case XLOG_STATE_WANT_SYNC: /* From 32baa63d82ee3f5ab3bd51bae6bf7d1c15aed8c7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:49 -0700 Subject: [PATCH 610/794] xfs: logging the on disk inode LSN can make it go backwards When we log an inode, we format the "log inode" core and set an LSN in that inode core. We do that via xfs_inode_item_format_core(), which calls: xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn); to format the log inode. It writes the LSN from the inode item into the log inode, and if recovery decides the inode item needs to be replayed, it recovers the log inode LSN field and writes it into the on disk inode LSN field. Now this might seem like a reasonable thing to do, but it is wrong on multiple levels. Firstly, if the item is not yet in the AIL, item->li_lsn is zero. i.e. the first time the inode it is logged and formatted, the LSN we write into the log inode will be zero. If we only log it once, recovery will run and can write this zero LSN into the inode. This means that the next time the inode is logged and log recovery runs, it will *always* replay changes to the inode regardless of whether the inode is newer on disk than the version in the log and that violates the entire purpose of recording the LSN in the inode at writeback time (i.e. 
to stop it going backwards in time on disk during recovery). Secondly, if we commit the CIL to the journal so the inode item moves to the AIL, and then relog the inode, the LSN that gets stamped into the log inode will be the LSN of the inode's current location in the AIL, not its age on disk. And it's not the LSN that will be associated with the current change. That means when log recovery replays this inode item, the LSN that ends up on disk is the LSN for the previous changes in the log, not the current changes being replayed. IOWs, after recovery the LSN on disk is not in sync with the LSN of the modifications that were replayed into the inode. This, again, violates the recovery ordering semantics that on-disk writeback LSNs provide. Hence the inode LSN in the log dinode is -always- invalid. Thirdly, recovery actually has the LSN of the log transaction it is replaying right at hand - it uses it to determine if it should replay the inode by comparing it to the on-disk inode's LSN. But it doesn't use that LSN to stamp the LSN into the inode which will be written back when the transaction is fully replayed. It uses the one in the log dinode, which we know is always going to be incorrect. Looking back at the change history, the inode logging was broken by commit 93f958f9c41f ("xfs: cull unnecessary icdinode fields") way back in 2016 by a stupid idiot who thought he knew how this code worked. i.e. me. That commit replaced an in-memory di_lsn field that was updated only at inode writeback time from the inode item.li_lsn value - and hence always contained the same LSN that appeared in the on-disk inode - with a read of the inode item LSN at inode format time. Clearly these are not the same thing. Before 93f958f9c41f, the log recovery behaviour was irrelevant, because the LSN in the log inode always matched the on-disk LSN at the time the inode was logged, hence recovery of the transaction would never make the on-disk LSN in the inode go backwards or get out of sync. 
A symptom of the problem is this, caught from a failure of generic/482. Before log recovery, the inode has been allocated but never used: xfs_db> inode 393388 xfs_db> p core.magic = 0x494e core.mode = 0 .... v3.crc = 0x99126961 (correct) v3.change_count = 0 v3.lsn = 0 v3.flags2 = 0 v3.cowextsize = 0 v3.crtime.sec = Thu Jan 1 10:00:00 1970 v3.crtime.nsec = 0 After log recovery: xfs_db> p core.magic = 0x494e core.mode = 020444 .... v3.crc = 0x23e68f23 (correct) v3.change_count = 2 v3.lsn = 0 v3.flags2 = 0 v3.cowextsize = 0 v3.crtime.sec = Thu Jul 22 17:03:03 2021 v3.crtime.nsec = 751000000 ... You can see that the LSN of the on-disk inode is 0, even though it clearly has been written to disk. I point out this inode, because the generic/482 failure occurred because several adjacent inodes in this specific inode cluster were not replayed correctly and still appeared to be zero on disk when all the other metadata (inobt, finobt, directories, etc) indicated they should be allocated and written back. The fix for this is two-fold. The first is that we need to either revert the LSN changes in 93f958f9c41f or stop logging the inode LSN altogether. If we do the former, log recovery does not need to change but we add 8 bytes of memory per inode to store what is largely a write-only inode field. If we do the latter, log recovery needs to stamp the on-disk inode in the same manner that inode writeback does. I prefer the latter, because we shouldn't really be trying to log and replay changes to the on disk LSN as the on-disk value is the canonical source of the on-disk version of the inode. It also matches the way we recover buffer items - we create a buf_log_item that carries the current recovery transaction LSN that gets stamped into the buffer by the write verifier when it gets written back when the transaction is fully recovered. 
However, this might break log recovery on older kernels even more, so I'm going to simply ignore the logged value in recovery and stamp the on-disk inode with the LSN of the transaction being recovered that will trigger writeback on transaction recovery completion. This will ensure that the on-disk inode LSN always reflects the LSN of the last change that was written to disk, regardless of whether it comes from log recovery or runtime writeback. Fixes: 93f958f9c41f ("xfs: cull unnecessary icdinode fields") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_log_format.h | 11 +++++++++- fs/xfs/xfs_inode_item_recover.c | 39 ++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index d548ea4b6aab..2c5bcbc19264 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -411,7 +411,16 @@ struct xfs_log_dinode { /* start of the extended dinode, writable fields */ uint32_t di_crc; /* CRC of the inode */ uint64_t di_changecount; /* number of attribute changes */ - xfs_lsn_t di_lsn; /* flush sequence */ + + /* + * The LSN we write to this field during formatting is not a reflection + * of the current on-disk LSN. It should never be used for recovery + * sequencing, nor should it be recovered into the on-disk inode at all. + * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk() + * for details. 
+ */ + xfs_lsn_t di_lsn; + uint64_t di_flags2; /* more random flags */ uint32_t di_cowextsize; /* basic cow extent size for file */ uint8_t di_pad2[12]; /* more padding for future expansion */ diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 7b79518b6c20..e0072a6cd2d3 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts( STATIC void xfs_log_dinode_to_disk( struct xfs_log_dinode *from, - struct xfs_dinode *to) + struct xfs_dinode *to, + xfs_lsn_t lsn) { to->di_magic = cpu_to_be16(from->di_magic); to->di_mode = cpu_to_be16(from->di_mode); @@ -182,7 +183,7 @@ xfs_log_dinode_to_disk( to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(from->di_ino); - to->di_lsn = cpu_to_be64(from->di_lsn); + to->di_lsn = cpu_to_be64(lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &from->di_uuid); to->di_flushiter = 0; @@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2( } /* - * If the inode has an LSN in it, recover the inode only if it's less - * than the lsn of the transaction we are replaying. Note: we still - * need to replay an owner change even though the inode is more recent - * than the transaction as there is no guarantee that all the btree - * blocks are more recent than this transaction, too. + * If the inode has an LSN in it, recover the inode only if the on-disk + * inode's LSN is older than the lsn of the transaction we are + * replaying. We can have multiple checkpoints with the same start LSN, + * so the current LSN being equal to the on-disk LSN doesn't necessarily + * mean that the on-disk inode is more recent than the change being + * replayed. 
+ * + * We must check the current_lsn against the on-disk inode + * here because the we can't trust the log dinode to contain a valid LSN + * (see comment below before replaying the log dinode for details). + * + * Note: we still need to replay an owner change even though the inode + * is more recent than the transaction as there is no guarantee that all + * the btree blocks are more recent than this transaction, too. */ if (dip->di_version >= 3) { xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) { trace_xfs_log_recover_inode_skip(log, in_f); error = 0; goto out_owner_change; @@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2( goto out_release; } - /* recover the log dinode inode into the on disk inode */ - xfs_log_dinode_to_disk(ldip, dip); + /* + * Recover the log dinode inode into the on disk inode. + * + * The LSN in the log dinode is garbage - it can be zero or reflect + * stale in-memory runtime state that isn't coherent with the changes + * logged in this transaction or the changes written to the on-disk + * inode. Hence we write the current lSN into the inode because that + * matches what xfs_iflush() would write inode the inode when flushing + * the changes in this transaction. + */ + xfs_log_dinode_to_disk(ldip, dip, current_lsn); fields = in_f->ilf_fields; if (fields & XFS_ILOG_DEV) From d8f4c2d0398fa1d92cacf854daf80d21a46bfefc Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:50 -0700 Subject: [PATCH 611/794] xfs: Enforce attr3 buffer recovery order From the department of "WTAF? How did we miss that!?"... When we are recovering a buffer, the first thing we do is check the buffer magic number and extract the LSN from the buffer. If the LSN is older than the current LSN, we replay the modification to it. If the metadata on disk is newer than the transaction in the log, we skip it. 
This is a fundamental v5 filesystem metadata recovery behaviour. generic/482 failed with an attribute writeback failure during log recovery. The write verifier caught the corruption before it got written to disk, and the attr buffer dump looked like: XFS (dm-3): Metadata corruption detected at xfs_attr3_leaf_verify+0x275/0x2e0, xfs_attr3_leaf block 0x19be8 XFS (dm-3): Unmount and run xfs_repair XFS (dm-3): First 128 bytes of corrupted metadata buffer: 00000000: 00 00 00 00 00 00 00 00 3b ee 00 00 4d 2a 01 e1 ........;...M*.. 00000010: 00 00 00 00 00 01 9b e8 00 00 00 01 00 00 05 38 ...............8 ^^^^^^^^^^^^^^^^^^^^^^^ 00000020: df 39 5e 51 58 ac 44 b6 8d c5 e7 10 44 09 bc 17 .9^QX.D.....D... 00000030: 00 00 00 00 00 02 00 83 00 03 00 cc 0f 24 01 00 .............$.. 00000040: 00 68 0e bc 0f c8 00 10 00 00 00 00 00 00 00 00 .h.............. 00000050: 00 00 3c 31 0f 24 01 00 00 00 3c 32 0f 88 01 00 ..<1.$....<2.... 00000060: 00 00 3c 33 0f d8 01 00 00 00 00 00 00 00 00 00 ..<3............ 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ..... The highlighted bytes are the LSN that was replayed into the buffer: 0x100000538. This is cycle 1, block 0x538. Prior to replay, that block on disk looks like this: $ sudo xfs_db -c "fsb 0x417d" -c "type attr3" -c p /dev/mapper/thin-vol hdr.info.hdr.forw = 0 hdr.info.hdr.back = 0 hdr.info.hdr.magic = 0x3bee hdr.info.crc = 0xb5af0bc6 (correct) hdr.info.bno = 105448 hdr.info.lsn = 0x100000900 ^^^^^^^^^^^ hdr.info.uuid = df395e51-58ac-44b6-8dc5-e7104409bc17 hdr.info.owner = 131203 hdr.count = 2 hdr.usedbytes = 120 hdr.firstused = 3796 hdr.holes = 1 hdr.freemap[0-2] = [base,size] Note the LSN stamped into the buffer on disk: 1/0x900. The version on disk is much newer than the log transaction that was being replayed. That's a bug, and should -never- happen. So I immediately went to look at xlog_recover_get_buf_lsn() to check that we handled the LSN correctly. 
I was wondering if there was a similar "two commits with the same start LSN skips the second replay" problem with buffers. I didn't get that far, because I found a much more basic, rudimentary bug: xlog_recover_get_buf_lsn() doesn't recognise buffers with XFS_ATTR3_LEAF_MAGIC set in them!!! IOWs, attr3 leaf buffers fall through the magic number checks unrecognised, so trigger the "recover immediately" behaviour instead of undergoing an LSN check. IOWs, we incorrectly replay ATTR3 leaf buffers and that causes silent on disk corruption of inode attribute forks and potentially other things.... Git history shows this is *another* zero day bug, this time introduced in commit 50d5c8d8e938 ("xfs: check LSN ordering for v5 superblocks during recovery") which failed to handle the attr3 leaf buffers in recovery. And we've failed to handle them ever since... Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf_item_recover.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d44e8b4a3391..05fd816edf59 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn( switch (magicda) { case XFS_DIR3_LEAF1_MAGIC: case XFS_DIR3_LEAFN_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: case XFS_DA3_NODE_MAGIC: lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; From b2ae3a9ef91152931b99620c431cf3805daa1429 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:50 -0700 Subject: [PATCH 612/794] xfs: need to see iclog flags in tracing Because I cannot tell if the NEED_FLUSH flag is being set correctly by the log force and CIL push machinery without it. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_log_priv.h | 13 ++++++++++--- fs/xfs/xfs_trace.h | 5 ++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 7cbde0b4f990..f3e79a45d60a 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -59,6 +59,16 @@ enum xlog_iclog_state { { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" }, \ { XLOG_STATE_IOERROR, "XLOG_STATE_IOERROR" } +/* + * In core log flags + */ +#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ +#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ + +#define XLOG_ICL_STRINGS \ + { XLOG_ICL_NEED_FLUSH, "XLOG_ICL_NEED_FLUSH" }, \ + { XLOG_ICL_NEED_FUA, "XLOG_ICL_NEED_FUA" } + /* * Log ticket flags @@ -143,9 +153,6 @@ enum xlog_iclog_state { #define XLOG_COVER_OPS 5 -#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ -#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ - /* Ticket reservation region accounting */ #define XLOG_TIC_LEN_MAX 15 diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f9d8d605f9b1..19260291ff8b 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3944,6 +3944,7 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __field(uint32_t, state) __field(int32_t, refcount) __field(uint32_t, offset) + __field(uint32_t, flags) __field(unsigned long long, lsn) __field(unsigned long, caller_ip) ), @@ -3952,15 +3953,17 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __entry->state = iclog->ic_state; __entry->refcount = atomic_read(&iclog->ic_refcnt); __entry->offset = iclog->ic_offset; + __entry->flags = iclog->ic_flags; __entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx caller %pS", + TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->state, XLOG_STATE_STRINGS), __entry->refcount, __entry->offset, __entry->lsn, + 
__print_flags(__entry->flags, "|", XLOG_ICL_STRINGS), (char *)__entry->caller_ip) ); From 9d110014205cb1129fa570d8de83d486fa199354 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 28 Jul 2021 17:14:11 -0700 Subject: [PATCH 613/794] xfs: limit iclog tail updates From the department of "generic/482 keeps on giving", we bring you another tail update race condition: iclog: S1 C1 +-----------------------+-----------------------+ S2 EOIC Two checkpoints in a single iclog. One is complete, the other just contains the start record and overruns into a new iclog. Timeline: Before S1: Cache flush, log tail = X At S1: Metadata stable, write start record and checkpoint At C1: Write commit record, set NEED_FUA Single iclog checkpoint, so no need for NEED_FLUSH Log tail still = X, so no need for NEED_FLUSH After C1, Before S2: Cache flush, log tail = X At S2: Metadata stable, write start record and checkpoint After S2: Log tail moves to X+1 At EOIC: End of iclog, more journal data to write Releases iclog Not a commit iclog, so no need for NEED_FLUSH Writes log tail X+1 into iclog. At this point, the iclog has tail X+1 and NEED_FUA set. There has been no cache flush for the metadata between X and X+1, and the iclog writes the new tail permanently to the log. This is sufficient to violate on-disk metadata/journal ordering. We have two options here. The first is to detect this case in some manner and ensure that the partial checkpoint write sets NEED_FLUSH when the iclog is already marked NEED_FUA and the log tail changes. This seems somewhat fragile and quite complex to get right, and it doesn't actually make it obvious what underlying problem it is actually addressing from reading the code. The second option seems much cleaner to me, because it is derived directly from the requirements of the C1 commit record in the iclog. That is, when we write this commit record to the iclog, we've guaranteed that the metadata/data ordering is correct for tail update purposes. 
Hence if we only write the log tail into the iclog for the *first* commit record rather than the log tail at the last release, we guarantee that the log tail does not move past where the first commit record in the log expects it to be. IOWs, taking the first option means that replay of C1 becomes dependent on future operations doing the right thing, not just the C1 checkpoint itself doing the right thing. This makes log recovery almost impossible to reason about because now we have to take into account what might or might not have happened in the future when looking at checkpoints in the log rather than just having to reconstruct the past... Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 50 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1c328efdca66..60ac5fd63f1e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -78,13 +78,12 @@ xlog_verify_iclog( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn); + struct xlog_in_core *iclog); #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) #define xlog_verify_iclog(a,b,c) -#define xlog_verify_tail_lsn(a,b,c) +#define xlog_verify_tail_lsn(a,b) #endif STATIC int @@ -489,12 +488,31 @@ out_error: /* * Flush iclog to disk if this is the last reference to the given iclog and the - * it is in the WANT_SYNC state. If the caller passes in a non-zero - * @old_tail_lsn and the current log tail does not match, there may be metadata - * on disk that must be persisted before this iclog is written. To satisfy that - * requirement, set the XLOG_ICL_NEED_FLUSH flag as a condition for writing this - * iclog with the new log tail value. + * it is in the WANT_SYNC state.
+ * + * If the caller passes in a non-zero @old_tail_lsn and the current log tail + * does not match, there may be metadata on disk that must be persisted before + * this iclog is written. To satisfy that requirement, set the + * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new + * log tail value. + * + * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the + * log tail is updated correctly. NEED_FUA indicates that the iclog will be + * written to stable storage, and implies that a commit record is contained + * within the iclog. We need to ensure that the log tail does not move beyond + * the tail that the first commit record in the iclog ordered against, otherwise + * correct recovery of that checkpoint becomes dependent on future operations + * performed on this iclog. + * + * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the + * current tail into iclog. Once the iclog tail is set, future operations must + * not modify it, otherwise they potentially violate ordering constraints for + * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in + * the iclog will get zeroed on activation of the iclog after sync, so we + * always capture the tail lsn on the iclog on the first NEED_FUA release + * regardless of the number of active reference counts on this iclog. 
*/ + int xlog_state_release_iclog( struct xlog *log, @@ -519,6 +537,10 @@ xlog_state_release_iclog( if (old_tail_lsn && tail_lsn != old_tail_lsn) iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + + if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) && + !iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); } if (!atomic_dec_and_test(&iclog->ic_refcnt)) @@ -530,8 +552,9 @@ xlog_state_release_iclog( } iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog, tail_lsn); + if (!iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog); trace_xlog_iclog_syncing(iclog, _RET_IP_); spin_unlock(&log->l_icloglock); @@ -2579,6 +2602,7 @@ xlog_state_activate_iclog( memset(iclog->ic_header.h_cycle_data, 0, sizeof(iclog->ic_header.h_cycle_data)); iclog->ic_header.h_lsn = 0; + iclog->ic_header.h_tail_lsn = 0; } /* @@ -3614,10 +3638,10 @@ xlog_verify_grant_tail( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn) + struct xlog_in_core *iclog) { - int blocks; + xfs_lsn_t tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn); + int blocks; if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) { blocks = From 81a448d7b0668ae39c08e6f34a54cc7eafb844f1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 26 Jul 2021 16:43:17 -0700 Subject: [PATCH 614/794] xfs: prevent spoofing of rtbitmap blocks when recovering buffers While reviewing the buffer item recovery code, the thought occurred to me: in V5 filesystems we use log sequence number (LSN) tracking to avoid replaying older metadata updates against newer log items. 
However, we use the magic number of the ondisk buffer to find the LSN of the ondisk metadata, which means that if an attacker can control the layout of the realtime device precisely enough that the start of an rt bitmap block matches the magic and UUID of some other kind of block, they can control the purported LSN of that spoofed block and thereby break log replay. Since realtime bitmap and summary blocks don't have headers at all, we have no way to tell if a block really should be replayed. The best we can do is replay unconditionally and hope for the best. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Carlos Maiolino --- fs/xfs/xfs_buf_item_recover.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index 05fd816edf59..4775485b4062 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -698,7 +698,8 @@ xlog_recover_do_inode_buffer( static xfs_lsn_t xlog_recover_get_buf_lsn( struct xfs_mount *mp, - struct xfs_buf *bp) + struct xfs_buf *bp, + struct xfs_buf_log_format *buf_f) { uint32_t magic32; uint16_t magic16; @@ -706,11 +707,20 @@ xlog_recover_get_buf_lsn( void *blk = bp->b_addr; uuid_t *uuid; xfs_lsn_t lsn = -1; + uint16_t blft; /* v4 filesystems always recover immediately */ if (!xfs_sb_version_hascrc(&mp->m_sb)) goto recover_immediately; + /* + * realtime bitmap and summary file blocks do not have magic numbers or + * UUIDs, so we must recover them immediately. + */ + blft = xfs_blft_from_flags(buf_f); + if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF) + goto recover_immediately; + magic32 = be32_to_cpu(*(__be32 *)blk); switch (magic32) { case XFS_ABTB_CRC_MAGIC: @@ -920,7 +930,7 @@ xlog_recover_buf_commit_pass2( * the verifier will be reset to match whatever recover turns that * buffer into. 
*/ - lsn = xlog_recover_get_buf_lsn(mp, bp); + lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { trace_xfs_log_recover_buf_skip(log, buf_f); xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); From 696e572dc85c674b31f4f13f59d8e217ee1b057f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 26 Jul 2021 15:06:22 -0500 Subject: [PATCH 615/794] ARM: riscpc: Fix fall-through warning for Clang Fix the following fallthrough warning: arch/arm/mach-rpc/riscpc.c:52:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] default: ^ arch/arm/mach-rpc/riscpc.c:52:2: note: insert 'break;' to avoid fall-through default: ^ break; Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202107260355.bF00i5bi-lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- arch/arm/mach-rpc/riscpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c index d23970bd638d..f70fb9c4b0cb 100644 --- a/arch/arm/mach-rpc/riscpc.c +++ b/arch/arm/mach-rpc/riscpc.c @@ -49,6 +49,7 @@ static int __init parse_tag_acorn(const struct tag *tag) fallthrough; /* ??? */ case 256: vram_size += PAGE_SIZE * 256; + break; default: break; } From eb4f520ca691f109f5fb1d16fc9cc26447a941e1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Mon, 26 Jul 2021 15:33:53 -0500 Subject: [PATCH 616/794] scsi: acornscsi: Fix fall-through warning for clang Fix the following fallthrough warning (on ARM): drivers/scsi/arm/acornscsi.c:2651:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] case res_success: ^ drivers/scsi/arm/acornscsi.c:2651:2: note: insert '__attribute__((fallthrough));' to silence this warning case res_success: ^ __attribute__((fallthrough)); drivers/scsi/arm/acornscsi.c:2651:2: note: insert 'break;' to avoid fall-through case res_success: ^ break; Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202107260355.bF00i5bi-lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/scsi/arm/acornscsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c index 84fc7a0c6ff4..4a84599ff491 100644 --- a/drivers/scsi/arm/acornscsi.c +++ b/drivers/scsi/arm/acornscsi.c @@ -2642,6 +2642,7 @@ int acornscsi_abort(struct scsi_cmnd *SCpnt) //#endif clear_bit(SCpnt->device->id * 8 + (u8)(SCpnt->device->lun & 0x7), host->busyluns); + fallthrough; /* * We found the command, and cleared it out. Either From cb163627e6d32dbaca4d89b2292788cee895b06d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 26 Jul 2021 15:46:47 -0500 Subject: [PATCH 617/794] scsi: fas216: Fix fall-through warning for Clang Fix the following fallthrough warning (on ARM): drivers/scsi/arm/fas216.c:1379:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] default: ^ drivers/scsi/arm/fas216.c:1379:2: note: insert 'break;' to avoid fall-through default: ^ break; Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202107260355.bF00i5bi-lkp@intel.com/ Signed-off-by: Gustavo A. R. 
Silva --- drivers/scsi/arm/fas216.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c index 6baa9b36367d..9c4458a99025 100644 --- a/drivers/scsi/arm/fas216.c +++ b/drivers/scsi/arm/fas216.c @@ -1375,6 +1375,7 @@ static void fas216_busservice_intr(FAS216_Info *info, unsigned int stat, unsigne case IS_COMPLETE: break; } + break; default: break; From 0aab5dce395636eddf4e5f33eba88390328a95b4 Mon Sep 17 00:00:00 2001 From: Edmund Dea Date: Tue, 25 Aug 2020 14:51:17 -0700 Subject: [PATCH 618/794] drm/kmb: Enable LCD DMA for low TVDDCV There's an undocumented dependency between LCD layer enable bits [2-5] and the AXI pipelined read enable bit [28] in the LCD_CONTROL register. The proper order of operation is: 1) Clear AXI pipelined read enable bit 2) Set LCD layers 3) Set AXI pipelined read enable bit With this update, LCD can start DMA when TVDDCV is reduced down to 700mV. Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display") Signed-off-by: Edmund Dea Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210728003126.1425028-1-anitha.chrisanthus@intel.com --- drivers/gpu/drm/kmb/kmb_drv.c | 14 ++++++++++++++ drivers/gpu/drm/kmb/kmb_plane.c | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index 96ea1a2c11dd..c0b1c6f99249 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -203,6 +203,7 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev) unsigned long status, val, val1; int plane_id, dma0_state, dma1_state; struct kmb_drm_private *kmb = to_kmb(dev); + u32 ctrl = 0; status = kmb_read_lcd(kmb, LCD_INT_STATUS); @@ -227,6 +228,19 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev) kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, kmb->plane_status[plane_id].ctrl); + ctrl = kmb_read_lcd(kmb, LCD_CONTROL); + if (!(ctrl & 
(LCD_CTRL_VL1_ENABLE | + LCD_CTRL_VL2_ENABLE | + LCD_CTRL_GL1_ENABLE | + LCD_CTRL_GL2_ENABLE))) { + /* If no LCD layers are using DMA, + * then disable DMA pipelined AXI read + * transactions. + */ + kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, + LCD_CTRL_PIPELINE_DMA); + } + kmb->plane_status[plane_id].disable = false; } } diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c index d5b6195856d1..ecee6782612d 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.c +++ b/drivers/gpu/drm/kmb/kmb_plane.c @@ -427,8 +427,14 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, kmb_set_bitmask_lcd(kmb, LCD_CONTROL, ctrl); - /* FIXME no doc on how to set output format,these values are - * taken from the Myriadx tests + /* Enable pipeline AXI read transactions for the DMA + * after setting graphics layers. This must be done + * in a separate write cycle. + */ + kmb_set_bitmask_lcd(kmb, LCD_CONTROL, LCD_CTRL_PIPELINE_DMA); + + /* FIXME no doc on how to set output format, these values are taken + * from the Myriadx tests */ out_format |= LCD_OUTF_FORMAT_RGB888; @@ -526,6 +532,11 @@ struct kmb_plane *kmb_plane_init(struct drm_device *drm) plane->id = i; } + /* Disable pipeline AXI read transactions for the DMA + * prior to setting graphics layers + */ + kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, LCD_CTRL_PIPELINE_DMA); + return primary; cleanup: drmm_kfree(drm, plane); From eb92830cdbc232a0e8166c48061ca276132646a7 Mon Sep 17 00:00:00 2001 From: Edmund Dea Date: Wed, 26 Aug 2020 13:17:29 -0700 Subject: [PATCH 619/794] drm/kmb: Define driver date and major/minor version Added macros for date and version Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display") Signed-off-by: Edmund Dea Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210728003126.1425028-2-anitha.chrisanthus@intel.com --- drivers/gpu/drm/kmb/kmb_drv.c | 8 ++++---- drivers/gpu/drm/kmb/kmb_drv.h | 5 +++++ 2 files changed, 9 
insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index c0b1c6f99249..f54392ec4fab 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -425,10 +425,10 @@ static const struct drm_driver kmb_driver = { .fops = &fops, DRM_GEM_CMA_DRIVER_OPS_VMAP, .name = "kmb-drm", - .desc = "KEEMBAY DISPLAY DRIVER ", - .date = "20201008", - .major = 1, - .minor = 0, + .desc = "KEEMBAY DISPLAY DRIVER", + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, }; static int kmb_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h index 02e806712a64..ebbaa5f422d5 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.h +++ b/drivers/gpu/drm/kmb/kmb_drv.h @@ -15,6 +15,11 @@ #define KMB_MAX_HEIGHT 1080 /*Max height in pixels */ #define KMB_MIN_WIDTH 1920 /*Max width in pixels */ #define KMB_MIN_HEIGHT 1080 /*Max height in pixels */ + +#define DRIVER_DATE "20210223" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 1 + #define KMB_LCD_DEFAULT_CLK 200000000 #define KMB_SYS_CLK_MHZ 500 From bc546c0c9abb3bb2fb46866b3d1e6ade9695a5f6 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 13 Jan 2021 14:31:03 +0800 Subject: [PATCH 620/794] scsi: scsi_dh_rdac: Avoid crash during rdac_bus_attach() The following BUG_ON() was observed during RDAC scan: [595952.944297] kernel BUG at drivers/scsi/device_handler/scsi_dh_rdac.c:427! [595952.951143] Internal error: Oops - BUG: 0 [#1] SMP ...... 
[595953.251065] Call trace: [595953.259054] check_ownership+0xb0/0x118 [595953.269794] rdac_bus_attach+0x1f0/0x4b0 [595953.273787] scsi_dh_handler_attach+0x3c/0xe8 [595953.278211] scsi_dh_add_device+0xc4/0xe8 [595953.282291] scsi_sysfs_add_sdev+0x8c/0x2a8 [595953.286544] scsi_probe_and_add_lun+0x9fc/0xd00 [595953.291142] __scsi_scan_target+0x598/0x630 [595953.295395] scsi_scan_target+0x120/0x130 [595953.299481] fc_user_scan+0x1a0/0x1c0 [scsi_transport_fc] [595953.304944] store_scan+0xb0/0x108 [595953.308420] dev_attr_store+0x44/0x60 [595953.312160] sysfs_kf_write+0x58/0x80 [595953.315893] kernfs_fop_write+0xe8/0x1f0 [595953.319888] __vfs_write+0x60/0x190 [595953.323448] vfs_write+0xac/0x1c0 [595953.326836] ksys_write+0x74/0xf0 [595953.330221] __arm64_sys_write+0x24/0x30 Code is in check_ownership: list_for_each_entry_rcu(tmp, &h->ctlr->dh_list, node) { /* h->sdev should always be valid */ BUG_ON(!tmp->sdev); tmp->sdev->access_state = access_state; } rdac_bus_attach initialize_controller list_add_rcu(&h->node, &h->ctlr->dh_list); h->sdev = sdev; rdac_bus_detach list_del_rcu(&h->node); h->sdev = NULL; Fix the race between rdac_bus_attach() and rdac_bus_detach() where h->sdev is NULL when processing the RDAC attach. Link: https://lore.kernel.org/r/20210113063103.2698953-1-yebin10@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Ye Bin Signed-off-by: Martin K. 
Petersen --- drivers/scsi/device_handler/scsi_dh_rdac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 25f6e1ac9e7b..66652ab409cc 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -453,8 +453,8 @@ static int initialize_controller(struct scsi_device *sdev, if (!h->ctlr) err = SCSI_DH_RES_TEMP_UNAVAIL; else { - list_add_rcu(&h->node, &h->ctlr->dh_list); h->sdev = sdev; + list_add_rcu(&h->node, &h->ctlr->dh_list); } spin_unlock(&list_lock); err = SCSI_DH_OK; @@ -778,11 +778,11 @@ static void rdac_bus_detach( struct scsi_device *sdev ) spin_lock(&list_lock); if (h->ctlr) { list_del_rcu(&h->node); - h->sdev = NULL; kref_put(&h->ctlr->kref, release_controller); } spin_unlock(&list_lock); sdev->handler_data = NULL; + synchronize_rcu(); kfree(h); } From 70edd2e6f652f67d854981fd67f9ad0f1deaea92 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 26 Jul 2021 17:24:02 +0530 Subject: [PATCH 621/794] scsi: core: Avoid printing an error if target_alloc() returns -ENXIO Avoid printing a 'target allocation failed' error if the driver target_alloc() callback function returns -ENXIO. This return value indicates that the corresponding H:C:T:L entry is empty. Removing this error reduces the scan time if the user issues SCAN_WILD_CARD scan operation through sysfs parameter on a host with a lot of empty H:C:T:L entries. Avoiding the printk on -ENXIO matches the behavior of the other callback functions during scanning. Link: https://lore.kernel.org/r/20210726115402.1936-1-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index b059bf2b61d4..5b6996a2401b 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -475,7 +475,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, error = shost->hostt->target_alloc(starget); if(error) { - dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error); + if (error != -ENXIO) + dev_err(dev, "target allocation failed, error %d\n", error); /* don't want scsi_target_reap to do the final * put because it will be under the host lock */ scsi_target_destroy(starget); From a264cf5e81c78e2b9918b8b9ef2ace9dde1850df Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Fri, 16 Jul 2021 14:52:20 -0600 Subject: [PATCH 622/794] scsi: ibmvfc: Fix command state accounting and stale response detection Prior to commit 1f4a4a19508d ("scsi: ibmvfc: Complete commands outside the host/queue lock") responses to commands were completed sequentially with the host lock held such that a command had a basic binary state of active or free. It was therefore a simple affair of ensuring the associated ibmvfc_event to a VIOS response was valid by testing that it was not already free. The lock relaxation work to complete commands outside the lock inadvertently made it a trinary command state such that a command is either in flight, received and being completed, or completed and now free. This breaks the stale command detection logic as a command may be still marked active and been placed on the delayed completion list when a second stale response for the same command arrives. This can lead to double completions and list corruption. This issue was exposed by a recent VIOS regression where a missing memory barrier could occasionally result in the ibmvfc client receiving a duplicate response for the same command.
Fix the issue by introducing the atomic ibmvfc_event.active to track the trinary state of a command. The state is explicitly set to 1 when a command is successfully sent. The CRQ response handlers use atomic_dec_if_positive() to test for stale responses and correctly transition to the completion state when an active command is received. Finally, atomic_dec_and_test() is used to sanity check transitions when commands are freed as a result of a completion, or moved to the purge list as a result of error handling or adapter reset. Link: https://lore.kernel.org/r/20210716205220.1101150-1-tyreld@linux.ibm.com Fixes: 1f4a4a19508d ("scsi: ibmvfc: Complete commands outside the host/queue lock") Cc: stable@vger.kernel.org Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 19 +++++++++++++++++-- drivers/scsi/ibmvscsi/ibmvfc.h | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index bee1bec49c09..935b01ee44b7 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost, for (i = 0; i < size; ++i) { struct ibmvfc_event *evt = &pool->events[i]; + /* + * evt->active states + * 1 = in flight + * 0 = being completed + * -1 = free/freed + */ + atomic_set(&evt->active, -1); atomic_set(&evt->free, 1); evt->crq.valid = 0x80; evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i)); @@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt) BUG_ON(!ibmvfc_valid_event(pool, evt)); BUG_ON(atomic_inc_return(&evt->free) != 1); + BUG_ON(atomic_dec_and_test(&evt->active)); spin_lock_irqsave(&evt->queue->l_lock, flags); list_add_tail(&evt->queue_list, &evt->queue->free); @@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list) **/ static void ibmvfc_fail_request(struct ibmvfc_event *evt, int
error_code) { + /* + * Anything we are failing should still be active. Otherwise, it + * implies we already got a response for the command and are doing + * something bad like double completing it. + */ + BUG_ON(!atomic_dec_and_test(&evt->active)); if (evt->cmnd) { evt->cmnd->result = (error_code << 16); evt->done = ibmvfc_scsi_eh_done; @@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, evt->done(evt); } else { + atomic_set(&evt->active, 1); spin_unlock_irqrestore(&evt->queue->l_lock, flags); ibmvfc_trc_start(evt); } @@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost, return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; @@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 4f0f3baefae4..92fb889d7eb0 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -745,6 +745,7 @@ struct ibmvfc_event { struct ibmvfc_target *tgt; struct scsi_cmnd *cmnd; atomic_t free; + atomic_t active; union ibmvfc_iu *xfer_iu; void (*done)(struct ibmvfc_event *evt); void (*_done)(struct ibmvfc_event *evt); From 5c04243a56a7977185b00400e59ca7e108004faf Mon Sep 17 00:00:00 2001 From: Li Manyi Date: Mon, 26 Jul 2021 19:49:13 +0800 Subject: [PATCH 623/794] scsi: sr: Return correct event when media event code is 3 Media event code 3 is defined in the MMC-6 spec as follows: "MediaRemoval: The media has been removed from the specified slot, and the Drive is unable to access the media without user intervention. 
This applies to media changers only." This indicated that treating the condition as an EJECT_REQUEST was appropriate. However, doing so had the unfortunate side-effect of causing the drive tray to be physically ejected on resume. Instead treat the event as a MEDIA_CHANGE request. Fixes: 7dd753ca59d6 ("scsi: sr: Return appropriate error code when disk is ejected") Link: https://bugzilla.kernel.org/show_bug.cgi?id=213759 Link: https://lore.kernel.org/r/20210726114913.6760-1-limanyi@uniontech.com Signed-off-by: Li Manyi Signed-off-by: Martin K. Petersen --- drivers/scsi/sr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 94c254e9012e..a6d3ac0a6cbc 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -221,7 +221,7 @@ static unsigned int sr_get_events(struct scsi_device *sdev) else if (med->media_event_code == 2) return DISK_EVENT_MEDIA_CHANGE; else if (med->media_event_code == 3) - return DISK_EVENT_EJECT_REQUEST; + return DISK_EVENT_MEDIA_CHANGE; return 0; } From f0f82e2476f6adb9c7a0135cfab8091456990c99 Mon Sep 17 00:00:00 2001 From: lijinlin Date: Tue, 27 Jul 2021 11:44:55 +0800 Subject: [PATCH 624/794] scsi: core: Fix capacity set to zero after offlinining device After adding physical volumes to a volume group through vgextend, the kernel will rescan the partitions. This in turn will cause the device capacity to be queried. If the device status is set to offline through sysfs at this time, READ CAPACITY command will return a result which the host byte is DID_NO_CONNECT, and the capacity of the device will be set to zero in read_capacity_error(). After setting device status back to running, the capacity of the device will remain stuck at zero. Fix this issue by rescanning device when the device state changes to SDEV_RUNNING. Link: https://lore.kernel.org/r/20210727034455.1494960-1-lijinlin3@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: lijinlin Signed-off-by: Wu Bo Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 32489d25158f..ae9bfc658203 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -807,11 +807,14 @@ store_state_field(struct device *dev, struct device_attribute *attr, mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, state); /* - * If the device state changes to SDEV_RUNNING, we need to run - * the queue to avoid I/O hang. + * If the device state changes to SDEV_RUNNING, we need to + * rescan the device to revalidate it, and run the queue to + * avoid I/O hang. */ - if (ret == 0 && state == SDEV_RUNNING) + if (ret == 0 && state == SDEV_RUNNING) { + scsi_rescan_device(dev); blk_mq_run_hw_queues(sdev->request_queue, true); + } mutex_unlock(&sdev->state_mutex); return ret == 0 ? count : -EINVAL; From 8a7b46fa7902a3d36ce44a64f4d66586d66206ea Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 26 Jul 2021 11:26:44 +0200 Subject: [PATCH 625/794] MAINTAINERS: add Yasushi SHOJI as reviewer for the Microchip CAN BUS Analyzer Tool driver This patch adds Yasushi SHOJI as a reviewer for the Microchip CAN BUS Analyzer Tool driver. 
Link: https://lore.kernel.org/r/20210726111619.1023991-1-mkl@pengutronix.de Acked-by: Yasushi SHOJI Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 58afeb12d3b3..42ea3183e87c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11327,6 +11327,12 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/radio/radio-maxiradio* +MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER +R: Yasushi SHOJI +L: linux-can@vger.kernel.org +S: Maintained +F: drivers/net/can/usb/mcba_usb.c + MCAN MMIO DEVICE DRIVER M: Chandrasekar Ramakrishnan L: linux-can@vger.kernel.org From f6b3c7848e66e9046c8a79a5b88fd03461cc252b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Jul 2021 17:12:46 +0300 Subject: [PATCH 626/794] can: hi311x: fix a signedness bug in hi3110_cmd() The hi3110_cmd() is supposed to return zero on success and negative error codes on failure, but it was accidentally declared as a u8 when it needs to be an int type. 
Fixes: 57e83fb9b746 ("can: hi311x: Add Holt HI-311x CAN driver") Link: https://lore.kernel.org/r/20210729141246.GA1267@kili Signed-off-by: Dan Carpenter Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/hi311x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index dd17b8c53e1c..89d9c986a229 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -218,7 +218,7 @@ static int hi3110_spi_trans(struct spi_device *spi, int len) return ret; } -static u8 hi3110_cmd(struct spi_device *spi, u8 command) +static int hi3110_cmd(struct spi_device *spi, u8 command) { struct hi3110_priv *priv = spi_get_drvdata(spi); From fc43fb69a7af92839551f99c1a96a37b77b3ae7a Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 25 Jul 2021 13:36:30 +0300 Subject: [PATCH 627/794] can: mcba_usb_start(): add missing urb->transfer_dma initialization Yasushi reported, that his Microchip CAN Analyzer stopped working since commit 91c02557174b ("can: mcba_usb: fix memory leak in mcba_usb"). The problem was in missing urb->transfer_dma initialization. In my previous patch to this driver I refactored mcba_usb_start() code to avoid leaking usb coherent buffers. To achieve this, I passed local stack variable to usb_alloc_coherent() and then saved it to private array to correctly free all coherent buffers on ->close() call. But I forgot to initialize urb->transfer_dma with variable passed to usb_alloc_coherent(). All of this was causing device to not work, since dma addr 0 is not valid and following log can be found on bug report page, which points exactly to problem described above.
| DMAR: [DMA Write] Request device [00:14.0] PASID ffffffff fault addr 0 [fault reason 05] PTE Write access is not set Fixes: 91c02557174b ("can: mcba_usb: fix memory leak in mcba_usb") Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=990850 Link: https://lore.kernel.org/r/20210725103630.23864-1-paskripkin@gmail.com Cc: linux-stable Reported-by: Yasushi SHOJI Signed-off-by: Pavel Skripkin Tested-by: Yasushi SHOJI [mkl: fixed typos in commit message - thanks Yasushi SHOJI] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index a45865bd7254..a1a154c08b7f 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -653,6 +653,8 @@ static int mcba_usb_start(struct mcba_priv *priv) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, priv->udev, usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN), buf, MCBA_USB_RX_BUFF_SIZE, From 8dde723fcde4479f256441da03793e37181d9f21 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 29 Jul 2021 20:51:26 +0200 Subject: [PATCH 628/794] ALSA: usb-audio: Avoid unnecessary or invalid connector selection at resume The recent fix for the resume on Lenovo machines seems causing a regression on others. It's because the change always triggers the connector selection no matter which widget node type is. This patch addresses the regression by setting the resume callback selectively only for the connector widget. 
Fixes: 44609fc01f28 ("ALSA: usb-audio: Check connector value on resume") Cc: BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213897 Link: https://lore.kernel.org/r/20210729185126.24432-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index f4cdaf1ba44a..9b713b4a5ec4 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1816,6 +1816,15 @@ static void get_connector_control_name(struct usb_mixer_interface *mixer, strlcat(name, " - Output Jack", name_size); } +/* get connector value to "wake up" the USB audio */ +static int connector_mixer_resume(struct usb_mixer_elem_list *list) +{ + struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); + + get_connector_value(cval, NULL, NULL); + return 0; +} + /* Build a mixer control for a UAC connector control (jack-detect) */ static void build_connector_control(struct usb_mixer_interface *mixer, const struct usbmix_name_map *imap, @@ -1833,6 +1842,10 @@ static void build_connector_control(struct usb_mixer_interface *mixer, if (!cval) return; snd_usb_mixer_elem_init_std(&cval->head, mixer, term->id); + + /* set up a specific resume callback */ + cval->head.resume = connector_mixer_resume; + /* * UAC2: The first byte from reading the UAC2_TE_CONNECTOR control returns the * number of channels connected. 
@@ -3642,23 +3655,15 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list) return 0; } -static int default_mixer_resume(struct usb_mixer_elem_list *list) -{ - struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); - - /* get connector value to "wake up" the USB audio */ - if (cval->val_type == USB_MIXER_BOOLEAN && cval->channels == 1) - get_connector_value(cval, NULL, NULL); - - return 0; -} - static int default_mixer_reset_resume(struct usb_mixer_elem_list *list) { - int err = default_mixer_resume(list); + int err; - if (err < 0) - return err; + if (list->resume) { + err = list->resume(list); + if (err < 0) + return err; + } return restore_mixer_value(list); } @@ -3697,7 +3702,7 @@ void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, list->id = unitid; list->dump = snd_usb_mixer_dump_cval; #ifdef CONFIG_PM - list->resume = default_mixer_resume; + list->resume = NULL; list->reset_resume = default_mixer_reset_resume; #endif } From 0e865f0c31928d6a313269ef624907eec55287c4 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 27 Jul 2021 19:59:57 +0300 Subject: [PATCH 629/794] can: usb_8dev: fix memory leak In usb_8dev_start() MAX_RX_URBS coherent buffers are allocated and there is nothing, that frees them: 1) In callback function the urb is resubmitted and that's all 2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER is not set (see usb_8dev_start) and this flag cannot be used with coherent buffers. So, all allocated buffers should be freed with usb_free_coherent() explicitly. Side note: This code looks like a copy-paste of other can drivers. The same patch was applied to mcba_usb driver and it works nice with real hardware. There is no change in functionality, only clean-up code for coherent buffers. 
Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices") Link: https://lore.kernel.org/r/d39b458cd425a1cf7f512f340224e6e9563b07bd.1627404470.git.paskripkin@gmail.com Cc: linux-stable Signed-off-by: Pavel Skripkin Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index b6e7ef0d5bc6..d1b83bd1b3cb 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -137,7 +137,8 @@ struct usb_8dev_priv { u8 *cmd_msg_buffer; struct mutex usb_8dev_cmd_lock; - + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; /* tx frame */ @@ -733,6 +734,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -742,7 +744,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) } buf = usb_alloc_coherent(priv->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); usb_free_urb(urb); @@ -750,6 +752,8 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, priv->udev, usb_rcvbulkpipe(priv->udev, USB_8DEV_ENDP_DATA_RX), @@ -767,6 +771,9 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) break; } + priv->rxbuf[i] = buf; + priv->rxbuf_dma[i] = buf_dma; + /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); } @@ -836,6 +843,10 @@ static void unlink_all_urbs(struct usb_8dev_priv *priv) usb_kill_anchored_urbs(&priv->rx_submitted); + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(priv->udev, RX_BUFFER_SIZE, + priv->rxbuf[i], priv->rxbuf_dma[i]); + usb_kill_anchored_urbs(&priv->tx_submitted); 
atomic_set(&priv->active_tx_urbs, 0); From 9969e3c5f40c166e3396acc36c34f9de502929f6 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 27 Jul 2021 20:00:33 +0300 Subject: [PATCH 630/794] can: ems_usb: fix memory leak In ems_usb_start() MAX_RX_URBS coherent buffers are allocated and there is nothing, that frees them: 1) In callback function the urb is resubmitted and that's all 2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER is not set (see ems_usb_start) and this flag cannot be used with coherent buffers. So, all allocated buffers should be freed with usb_free_coherent() explicitly. Side note: This code looks like a copy-paste of other can drivers. The same patch was applied to mcba_usb driver and it works nice with real hardware. There is no change in functionality, only clean-up code for coherent buffers. Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface") Link: https://lore.kernel.org/r/59aa9fbc9a8cbf9af2bbd2f61a659c480b415800.1627404470.git.paskripkin@gmail.com Cc: linux-stable Signed-off-by: Pavel Skripkin Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 0a37af4a3fa4..2b5302e72435 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -255,6 +255,8 @@ struct ems_usb { unsigned int free_slots; /* remember number of available slots */ struct ems_cpc_msg active_params; /* active controller parameters */ + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; static void ems_usb_read_interrupt_callback(struct urb *urb) @@ -587,6 +589,7 @@ static int ems_usb_start(struct ems_usb *dev) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -596,7 +599,7 @@ static int 
ems_usb_start(struct ems_usb *dev) } buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); usb_free_urb(urb); @@ -604,6 +607,8 @@ static int ems_usb_start(struct ems_usb *dev) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2), buf, RX_BUFFER_SIZE, ems_usb_read_bulk_callback, dev); @@ -619,6 +624,9 @@ static int ems_usb_start(struct ems_usb *dev) break; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); } @@ -684,6 +692,10 @@ static void unlink_all_urbs(struct ems_usb *dev) usb_kill_anchored_urbs(&dev->rx_submitted); + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(dev->udev, RX_BUFFER_SIZE, + dev->rxbuf[i], dev->rxbuf_dma[i]); + usb_kill_anchored_urbs(&dev->tx_submitted); atomic_set(&dev->active_tx_urbs, 0); From 928150fad41ba16df7fcc9f7f945747d0f56cbb6 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 27 Jul 2021 20:00:46 +0300 Subject: [PATCH 631/794] can: esd_usb2: fix memory leak In esd_usb2_setup_rx_urbs() MAX_RX_URBS coherent buffers are allocated and there is nothing, that frees them: 1) In callback function the urb is resubmitted and that's all 2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER is not set (see esd_usb2_setup_rx_urbs) and this flag cannot be used with coherent buffers. So, all allocated buffers should be freed with usb_free_coherent() explicitly. Side note: This code looks like a copy-paste of other can drivers. The same patch was applied to mcba_usb driver and it works nice with real hardware. There is no change in functionality, only clean-up code for coherent buffers. 
Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Link: https://lore.kernel.org/r/b31b096926dcb35998ad0271aac4b51770ca7cc8.1627404470.git.paskripkin@gmail.com Cc: linux-stable Signed-off-by: Pavel Skripkin Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb2.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 65b58f8fc328..66fa8b07c2e6 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -195,6 +195,8 @@ struct esd_usb2 { int net_count; u32 version; int rxinitdone; + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; struct esd_usb2_net_priv { @@ -545,6 +547,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -554,7 +557,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) } buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { dev_warn(dev->udev->dev.parent, "No memory left for USB buffer\n"); @@ -562,6 +565,8 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) goto freeurb; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), buf, RX_BUFFER_SIZE, @@ -574,8 +579,12 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) usb_unanchor_urb(urb); usb_free_coherent(dev->udev, RX_BUFFER_SIZE, buf, urb->transfer_dma); + goto freeurb; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + freeurb: /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); @@ -663,6 +672,11 @@ static void unlink_all_urbs(struct esd_usb2 *dev) int i, j; usb_kill_anchored_urbs(&dev->rx_submitted); + + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(dev->udev, RX_BUFFER_SIZE, + 
dev->rxbuf[i], dev->rxbuf_dma[i]); + for (i = 0; i < dev->net_count; i++) { priv = dev->nets[i]; if (priv) { From fe911792eae32f03d27d8f3de2f0271862d435ac Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 23 Jun 2021 10:45:20 +0200 Subject: [PATCH 632/794] media: Revert "media: rtl28xxu: fix zero-length control request" This reverts commit 25d5ce3a606a1eb23a9265d615a92a876ff9cb5f. The patch in question causes a regression and was superseded by a second version. Unfortunately, the first revision ended up being applied instead of the correct one. Link: https://lore.kernel.org/r/YL3MCGY5wTsW2kEF@hovoldconsulting.com Signed-off-by: Johan Hovold Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/rtl28xxu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 83705730e37e..0cbdb95f8d35 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -612,9 +612,8 @@ static int rtl28xxu_read_config(struct dvb_usb_device *d) static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name) { struct rtl28xxu_dev *dev = d_to_priv(d); - u8 buf[1]; int ret; - struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf}; + struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL}; dev_dbg(&d->intf->dev, "\n"); From 76f22c93b209c811bd489950f17f8839adb31901 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 23 Jun 2021 10:45:21 +0200 Subject: [PATCH 633/794] media: rtl28xxu: fix zero-length control request The direction of the pipe argument must match the request-type direction bit or control requests may fail depending on the host-controller-driver implementation. Control transfers without a data stage are treated as OUT requests by the USB stack and should be using usb_sndctrlpipe(). Failing to do so will now trigger a warning. 
The driver uses a zero-length i2c-read request for type detection so update the control-request code to use usb_sndctrlpipe() in this case. Note that actually trying to read the i2c register in question does not work as the register might not exist (e.g. depending on the demodulator) as reported by Eero Lehtinen . Reported-by: syzbot+faf11bbadc5a372564da@syzkaller.appspotmail.com Reported-by: Eero Lehtinen Tested-by: Eero Lehtinen Fixes: d0f232e823af ("[media] rtl28xxu: add heuristic to detect chip type") Cc: stable@vger.kernel.org # 4.0 Cc: Antti Palosaari Signed-off-by: Johan Hovold Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/rtl28xxu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 0cbdb95f8d35..795a012d4020 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -37,7 +37,16 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req) } else { /* read */ requesttype = (USB_TYPE_VENDOR | USB_DIR_IN); - pipe = usb_rcvctrlpipe(d->udev, 0); + + /* + * Zero-length transfers must use usb_sndctrlpipe() and + * rtl28xxu_identify_state() uses a zero-length i2c read + * command to determine the chip type. + */ + if (req->size) + pipe = usb_rcvctrlpipe(d->udev, 0); + else + pipe = usb_sndctrlpipe(d->udev, 0); } ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value, From c592b46907adbeb81243f7eb7a468c36692658b8 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 30 Jun 2021 09:58:23 +0200 Subject: [PATCH 634/794] media: videobuf2-core: dequeue if start_streaming fails If a vb2_queue sets q->min_buffers_needed then when the number of queued buffers reaches q->min_buffers_needed, vb2_core_qbuf() will call the start_streaming() callback. 
If start_streaming() returns an error, then that error was just returned by vb2_core_qbuf(), but the buffer was still queued. However, userspace expects that if VIDIOC_QBUF fails, the buffer is returned dequeued. So if start_streaming() fails, then remove the buffer from the queue, thus avoiding this unwanted side-effect. Signed-off-by: Hans Verkuil Reviewed-by: Laurent Pinchart Tested-by: Kieran Bingham Fixes: b3379c6201bb ("[media] vb2: only call start_streaming if sufficient buffers are queued") Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/videobuf2/videobuf2-core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 02281d13505f..508ac295eb06 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -1573,6 +1573,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, struct media_request *req) { struct vb2_buffer *vb; + enum vb2_buffer_state orig_state; int ret; if (q->error) { @@ -1673,6 +1674,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, * Add to the queued buffers list, a buffer will stay on it until * dequeued in dqbuf. */ + orig_state = vb->state; list_add_tail(&vb->queued_entry, &q->queued_list); q->queued_count++; q->waiting_for_buffers = false; @@ -1703,8 +1705,17 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, if (q->streaming && !q->start_streaming_called && q->queued_count >= q->min_buffers_needed) { ret = vb2_start_streaming(q); - if (ret) + if (ret) { + /* + * Since vb2_core_qbuf will return with an error, + * we should return it to state DEQUEUED since + * the error indicates that the buffer wasn't queued. 
+ */ + list_del(&vb->queued_entry); + q->queued_count--; + vb->state = orig_state; return ret; + } } dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index); From f1de1c7803595e937ce9b922807f499851225021 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 5 Jul 2021 14:57:08 +0200 Subject: [PATCH 635/794] media: atmel: fix build when ISC=m and XISC=y Building VIDEO_ATMEL_ISC as module and VIDEO_ATMEL_XISC as built-in (or viceversa) causes build errors: or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o: in function `isc_async_complete': atmel-isc-base.c:(.text+0x40d0): undefined reference to `__this_module' or1k-linux-ld: atmel-isc-base.c:(.text+0x40f0): undefined reference to `__this_module' or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o:(.rodata+0x390): undefined reference to `__this_module' or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o:(__param+0x4): undefined reference to `__this_module' or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o:(__param+0x18): undefined reference to `__this_module' This is caused by the file atmel-isc-base.c which is common code between the two drivers. The solution is to create another Kconfig symbol that is automatically selected and generates the module atmel-isc-base.ko. This module can be loaded when both drivers are modules, or built-in when at least one of them is built-in. 
Reported-by: kernel test robot Fixes: c9aa973884a1 ("media: atmel: atmel-isc: add microchip-xisc driver") Signed-off-by: Eugen Hristev Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/atmel/Kconfig | 8 ++++++++ drivers/media/platform/atmel/Makefile | 5 +++-- drivers/media/platform/atmel/atmel-isc-base.c | 11 +++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/atmel/Kconfig b/drivers/media/platform/atmel/Kconfig index 99b51213f871..dda2f27da317 100644 --- a/drivers/media/platform/atmel/Kconfig +++ b/drivers/media/platform/atmel/Kconfig @@ -8,6 +8,7 @@ config VIDEO_ATMEL_ISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL Image Sensor Controller available as a v4l2 device. @@ -19,10 +20,17 @@ config VIDEO_ATMEL_XISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL eXtended Image Sensor Controller available as a v4l2 device. +config VIDEO_ATMEL_ISC_BASE + tristate + default n + help + ATMEL ISC and XISC common code base. 
+ config VIDEO_ATMEL_ISI tristate "ATMEL Image Sensor Interface (ISI) support" depends on VIDEO_V4L2 && OF diff --git a/drivers/media/platform/atmel/Makefile b/drivers/media/platform/atmel/Makefile index c5c01556c653..46d264ab7948 100644 --- a/drivers/media/platform/atmel/Makefile +++ b/drivers/media/platform/atmel/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only -atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o -atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o +atmel-isc-objs = atmel-sama5d2-isc.o +atmel-xisc-objs = atmel-sama7g5-isc.o obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o +obj-$(CONFIG_VIDEO_ATMEL_ISC_BASE) += atmel-isc-base.o obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c index 19daa49bf604..136ab7cf36ed 100644 --- a/drivers/media/platform/atmel/atmel-isc-base.c +++ b/drivers/media/platform/atmel/atmel-isc-base.c @@ -378,6 +378,7 @@ int isc_clk_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_clk_init); void isc_clk_cleanup(struct isc_device *isc) { @@ -392,6 +393,7 @@ void isc_clk_cleanup(struct isc_device *isc) clk_unregister(isc_clk->clk); } } +EXPORT_SYMBOL_GPL(isc_clk_cleanup); static int isc_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, @@ -1578,6 +1580,7 @@ irqreturn_t isc_interrupt(int irq, void *dev_id) return ret; } +EXPORT_SYMBOL_GPL(isc_interrupt); static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max) { @@ -2212,6 +2215,7 @@ const struct v4l2_async_notifier_operations isc_async_ops = { .unbind = isc_async_unbind, .complete = isc_async_complete, }; +EXPORT_SYMBOL_GPL(isc_async_ops); void isc_subdev_cleanup(struct isc_device *isc) { @@ -2224,6 +2228,7 @@ void isc_subdev_cleanup(struct isc_device *isc) INIT_LIST_HEAD(&isc->subdev_entities); } +EXPORT_SYMBOL_GPL(isc_subdev_cleanup); int 
isc_pipeline_init(struct isc_device *isc) { @@ -2264,6 +2269,7 @@ int isc_pipeline_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_pipeline_init); /* regmap configuration */ #define ATMEL_ISC_REG_MAX 0xd5c @@ -2273,4 +2279,9 @@ const struct regmap_config isc_regmap_config = { .val_bits = 32, .max_register = ATMEL_ISC_REG_MAX, }; +EXPORT_SYMBOL_GPL(isc_regmap_config); +MODULE_AUTHOR("Songjun Wu"); +MODULE_AUTHOR("Eugen Hristev"); +MODULE_DESCRIPTION("Atmel ISC common code base"); +MODULE_LICENSE("GPL v2"); From 341abd693d10e5f337a51f140ae3e7a1ae0febf6 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 29 Jul 2021 06:33:06 +0200 Subject: [PATCH 636/794] serial: 8250_pci: Avoid irq sharing for MSI(-X) interrupts. This attempts to fix a bug found with a serial port card which uses an MCS9922 chip, one of the 4 models for which MSI-X interrupts are currently supported. I don't possess such a card, and i'm not experienced with the serial subsystem, so this patch is based on what i think i found as a likely reason for failure, based on walking the user who actually owns the card through some diagnostic. The user who reported the problem finds the following in his dmesg output for the relevant ttyS4 and ttyS5: [ 0.580425] serial 0000:02:00.0: enabling device (0000 -> 0003) [ 0.601448] 0000:02:00.0: ttyS4 at I/O 0x3010 (irq = 125, base_baud = 115200) is a ST16650V2 [ 0.603089] serial 0000:02:00.1: enabling device (0000 -> 0003) [ 0.624119] 0000:02:00.1: ttyS5 at I/O 0x3000 (irq = 126, base_baud = 115200) is a ST16650V2 ... [ 6.323784] genirq: Flags mismatch irq 128. 00000080 (ttyS5) vs. 00000000 (xhci_hcd) [ 6.324128] genirq: Flags mismatch irq 128. 00000080 (ttyS5) vs. 00000000 (xhci_hcd) ... 
Output of setserial -a: /dev/ttyS4, Line 4, UART: 16650V2, Port: 0x3010, IRQ: 127 Baud_base: 115200, close_delay: 50, divisor: 0 closing_wait: 3000 Flags: spd_normal skip_test This suggests to me that the serial driver wants to register and share a MSI/MSI-X irq 128 with the xhci_hcd driver, whereas the xhci driver does not want to share the irq, as flags 0x00000080 (== IRQF_SHARED) from the serial port driver means to share the irq, and this mismatch ends in some failed irq init? With this setup, data reception works very unreliable, with dropped data, already at a transmission rate of only a 16 Bytes chunk every 1/120th of a second, ie. 1920 Bytes/sec, presumably due to rx fifo overflow due to mishandled or not used at all rx irq's? See full discussion thread with attempted diagnosis at: https://psychtoolbox.discourse.group/t/issues-with-iscan-serial-port-recording/3886 Disabling the use of MSI interrupts for the serial port pci card did fix the reliability problems. The user executed the following sequence of commands to achieve this: echo 0000:02:00.0 | sudo tee /sys/bus/pci/drivers/serial/unbind echo 0000:02:00.1 | sudo tee /sys/bus/pci/drivers/serial/unbind echo 0 | sudo tee /sys/bus/pci/devices/0000:02:00.0/msi_bus echo 0 | sudo tee /sys/bus/pci/devices/0000:02:00.1/msi_bus echo 0000:02:00.0 | sudo tee /sys/bus/pci/drivers/serial/bind echo 0000:02:00.1 | sudo tee /sys/bus/pci/drivers/serial/bind This resulted in the following log output: [ 82.179021] pci 0000:02:00.0: MSI/MSI-X disallowed for future drivers [ 87.003031] pci 0000:02:00.1: MSI/MSI-X disallowed for future drivers [ 98.537010] 0000:02:00.0: ttyS4 at I/O 0x3010 (irq = 17, base_baud = 115200) is a ST16650V2 [ 103.648124] 0000:02:00.1: ttyS5 at I/O 0x3000 (irq = 18, base_baud = 115200) is a ST16650V2 This patch attempts to fix the problem by disabling irq sharing when using MSI irq's. 
Note that all i know for sure is that disabling MSI irq's fixed the problem for the user, so this patch could be wrong and is untested. Please review with caution, keeping this in mind. Fixes: 8428413b1d14 ("serial: 8250_pci: Implement MSI(-X) support") Cc: Ralf Ramsauer Cc: stable Reviewed-by: Andy Shevchenko Signed-off-by: Mario Kleiner Link: https://lore.kernel.org/r/20210729043306.18528-1-mario.kleiner.de@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 02985cf90ef2..a808c283883e 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -4002,6 +4002,7 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) if (pci_match_id(pci_use_msi, dev)) { dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n"); pci_set_master(dev); + uart.port.flags &= ~UPF_SHARE_IRQ; rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES); } else { dev_dbg(&dev->dev, "Using legacy interrupts\n"); From fa7a549d321a4189677b0cea86e58d9db7977f7b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Jul 2021 17:37:49 -0400 Subject: [PATCH 637/794] KVM: x86: accept userspace interrupt only if no event is injected Once an exception has been injected, any side effects related to the exception (such as setting CR2 or DR6) have taken place. Therefore, once KVM sets the VM-entry interruption information field or the AMD EVENTINJ field, the next VM-entry must deliver that exception. Pending interrupts are processed after injected exceptions, so in theory it would not be a problem to use KVM_INTERRUPT when an injected exception is present. However, DOSEMU is using run->ready_for_interrupt_injection to detect interrupt windows and then using KVM_SET_SREGS/KVM_SET_REGS to inject the interrupt manually.
For this to work, the interrupt window must be delayed after the completion of the previous event injection. Cc: stable@vger.kernel.org Reported-by: Stas Sergeev Tested-by: Stas Sergeev Fixes: 71cc849b7093 ("KVM: x86: Fix split-irqchip vs interrupt injection window request") Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4116567f3d44..e5d5c5ed7dd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { - return kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_accept_dm_intr(vcpu); + /* + * Do not cause an interrupt window exit if an exception + * is pending or an event needs reinjection; userspace + * might want to inject the interrupt manually using KVM_SET_REGS + * or KVM_SET_SREGS. For that to work, we must be at an + * instruction boundary and with no events half-injected. + */ + return (kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_accept_dm_intr(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + !vcpu->arch.exception.pending); } static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, From 3a0670824979a986a2314c921aa092e60730eeae Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 20 Jun 2021 20:54:21 +0200 Subject: [PATCH 638/794] ARM: dts: stm32: Prefer HW RTC on DHCOM SoM The DHCOM SoM has two RTCs, one is the STM32 RTC built into the SoC and another is Microcrystal RV RTC. By default, only the latter has battery backup, the former does not. The order in which the RTCs are probed on boot is random, which means the kernel might pick up system time from the STM32 RTC which has no battery backup. This then leads to incorrect initial system time setup, even though the HW RTC has correct time configured in it.
Add DT alias entries, so that the RTCs get assigned fixed IDs and the HW RTC is always picked by the kernel as the default RTC, thus resulting in correct system time in early userspace. Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index 2af0a6752674..8349c9099e30 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -12,6 +12,8 @@ aliases { ethernet0 = &ethernet0; ethernet1 = &ksz8851; + rtc0 = &hwrtc; + rtc1 = &rtc; }; memory@c0000000 { @@ -248,7 +250,7 @@ /delete-property/dmas; /delete-property/dma-names; - rtc@32 { + hwrtc: rtc@32 { compatible = "microcrystal,rv8803"; reg = <0x32>; }; From 36862c1ebc92a7e6fcc55002965c44b8ad17d4ca Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 21 Jul 2021 20:12:53 +0200 Subject: [PATCH 639/794] ARM: dts: stm32: Disable LAN8710 EDPD on DHCOM The LAN8710 Energy Detect Power Down (EDPD) functionality might cause unreliable cable detection. There are multiple accounts of this in the SMSC PHY driver patches which attempted to make EDPD reliable, however it seems there is always some sort of corner case left. Unfortunately, there is no errata documented which would confirm this to be a silicon bug on the LAN87xx series of PHYs (LAN8700, LAN8710, LAN8720 at least). Disable EDPD on the DHCOM SoM, just like multiple other boards already do as well, to make the cable detection reliable.
Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index 8349c9099e30..8c41f819f776 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -140,6 +140,7 @@ reset-gpios = <&gpioh 3 GPIO_ACTIVE_LOW>; reset-assert-us = <500>; reset-deassert-us = <500>; + smsc,disable-energy-detect; interrupt-parent = <&gpioi>; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; }; From 15f68f027ebd961b99a1c420f96ff3838c5e4450 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 21 Jul 2021 20:10:40 +0200 Subject: [PATCH 640/794] ARM: dts: stm32: Fix touchscreen IRQ line assignment on DHCOM While 7e5f3155dcbb4 ("ARM: dts: stm32: Fix LED5 on STM32MP1 DHCOM PDK2") fixed the LED0 assignment on the PDK2 board, the same commit did not update the touchscreen IRQ line assignment, which is the same GPIO line, shared between the LED0 output and touchscreen IRQ input. To make this more convoluted, the same EXTI input (not the same GPIO line) is shared between Button B which is Active-Low IRQ, and touchscreen IRQ which is Edge-Falling IRQ, which cannot be used at the same time. In case the LCD board with touchscreen is in use, which is the case here, LED0 must be disabled, Button B must be polled, so the touchscreen interrupt works as it should. Update the touchscreen IRQ line assignment, disable LED0 and use polled GPIO button driver for Button B, since the DT here describes baseboard with LCD board. 
Fixes: 7e5f3155dcbb4 ("ARM: dts: stm32: Fix LED5 on STM32MP1 DHCOM PDK2") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index c5ea08fec535..6cf1c8b4c6e2 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -37,7 +37,7 @@ poll-interval = <20>; /* - * The EXTi IRQ line 3 is shared with touchscreen and ethernet, + * The EXTi IRQ line 3 is shared with ethernet, * so mark this as polled GPIO key. */ button-0 { @@ -46,6 +46,16 @@ gpios = <&gpiof 3 GPIO_ACTIVE_LOW>; }; + /* + * The EXTi IRQ line 6 is shared with touchscreen, + * so mark this as polled GPIO key. + */ + button-1 { + label = "TA2-GPIO-B"; + linux,code = <KEY_B>; + gpios = <&gpiod 6 GPIO_ACTIVE_LOW>; + }; + /* * The EXTi IRQ line 0 is shared with PMIC, * so mark this as polled GPIO key.
@@ -60,13 +70,6 @@ gpio-keys { compatible = "gpio-keys"; - button-1 { - label = "TA2-GPIO-B"; - linux,code = ; - gpios = <&gpiod 6 GPIO_ACTIVE_LOW>; - wakeup-source; - }; - button-3 { label = "TA4-GPIO-D"; linux,code = ; @@ -82,6 +85,7 @@ label = "green:led5"; gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>; default-state = "off"; + status = "disabled"; }; led-1 { @@ -185,8 +189,8 @@ touchscreen@38 { compatible = "edt,edt-ft5406"; reg = <0x38>; - interrupt-parent = <&gpiog>; - interrupts = <2 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */ + interrupt-parent = <&gpioc>; + interrupts = <6 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */ }; }; From ce5a595744126be4f1327e29e3c5ae9aac6b38d5 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:31 -0700 Subject: [PATCH 641/794] interconnect: qcom: icc-rpmh: Ensure floor BW is enforced for all nodes We currently only enforce BW floors for a subset of nodes in a path. All BCMs that need updating are queued in the pre_aggregate/aggregate phase. The first set() commits all queued BCMs and subsequent set() calls short-circuit without committing anything. Since the floor BW isn't set in sum_avg/max_peak until set(), then some BCMs are committed before their associated nodes reflect the floor. Set the floor as each node is being aggregated. This ensures that all relevant floors are set before the BCMs are committed.
Fixes: 266cd33b5913 ("interconnect: qcom: Ensure that the floor bandwidth value is enforced") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-4-mdtipton@codeaurora.org [georgi: Removed unused variable] Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpmh.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index bf01d09dba6c..f6fae64861ce 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -57,6 +57,11 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, qn->sum_avg[i] += avg_bw; qn->max_peak[i] = max_t(u32, qn->max_peak[i], peak_bw); } + + if (node->init_avg || node->init_peak) { + qn->sum_avg[i] = max_t(u64, qn->sum_avg[i], node->init_avg); + qn->max_peak[i] = max_t(u64, qn->max_peak[i], node->init_peak); + } } *agg_avg += avg_bw; @@ -79,7 +84,6 @@ EXPORT_SYMBOL_GPL(qcom_icc_aggregate); int qcom_icc_set(struct icc_node *src, struct icc_node *dst) { struct qcom_icc_provider *qp; - struct qcom_icc_node *qn; struct icc_node *node; if (!src) @@ -88,12 +92,6 @@ int qcom_icc_set(struct icc_node *src, struct icc_node *dst) node = src; qp = to_qcom_provider(node->provider); - qn = node->data; - - qn->sum_avg[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->sum_avg[QCOM_ICC_BUCKET_AMC], - node->avg_bw); - qn->max_peak[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->max_peak[QCOM_ICC_BUCKET_AMC], - node->peak_bw); qcom_icc_bcm_voter_commit(qp->voter); From f84f5b6f72e68bbaeb850b58ac167e4a3a47532a Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:32 -0700 Subject: [PATCH 642/794] interconnect: qcom: icc-rpmh: Add BCMs to commit list in pre_aggregate We're only adding BCMs to the commit list in aggregate(), but there are cases where pre_aggregate() is called without subsequently calling aggregate(). 
In particular, in icc_sync_state() when a node with initial BW has zero requests. Since BCMs aren't added to the commit list in these cases, we don't actually send the zero BW request to HW. So the resources remain on unnecessarily. Add BCMs to the commit list in pre_aggregate() instead, which is always called even when there are no requests. Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-5-mdtipton@codeaurora.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpmh.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index f6fae64861ce..27cc5f03611c 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -20,13 +20,18 @@ void qcom_icc_pre_aggregate(struct icc_node *node) { size_t i; struct qcom_icc_node *qn; + struct qcom_icc_provider *qp; qn = node->data; + qp = to_qcom_provider(node->provider); for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) { qn->sum_avg[i] = 0; qn->max_peak[i] = 0; } + + for (i = 0; i < qn->num_bcms; i++) + qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); } EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate); @@ -44,10 +49,8 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, { size_t i; struct qcom_icc_node *qn; - struct qcom_icc_provider *qp; qn = node->data; - qp = to_qcom_provider(node->provider); if (!tag) tag = QCOM_ICC_TAG_ALWAYS; @@ -67,9 +70,6 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, *agg_avg += avg_bw; *agg_peak = max_t(u32, *agg_peak, peak_bw); - for (i = 0; i < qn->num_bcms; i++) - qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); - return 0; } EXPORT_SYMBOL_GPL(qcom_icc_aggregate); From 094121ef815f29d9e6a01fafca365831454ce293 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 28 Jul 2021 20:21:15 +0200 Subject: [PATCH 643/794] 
arch: Kconfig: clean up obsolete use of HAVE_IDE The arch-specific Kconfig files use HAVE_IDE to indicate if IDE is supported. As IDE support and the HAVE_IDE config vanishes with commit b7fb14d3ac63 ("ide: remove the legacy ide driver"), there is no need to mention HAVE_IDE in all those arch-specific Kconfig files. The issue was identified with ./scripts/checkkconfigsymbols.py. Fixes: b7fb14d3ac63 ("ide: remove the legacy ide driver") Suggested-by: Randy Dunlap Signed-off-by: Lukas Bulwahn Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210728182115.4401-1-lukas.bulwahn@gmail.com Reviewed-by: Christoph Hellwig Acked-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- arch/alpha/Kconfig | 1 - arch/arm/Kconfig | 6 ------ arch/arm/mach-davinci/Kconfig | 1 - arch/h8300/Kconfig.cpu | 1 - arch/ia64/Kconfig | 1 - arch/m68k/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/parisc/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/x86/Kconfig | 1 - arch/xtensa/Kconfig | 1 - 13 files changed, 18 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 77d3280dc678..a6d4c2f744e3 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -14,7 +14,6 @@ config ALPHA select PCI_SYSCALL if PCI select HAVE_AOUT select HAVE_ASM_MODVERSIONS - select HAVE_IDE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select NEED_DMA_MAP_STATE diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 06b6187b67af..f2ce83e643e7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -95,7 +95,6 @@ config ARM select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) - select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -361,7 +360,6 @@ config ARCH_FOOTBRIDGE bool "FootBridge" select CPU_SA110 select FOOTBRIDGE - select HAVE_IDE select NEED_MACH_IO_H if !MMU select NEED_MACH_MEMORY_H 
help @@ -429,7 +427,6 @@ config ARCH_PXA select GENERIC_IRQ_MULTI_HANDLER select GPIO_PXA select GPIOLIB - select HAVE_IDE select IRQ_DOMAIN select PLAT_PXA select SPARSE_IRQ @@ -445,7 +442,6 @@ config ARCH_RPC select ARM_HAS_SG_CHAIN select CPU_SA110 select FIQ - select HAVE_IDE select HAVE_PATA_PLATFORM select ISA_DMA_API select LEGACY_TIMER_TICK @@ -468,7 +464,6 @@ config ARCH_SA1100 select CPU_SA1100 select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB - select HAVE_IDE select IRQ_DOMAIN select ISA select NEED_MACH_MEMORY_H @@ -504,7 +499,6 @@ config ARCH_OMAP1 select GENERIC_IRQ_CHIP select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB - select HAVE_IDE select HAVE_LEGACY_CLK select IRQ_DOMAIN select NEED_MACH_IO_H if PCCARD diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index de11030748d0..1d3aef84287d 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -9,7 +9,6 @@ menuconfig ARCH_DAVINCI select PM_GENERIC_DOMAINS_OF if PM && OF select REGMAP_MMIO select RESET_CONTROLLER - select HAVE_IDE select PINCTRL_SINGLE if ARCH_DAVINCI diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index b5e14d513e62..c30baa0499fc 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -44,7 +44,6 @@ config H8300_H8MAX bool "H8MAX" select H83069 select RAMKERNEL - select HAVE_IDE help H8MAX Evaluation Board Support More Information. 
(Japanese Only) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index cf425c2c63af..4993c7ac7ff6 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -25,7 +25,6 @@ config IA64 select HAVE_ASM_MODVERSIONS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_EXIT_THREAD - select HAVE_IDE select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 96989ad46f66..d632a1d576f9 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,7 +23,6 @@ config M68K select HAVE_DEBUG_BUGVERBOSE select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED select HAVE_FUTEX_CMPXCHG if MMU && FUTEX - select HAVE_IDE select HAVE_MOD_ARCH_SPECIFIC select HAVE_UID16 select MMU_GATHER_NO_RANGE if MMU diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cee6087cd686..6dfb27d531dd 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -71,7 +71,6 @@ config MIPS select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index bde9907bc5b2..4f8c1fbf8f2f 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -3,7 +3,6 @@ config PARISC def_bool y select ARCH_32BIT_OFF_T if !64BIT select ARCH_MIGHT_HAVE_PC_PARPORT - select HAVE_IDE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d01e3401581d..663766fbf505 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -220,7 +220,6 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select 
HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 45a0549421cd..b683b69a4556 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -39,7 +39,6 @@ config SUPERH select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_FTRACE_MCOUNT_RECORD select HAVE_HW_BREAKPOINT - select HAVE_IDE if HAS_IOPORT_MAP select HAVE_IOREMAP_PROT if MMU && !X2TLB select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c5fa7932b550..f0c0f955e169 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -19,7 +19,6 @@ config SPARC select OF select OF_PROMTREE select HAVE_ASM_MODVERSIONS - select HAVE_IDE select HAVE_ARCH_KGDB if !SMP || SPARC64 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_SECCOMP if SPARC64 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 49270655e827..88fb922c23a0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -202,7 +202,6 @@ config X86 select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 2332b2156993..3878880469d1 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -327,7 +327,6 @@ config XTENSA_PLATFORM_ISS config XTENSA_PLATFORM_XT2000 bool "XT2000" - select HAVE_IDE help XT2000 is the name of Tensilica's feature-rich emulation platform. This hardware is capable of running a full Linux distribution. 
From 7561c14d8a4d1a24a40b1839d927d488e2d6345a Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Wed, 28 Jul 2021 13:24:53 +0200 Subject: [PATCH 644/794] s390/vdso: add .got.plt in vdso linker script KCFLAGS="-mno-pic-data-is-text-relative" make leads to bfd assertion error in s390_got_pointer(): LD arch/s390/kernel/vdso64/vdso64.so.dbg ld: BFD version 2.35-18.fc33 assertion fail elf-s390-common.c:74 readelf -Wr vdso64_generic.o | grep GOT 0000000000000032 000000110000001a R_390_GOTENT 0000000000000000 _vdso_data + 2 (...) Add .got.plt in linker script to avoid this. Suggested-by: Ilya Leoshkevich Signed-off-by: Sumanth Korikkar Signed-off-by: Heiko Carstens --- arch/s390/kernel/vdso32/vdso32.lds.S | 1 + arch/s390/kernel/vdso64/vdso64.lds.S | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index bff50b6acd6d..edf5ff1debe1 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index d4fb336d747b..4461ea151e49 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); From 88731c8f3636b133e27df88febcd7cd2fdece0a7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 29 Jul 2021 15:29:19 +0200 Subject: [PATCH 645/794] s390/boot: fix zstd build for -march=z900 zstd decompression uses __builtin_clz() which falls back to __clzdi2() when the kernel is built for older hardware like z900.
This leads to build failures like the following: s390x-11.1.0-ld: /devel/src/kernel/arch/s390/boot/compressed/../../../../lib/zstd/bitstream.h:148: undefined reference to `__clzdi2' Fix that by optionally including lib/clz_ctz.c into the decompressor. Reported-by: kernel test robot Fixes: 7b034d9c1b08 ("s390/boot: add zstd support") Signed-off-by: Vasily Gorbik Link: https://lore.kernel.org/r/patch-1.thread-f0f589.git-f0f58936888f.your-ad-here.call-01627564869-ext-2765@work.hours Signed-off-by: Heiko Carstens --- arch/s390/boot/compressed/Makefile | 1 + arch/s390/boot/compressed/clz_ctz.c | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 arch/s390/boot/compressed/clz_ctz.c diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 660c799d875d..e30d3fdbbc78 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -11,6 +11,7 @@ UBSAN_SANITIZE := n KASAN_SANITIZE := n obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/compressed/clz_ctz.c new file mode 100644 index 000000000000..c3ebf248596b --- /dev/null +++ b/arch/s390/boot/compressed/clz_ctz.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/clz_ctz.c" From 1e9faef4d26de33bd6b5018695996e7394119e5b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 30 Jul 2021 14:21:56 +0200 Subject: [PATCH 646/794] USB: serial: pl2303: fix HX type detection The device release number for HX-type devices is configurable in EEPROM/OTPROM and cannot be used reliably for type detection. 
Assume all (non-H) devices with bcdUSB 1.1 and unknown bcdDevice to be of HX type while adding a bcdDevice check for HXD and TB (1.1 and 2.0, respectively). Reported-by: Chris Fixes: 8a7bf7510d1f ("USB: serial: pl2303: amend and tighten type detection") Cc: stable@vger.kernel.org # 5.13 Link: https://lore.kernel.org/r/20210730122156.718-1-johan@kernel.org Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 41 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 2f2f5047452b..17601e32083e 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -418,24 +418,33 @@ static int pl2303_detect_type(struct usb_serial *serial) bcdDevice = le16_to_cpu(desc->bcdDevice); bcdUSB = le16_to_cpu(desc->bcdUSB); - switch (bcdDevice) { - case 0x100: - /* - * Assume it's an HXN-type if the device doesn't support the old read - * request value. - */ - if (bcdUSB == 0x200 && !pl2303_supports_hx_status(serial)) - return TYPE_HXN; + switch (bcdUSB) { + case 0x110: + switch (bcdDevice) { + case 0x300: + return TYPE_HX; + case 0x400: + return TYPE_HXD; + default: + return TYPE_HX; + } break; - case 0x300: - if (bcdUSB == 0x200) + case 0x200: + switch (bcdDevice) { + case 0x100: + /* + * Assume it's an HXN-type if the device doesn't + * support the old read request value. 
+ */ + if (!pl2303_supports_hx_status(serial)) + return TYPE_HXN; + break; + case 0x300: return TYPE_TA; - - return TYPE_HX; - case 0x400: - return TYPE_HXD; - case 0x500: - return TYPE_TB; + case 0x500: + return TYPE_TB; + } + break; } dev_err(&serial->interface->dev, From 4d77f36f2c8c62b230f4a5eb264c169fa04c4a5a Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Tue, 27 Jul 2021 17:43:37 +0800 Subject: [PATCH 647/794] drm/amdgpu: Fix out-of-bounds read when update mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If one GTT BO has been evicted/swapped out, it should sit in CPU domain. TTM only allocates struct ttm_resource instead of struct ttm_range_mgr_node for sysMem. Now when we update mapping for such invalidated BOs, we might walk out of bounds of struct ttm_resource. Three possible fixes: 1) Let sysMem manager alloc struct ttm_range_mgr_node, like ttm_range_manager does. 2) Pass pages_addr to update_mapping function too, but need memset pages_addr[] to zero when unpopulate. 3) Init amdgpu_res_cursor directly. The bug was detected by KFENCE.
================================================================== BUG: KFENCE: out-of-bounds read in amdgpu_vm_bo_update_mapping+0x564/0x6e0 Out-of-bounds read at 0x000000008ea93fe9 (64B right of kfence-#167): amdgpu_vm_bo_update_mapping+0x564/0x6e0 [amdgpu] amdgpu_vm_bo_update+0x282/0xa40 [amdgpu] amdgpu_vm_handle_moved+0x19e/0x1f0 [amdgpu] amdgpu_cs_vm_handling+0x4e4/0x640 [amdgpu] amdgpu_cs_ioctl+0x19e7/0x23c0 [amdgpu] drm_ioctl_kernel+0xf3/0x180 [drm] drm_ioctl+0x2cb/0x550 [drm] amdgpu_drm_ioctl+0x5e/0xb0 [amdgpu] kfence-#167 [0x000000008e11c055-0x000000001f676b3e ttm_sys_man_alloc+0x35/0x80 [ttm] ttm_resource_alloc+0x39/0x50 [ttm] ttm_bo_swapout+0x252/0x5a0 [ttm] ttm_device_swapout+0x107/0x180 [ttm] ttm_global_swapout+0x6f/0x130 [ttm] ttm_tt_populate+0xb1/0x2a0 [ttm] ttm_bo_handle_move_mem+0x17e/0x1d0 [ttm] ttm_mem_evict_first+0x59d/0x9c0 [ttm] ttm_bo_mem_space+0x39f/0x400 [ttm] ttm_bo_validate+0x13c/0x340 [ttm] ttm_bo_init_reserved+0x269/0x540 [ttm] amdgpu_bo_create+0x1d1/0xa30 [amdgpu] amdgpu_bo_create_user+0x40/0x80 [amdgpu] amdgpu_gem_object_create+0x71/0xc0 [amdgpu] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x2f2/0xcd0 [amdgpu] kfd_ioctl_alloc_memory_of_gpu+0xe2/0x330 [amdgpu] kfd_ioctl+0x461/0x690 [amdgpu] Signed-off-by: xinhui pan Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 59e0fefb15aa..acfa207cf970 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -54,11 +54,12 @@ static inline void amdgpu_res_first(struct ttm_resource *res, { struct drm_mm_node *node; - if (!res) { + if (!res || res->mem_type == TTM_PL_SYSTEM) { cur->start = start; cur->size = size; cur->remaining = size; cur->node = NULL; + WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); 
return; } From 1c0539a6fc8a4a4b77278e35d763073890de96b9 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 10 Jun 2021 09:55:01 +0800 Subject: [PATCH 648/794] drm/amdgpu: fix the doorbell missing when in CGPG issue for renoir. If GC has entered CGPG, ringing doorbell > first page doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround this issue. Signed-off-by: Yifan Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 044076ec1d03..6a23c6826e12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1295,6 +1295,16 @@ static bool is_raven_kicker(struct amdgpu_device *adev) return false; } +static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) +{ + if ((adev->asic_type == CHIP_RENOIR) && + (adev->gfx.me_fw_version >= 0x000000a5) && + (adev->gfx.me_feature_version >= 52)) + return true; + else + return false; +} + static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) { if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) @@ -3675,7 +3685,16 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + /* If GC has entered CGPG, ringing doorbell > first page + * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to + * workaround this issue. And this change has to align with firmware + * update. 
+ */ + if (check_if_enlarge_doorbell_range(adev)) + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + (adev->doorbell.size - 4)); + else + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, (adev->doorbell_index.userqueue_end * 2) << 2); } From 028a71775f811e9d60664ba2c248ff95c6cf57cb Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Thu, 29 Jul 2021 08:52:58 -0700 Subject: [PATCH 649/794] gve: Update MAINTAINERS list The team maintaining the gve driver has undergone some changes, this updates the MAINTAINERS file accordingly. Signed-off-by: Catherine Sullivan Signed-off-by: Jon Olson Signed-off-by: David Awogbemila Signed-off-by: Jeroen de Borst Link: https://lore.kernel.org/r/20210729155258.442650-1-csully@google.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 58afeb12d3b3..17a873153eba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7858,9 +7858,9 @@ S: Maintained F: drivers/input/touchscreen/goodix.c GOOGLE ETHERNET DRIVERS -M: Catherine Sullivan -R: Sagi Shahar -R: Jon Olson +M: Jeroen de Borst +R: Catherine Sullivan +R: David Awogbemila L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/google/gve.rst From b2ff70a01a7a8083e749e01e5d3ffda706fe3305 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 29 Jul 2021 14:53:35 -0700 Subject: [PATCH 650/794] lib/test_string.c: move string selftest in the Runtime Testing menu STRING_SELFTEST is presented in the "Library routines" menu. 
Move it in Kernel hacking > Kernel Testing and Coverage > Runtime Testing together with other similar tests found in lib/ --- Runtime Testing <*> Test functions located in the hexdump module at runtime <*> Test string functions (NEW) <*> Test functions located in the string_helpers module at runtime <*> Test strscpy*() family of functions at runtime <*> Test kstrto*() family of functions at runtime <*> Test printf() family of functions at runtime <*> Test scanf() family of functions at runtime Link: https://lkml.kernel.org/r/20210719185158.190371-1-mcroce@linux.microsoft.com Signed-off-by: Matteo Croce Cc: Peter Rosin Cc: Geert Uytterhoeven Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 3 --- lib/Kconfig.debug | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig b/lib/Kconfig index d241fe476fda..5c9c0687f76d 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -683,9 +683,6 @@ config PARMAN config OBJAGG tristate "objagg" if COMPILE_TEST -config STRING_SELFTEST - tristate "Test string functions" - endmenu config GENERIC_IOREMAP diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 831212722924..5ddd575159fb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2180,6 +2180,9 @@ config ASYNC_RAID6_TEST config TEST_HEXDUMP tristate "Test functions located in the hexdump module at runtime" +config STRING_SELFTEST + tristate "Test string functions at runtime" + config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" From f267aeb6dea5e468793e5b8eb6a9c72c0020d418 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 29 Jul 2021 14:53:38 -0700 Subject: [PATCH 651/794] ocfs2: fix zero out valid data If append-dio feature is enabled, direct-io write and fallocate could run in parallel to extend file size, fallocate used "orig_isize" to record i_size before taking "ip_alloc_sem", when ocfs2_zeroout_partial_cluster() zeroout EOF blocks, i_size may already be
extended by ocfs2_dio_end_io_write(), that will cause valid data zeroed out. Link: https://lkml.kernel.org/r/20210722054923.24389-1-junxiao.bi@oracle.com Fixes: 6bba4471f0cc ("ocfs2: fix data corruption by fallocate") Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Joel Becker Cc: Jun Piao Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 775657943057..53bb46ce3cbb 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1935,7 +1935,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, goto out_inode_unlock; } - orig_isize = i_size_read(inode); switch (sr->l_whence) { case 0: /*SEEK_SET*/ break; @@ -1943,7 +1942,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, sr->l_start += f_pos; break; case 2: /*SEEK_END*/ - sr->l_start += orig_isize; + sr->l_start += i_size_read(inode); break; default: ret = -EINVAL; @@ -1998,6 +1997,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, ret = -EINVAL; } + orig_isize = i_size_read(inode); /* zeroout eof blocks in the cluster. */ if (!ret && change_size && orig_isize < size) { ret = ocfs2_zeroout_partial_cluster(inode, orig_isize, From 9449ad33be8480f538b11a593e2dda2fb33ca06d Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 29 Jul 2021 14:53:41 -0700 Subject: [PATCH 652/794] ocfs2: issue zeroout to EOF blocks For punch holes in EOF blocks, fallocate used buffer write to zero the EOF blocks in last cluster. But since ->writepage will ignore EOF pages, those zeros will not be flushed. This "looks" ok as commit 6bba4471f0cc ("ocfs2: fix data corruption by fallocate") will zero the EOF blocks when extend the file size, but it isn't. 
The problem happened on those EOF pages, before writeback, those pages had DIRTY flag set and all buffer_head in them also had DIRTY flag set, when writeback run by write_cache_pages(), DIRTY flag on the page was cleared, but DIRTY flag on the buffer_head not. When next write happened to those EOF pages, since buffer_head already had DIRTY flag set, it would not mark page DIRTY again. That made writeback ignore them forever. That will cause data corruption. Even directio write can't work because it will fail when trying to drop pages caches before direct io, as it found the buffer_head for those pages still had DIRTY flag set, then it will fall back to buffer io mode. To make a summary of the issue, as writeback ignores EOF pages, once any EOF page is generated, any write to it will only go to the page cache, it will never be flushed to disk even file size extends and that page is not EOF page any more. The fix is to avoid zeroing EOF blocks with buffer write. The following code snippet from qemu-img could trigger the corruption. 656 open("6b3711ae-3306-4bdd-823c-cf1c0060a095.conv.2", O_RDWR|O_DIRECT|O_CLOEXEC) = 11 ... 660 fallocate(11, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 2275868672, 327680 660 fallocate(11, 0, 2275868672, 327680) = 0 658 pwrite64(11, " Link: https://lkml.kernel.org/r/20210722054923.24389-2-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 99 ++++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 53bb46ce3cbb..54d7843c0211 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1529,6 +1529,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, } } +/* + * zero out partial blocks of one cluster.
+ * + * start: file offset where zero starts, will be made upper block aligned. + * len: it will be trimmed to the end of current cluster if "start + len" + * is bigger than it. + */ +static int ocfs2_zeroout_partial_cluster(struct inode *inode, + u64 start, u64 len) +{ + int ret; + u64 start_block, end_block, nr_blocks; + u64 p_block, offset; + u32 cluster, p_cluster, nr_clusters; + struct super_block *sb = inode->i_sb; + u64 end = ocfs2_align_bytes_to_clusters(sb, start); + + if (start + len < end) + end = start + len; + + start_block = ocfs2_blocks_for_bytes(sb, start); + end_block = ocfs2_blocks_for_bytes(sb, end); + nr_blocks = end_block - start_block; + if (!nr_blocks) + return 0; + + cluster = ocfs2_bytes_to_clusters(sb, start); + ret = ocfs2_get_clusters(inode, cluster, &p_cluster, + &nr_clusters, NULL); + if (ret) + return ret; + if (!p_cluster) + return 0; + + offset = start_block - ocfs2_clusters_to_blocks(sb, cluster); + p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset; + return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS); +} + static int ocfs2_zero_partial_clusters(struct inode *inode, u64 start, u64 len) { @@ -1538,6 +1577,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); unsigned int csize = osb->s_clustersize; handle_t *handle; + loff_t isize = i_size_read(inode); /* * The "start" and "end" values are NOT necessarily part of @@ -1558,6 +1598,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) goto out; + /* No page cache for EOF blocks, issue zero out to disk. */ + if (end > isize) { + /* + * zeroout eof blocks in last cluster starting from + * "isize" even "start" > "isize" because it is + * complicated to zeroout just at "start" as "start" + * may be not aligned with block size, buffer write + * would be required to do that, but out of eof buffer + * write is not supported. 
+ */ + ret = ocfs2_zeroout_partial_cluster(inode, isize, + end - isize); + if (ret) { + mlog_errno(ret); + goto out; + } + if (start >= isize) + goto out; + end = isize; + } handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -1855,45 +1915,6 @@ out: return ret; } -/* - * zero out partial blocks of one cluster. - * - * start: file offset where zero starts, will be made upper block aligned. - * len: it will be trimmed to the end of current cluster if "start + len" - * is bigger than it. - */ -static int ocfs2_zeroout_partial_cluster(struct inode *inode, - u64 start, u64 len) -{ - int ret; - u64 start_block, end_block, nr_blocks; - u64 p_block, offset; - u32 cluster, p_cluster, nr_clusters; - struct super_block *sb = inode->i_sb; - u64 end = ocfs2_align_bytes_to_clusters(sb, start); - - if (start + len < end) - end = start + len; - - start_block = ocfs2_blocks_for_bytes(sb, start); - end_block = ocfs2_blocks_for_bytes(sb, end); - nr_blocks = end_block - start_block; - if (!nr_blocks) - return 0; - - cluster = ocfs2_bytes_to_clusters(sb, start); - ret = ocfs2_get_clusters(inode, cluster, &p_cluster, - &nr_clusters, NULL); - if (ret) - return ret; - if (!p_cluster) - return 0; - - offset = start_block - ocfs2_clusters_to_blocks(sb, cluster); - p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset; - return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS); -} - /* * Parts of this function taken from xfs_change_file_space() */ From 30def93565e5ba08676aa2b9083f253fc586dbed Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 29 Jul 2021 14:53:44 -0700 Subject: [PATCH 653/794] mm: memcontrol: fix blocking rstat function called from atomic cgroup1 thresholding code Dan Carpenter reports: The patch 2d146aa3aa84: "mm: memcontrol: switch to rstat" from Apr 29, 2021, leads to the following static checker warning: kernel/cgroup/rstat.c:200 cgroup_rstat_flush() warn: sleeping in atomic context mm/memcontrol.c 
3572 static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) 3573 { 3574 unsigned long val; 3575 3576 if (mem_cgroup_is_root(memcg)) { 3577 cgroup_rstat_flush(memcg->css.cgroup); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is from static analysis and potentially a false positive. The problem is that mem_cgroup_usage() is called from __mem_cgroup_threshold() which holds an rcu_read_lock(). And the cgroup_rstat_flush() function can sleep. 3578 val = memcg_page_state(memcg, NR_FILE_PAGES) + 3579 memcg_page_state(memcg, NR_ANON_MAPPED); 3580 if (swap) 3581 val += memcg_page_state(memcg, MEMCG_SWAP); 3582 } else { 3583 if (!swap) 3584 val = page_counter_read(&memcg->memory); 3585 else 3586 val = page_counter_read(&memcg->memsw); 3587 } 3588 return val; 3589 } __mem_cgroup_threshold() indeed holds the rcu lock. In addition, the thresholding code is invoked during stat changes, and those contexts have irqs disabled as well. If the lock breaking occurs inside the flush function, it will result in a sleep from an atomic context. Use the irqsafe flushing variant in mem_cgroup_usage() to fix this. 
Link: https://lkml.kernel.org/r/20210726150019.251820-1-hannes@cmpxchg.org Fixes: 2d146aa3aa84 ("mm: memcontrol: switch to rstat") Signed-off-by: Johannes Weiner Reported-by: Dan Carpenter Acked-by: Chris Down Reviewed-by: Rik van Riel Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ae1f5d0cb581..eb8e87c4833f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3574,7 +3574,8 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) unsigned long val; if (mem_cgroup_is_root(memcg)) { - cgroup_rstat_flush(memcg->css.cgroup); + /* mem_cgroup_threshold() calls here from irqsafe context */ + cgroup_rstat_flush_irqsafe(memcg->css.cgroup); val = memcg_page_state(memcg, NR_FILE_PAGES) + memcg_page_state(memcg, NR_ANON_MAPPED); if (swap) From b5916c025432b7c776b6bb13617485fbc0bd3ebd Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 29 Jul 2021 14:53:47 -0700 Subject: [PATCH 654/794] mm/migrate: fix NR_ISOLATED corruption on 64-bit Similar to commit 2da9f6305f30 ("mm/vmscan: fix NR_ISOLATED_FILE corruption on 64-bit") avoid using unsigned int for nr_pages. With unsigned int type the large unsigned int converts to a large positive signed long. Symptoms include CMA allocations hanging forever due to alloc_contig_range->...->isolate_migratepages_block waiting forever in "while (unlikely(too_many_isolated(pgdat)))". 
Link: https://lkml.kernel.org/r/20210728042531.359409-1-aneesh.kumar@linux.ibm.com Fixes: c5fc5c3ae0c8 ("mm: migrate: account THP NUMA migration counters correctly") Signed-off-by: Aneesh Kumar K.V Reported-by: Michael Ellerman Reported-by: Alexey Kardashevskiy Reviewed-by: Yang Shi Cc: Mel Gorman Cc: Nicholas Piggin Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 34a9ad3e0a4f..7e240437e7d9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2068,7 +2068,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, LIST_HEAD(migratepages); new_page_t *new; bool compound; - unsigned int nr_pages = thp_nr_pages(page); + int nr_pages = thp_nr_pages(page); /* * PTE mapped THP or HugeTLB page can't reach here so the page could From f227f0faf63b46a113c4d1aca633c80195622dd2 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 29 Jul 2021 14:53:50 -0700 Subject: [PATCH 655/794] slub: fix unreclaimable slab stat for bulk free SLUB uses page allocator for higher order allocations and update unreclaimable slab stat for such allocations. At the moment, the bulk free for SLUB does not share code with normal free code path for these type of allocations and have missed the stat update. So, fix the stat update by common code. The user visible impact of the bug is the potential of inconsistent unreclaimable slab stat visible through meminfo and vmstat. 
Link: https://lkml.kernel.org/r/20210728155354.3440560-1-shakeelb@google.com Fixes: 6a486c0ad4dc ("mm, sl[ou]b: improve memory accounting") Signed-off-by: Shakeel Butt Acked-by: Michal Hocko Acked-by: Roman Gushchin Reviewed-by: Muchun Song Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 090fa14628f9..af984e4990e8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3236,6 +3236,16 @@ struct detached_freelist { struct kmem_cache *s; }; +static inline void free_nonslab_page(struct page *page) +{ + unsigned int order = compound_order(page); + + VM_BUG_ON_PAGE(!PageCompound(page), page); + kfree_hook(page_address(page)); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); + __free_pages(page, order); +} + /* * This function progressively scans the array with free objects (with * a limited look ahead) and extract objects belonging to the same @@ -3272,9 +3282,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!s) { /* Handle kalloc'ed objects */ if (unlikely(!PageSlab(page))) { - BUG_ON(!PageCompound(page)); - kfree_hook(object); - __free_pages(page, compound_order(page)); + free_nonslab_page(page); p[size] = NULL; /* mark object processed */ return size; } @@ -4250,13 +4258,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { - unsigned int order = compound_order(page); - - BUG_ON(!PageCompound(page)); - kfree_hook(object); - mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, - -(PAGE_SIZE << order)); - __free_pages(page, order); + free_nonslab_page(page); return; } slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); From 121dffe20b141c9b27f39d49b15882469cbebae7 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 29 Jul 2021 14:53:54 -0700 
Subject: [PATCH 656/794] mm/memcg: fix NULL pointer dereference in memcg_slab_free_hook() When I use kfree_rcu() to free a large memory allocated by kmalloc_node(), the following dump occurs. BUG: kernel NULL pointer dereference, address: 0000000000000020 [...] Oops: 0000 [#1] SMP [...] Workqueue: events kfree_rcu_work RIP: 0010:__obj_to_index include/linux/slub_def.h:182 [inline] RIP: 0010:obj_to_index include/linux/slub_def.h:191 [inline] RIP: 0010:memcg_slab_free_hook+0x120/0x260 mm/slab.h:363 [...] Call Trace: kmem_cache_free_bulk+0x58/0x630 mm/slub.c:3293 kfree_bulk include/linux/slab.h:413 [inline] kfree_rcu_work+0x1ab/0x200 kernel/rcu/tree.c:3300 process_one_work+0x207/0x530 kernel/workqueue.c:2276 worker_thread+0x320/0x610 kernel/workqueue.c:2422 kthread+0x13d/0x160 kernel/kthread.c:313 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 When kmalloc_node() a large memory, page is allocated, not slab, so when freeing memory via kfree_rcu(), this large memory should not be used by memcg_slab_free_hook(), because memcg_slab_free_hook() is used for slab. Using page_objcgs_check() instead of page_objcgs() in memcg_slab_free_hook() to fix this bug.
Link: https://lkml.kernel.org/r/20210728145655.274476-1-wanghai38@huawei.com Fixes: 270c6a71460e ("mm: memcontrol/slab: Use helpers to access slab page's memcg_data") Signed-off-by: Wang Hai Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Acked-by: Roman Gushchin Reviewed-by: Kefeng Wang Reviewed-by: Muchun Song Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Johannes Weiner Cc: Alexei Starovoitov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.h b/mm/slab.h index f997fd5e42c8..58c01a34e5b8 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -346,7 +346,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, continue; page = virt_to_head_page(p[i]); - objcgs = page_objcgs(page); + objcgs = page_objcgs_check(page); if (!objcgs) continue; From 852a8a97776a153be2e6c803218eced45f37a19c Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 30 Jul 2021 11:02:54 +0200 Subject: [PATCH 657/794] ALSA: pcm - fix mmap capability check for the snd-dummy driver The snd-dummy driver (fake_buffer configuration) uses the ops->page callback for the mmap operations. Allow mmap for this case, too. 
Cc: Fixes: c4824ae7db41 ("ALSA: pcm: Fix mmap capability check") Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20210730090254.612478-1-perex@perex.cz Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 6a2971a7e6a1..09c0e2a6489c 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -246,7 +246,7 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream) if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) return false; - if (substream->ops->mmap) + if (substream->ops->mmap || substream->ops->page) return true; switch (substream->dma_buffer.dev.type) { From 9bac1bd6e6d36459087a728a968e79e37ebcea1a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 Jul 2021 18:26:22 -0300 Subject: [PATCH 658/794] Revert "perf map: Fix dso->nsinfo refcounting" This makes 'perf top' abort in some cases, and the right fix will involve surgery that is too much to do at this stage, so revert for now and fix it in the next merge window. This reverts commit 2d6b74baa7147251c30a46c4996e8cc224aa2dc5. 
Cc: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Krister Johansen Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 72e7f3616157..8af693d9678c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -192,8 +192,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (!(prot & PROT_EXEC)) dso__set_loaded(dso); } - - nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; if (build_id__is_defined(bid)) From 3a34b13a88caeb2800ab44a4918f230041b37dd9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Jul 2021 15:42:34 -0700 Subject: [PATCH 659/794] pipe: make pipe writes always wake up readers Since commit 1b6b26ae7053 ("pipe: fix and clarify pipe write wakeup logic") we have sanitized the pipe write logic, and would only try to wake up readers if they needed it. In particular, if the pipe already had data in it before the write, there was no point in trying to wake up a reader, since any existing readers must have been aware of the pre-existing data already. Doing extraneous wakeups will only cause potential thundering herd problems. However, it turns out that some Android libraries have misused the EPOLL interface, and expected "edge triggered" to be "any new write will trigger it". Even if there was no edge in sight. Quoting Sandeep Patil: "The commit 1b6b26ae7053 ('pipe: fix and clarify pipe write wakeup logic') changed pipe write logic to wakeup readers only if the pipe was empty at the time of write. However, there are libraries that relied upon the older behavior for notification scheme similar to what's described in [1] One such library 'realm-core'[2] is used by numerous Android applications. The library uses a similar notification mechanism as GNU Make but it never drains the pipe until it is full.
When Android moved to v5.10 kernel, all applications using this library stopped working. The library has since been fixed[3] but it will be a while before all applications incorporate the updated library" Our regression rule for the kernel is that if applications break from new behavior, it's a regression, even if it was because the application did something patently wrong. Also note the original report [4] by Michael Kerrisk about a test for this epoll behavior - but at that point we didn't know of any actual broken use case. So add the extraneous wakeup, to approximate the old behavior. [ I say "approximate", because the exact old behavior was to do a wakeup not for each write(), but for each pipe buffer chunk that was filled in. The behavior introduced by this change is not that - this is just "every write will cause a wakeup, whether necessary or not", which seems to be sufficient for the broken library use. ] It's worth noting that this adds the extraneous wakeup only for the write side, while the read side still considers the "edge" to be purely about reading enough from the pipe to allow further writes. See commit f467a6a66419 ("pipe: fix and clarify pipe read wakeup logic") for the pipe read case, which remains that "only wake up if the pipe was full, and we read something from it".
Link: https://lore.kernel.org/lkml/CAHk-=wjeG0q1vgzu4iJhW5juPkTsjTYmiqiMUYAebWW+0bam6w@mail.gmail.com/ [1] Link: https://github.com/realm/realm-core [2] Link: https://github.com/realm/realm-core/issues/4666 [3] Link: https://lore.kernel.org/lkml/CAKgNAkjMBGeAwF=2MKK758BhxvW58wYTgYKB2V-gY1PwXxrH+Q@mail.gmail.com/ [4] Link: https://lore.kernel.org/lkml/20210729222635.2937453-1-sspatil@android.com/ Reported-by: Sandeep Patil Cc: Michael Kerrisk Signed-off-by: Linus Torvalds --- fs/pipe.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index bfd946a9ad01..9ef4231cce61 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -429,20 +429,20 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) #endif /* - * Only wake up if the pipe started out empty, since - * otherwise there should be no readers waiting. + * Epoll nonsensically wants a wakeup whether the pipe + * was already empty or not. * * If it wasn't empty we try to merge new data into * the last buffer. * * That naturally merges small writes, but it also - * page-aligs the rest of the writes for large writes + * page-aligns the rest of the writes for large writes * spanning multiple pages. */ head = pipe->head; - was_empty = pipe_empty(head, pipe->tail); + was_empty = true; chars = total_len & (PAGE_SIZE-1); - if (chars && !was_empty) { + if (chars && !pipe_empty(head, pipe->tail)) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; int offset = buf->offset + buf->len; From ff41c28c4b54052942180d8b3f49e75f1445135a Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 30 Jul 2021 18:53:06 +0530 Subject: [PATCH 660/794] tracing: Fix NULL pointer dereference in start_creating The event_trace_add_tracer() can fail. In this case, it leads to a crash in start_creating with below call stack. Handle the error scenario properly in trace_array_create_dir. 
Call trace: down_write+0x7c/0x204 start_creating.25017+0x6c/0x194 tracefs_create_file+0xc4/0x2b4 init_tracer_tracefs+0x5c/0x940 trace_array_create_dir+0x58/0xb4 trace_array_create+0x1bc/0x2b8 trace_array_get_by_name+0xdc/0x18c Link: https://lkml.kernel.org/r/1627651386-21315-1-git-send-email-kamaagra@codeaurora.org Cc: stable@vger.kernel.org Fixes: 4114fbfd02f1 ("tracing: Enable creating new instance early boot") Signed-off-by: Kamal Agrawal Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c59dd35a6da5..33899a71fdc1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9135,8 +9135,10 @@ static int trace_array_create_dir(struct trace_array *tr) return -EINVAL; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { tracefs_remove(tr->dir); + return ret; + } init_tracer_tracefs(tr, tr->dir); __update_tracer_options(tr); From f828b0bcacef189edbd247e9f48864fc36bfbe33 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 30 Jul 2021 19:59:50 -0700 Subject: [PATCH 661/794] clk: fix leak on devm_clk_bulk_get_all() unwind clk_bulk_get_all() allocates an array of struct clk_bulk data for us (unlike clk_bulk_get()), so we need to free it. Let's use the clk_bulk_put_all() helper. kmemleak complains, on an RK3399 Gru/Kevin system: unreferenced object 0xffffff80045def00 (size 128): comm "swapper/0", pid 1, jiffies 4294667682 (age 86.394s) hex dump (first 32 bytes): 44 32 60 fe fe ff ff ff 00 00 00 00 00 00 00 00 D2`............. 48 32 60 fe fe ff ff ff 00 00 00 00 00 00 00 00 H2`............. 
backtrace: [<00000000742860d6>] __kmalloc+0x22c/0x39c [<00000000b0493f2c>] clk_bulk_get_all+0x64/0x188 [<00000000325f5900>] devm_clk_bulk_get_all+0x58/0xa8 [<00000000175b9bc5>] dwc3_probe+0x8ac/0xb5c [<000000009169e2f9>] platform_drv_probe+0x9c/0xbc [<000000005c51e2ee>] really_probe+0x13c/0x378 [<00000000c47b1f24>] driver_probe_device+0x84/0xc0 [<00000000f870fcfb>] __device_attach_driver+0x94/0xb0 [<000000004d1b92ae>] bus_for_each_drv+0x8c/0xd8 [<00000000481d60c3>] __device_attach+0xc4/0x150 [<00000000a163bd36>] device_initial_probe+0x1c/0x28 [<00000000accb6bad>] bus_probe_device+0x3c/0x9c [<000000001a199f89>] device_add+0x218/0x3cc [<000000001bd84952>] of_device_add+0x40/0x50 [<000000009c658c29>] of_platform_device_create_pdata+0xac/0x100 [<0000000021c69ba4>] of_platform_bus_create+0x190/0x224 Fixes: f08c2e2865f6 ("clk: add managed version of clk_bulk_get_all") Cc: Dong Aisheng Cc: stable@vger.kernel.org Signed-off-by: Brian Norris Link: https://lore.kernel.org/r/20210731025950.2238582-1-briannorris@chromium.org Signed-off-by: Stephen Boyd --- drivers/clk/clk-devres.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index be160764911b..f9d5b7334341 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -92,13 +92,20 @@ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks, } EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional); +static void devm_clk_bulk_release_all(struct device *dev, void *res) +{ + struct clk_bulk_devres *devres = res; + + clk_bulk_put_all(devres->num_clks, devres->clks); +} + int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { struct clk_bulk_devres *devres; int ret; - devres = devres_alloc(devm_clk_bulk_release, + devres = devres_alloc(devm_clk_bulk_release_all, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; From c500bee1c5b2f1d59b1081ac879d73268ab0ff17 Mon Sep 17 00:00:00 2001 From: 
Linus Torvalds Date: Sun, 1 Aug 2021 17:04:17 -0700 Subject: [PATCH 662/794] Linux 5.14-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6b555f64df06..27a072cffcb9 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Opossums on Parade # *DOCUMENTATION* From 7199ddede9f0f2f68d41e6928e1c6c4bca9c39c0 Mon Sep 17 00:00:00 2001 From: Juergen Borleis Date: Thu, 29 Jul 2021 09:18:21 +0200 Subject: [PATCH 663/794] dmaengine: imx-dma: configure the generic DMA type to make it work Commit dea7a9fbb009 ("dmaengine: imx-dma: remove dma_slave_config direction usage") changes the method from a "configuration when called" to an "configuration when used". Due to this, only the cyclic DMA type gets configured correctly, while the generic DMA type is left non-configured. Without this additional call, the struct imxdma_channel::word_size member is stuck at DMA_SLAVE_BUSWIDTH_UNDEFINED and imxdma_prep_slave_sg() always returns NULL. 
Signed-off-by: Juergen Borleis Fixes: dea7a9fbb009 ("dmaengine: imx-dma: remove dma_slave_config direction usage") Link: https://lore.kernel.org/r/20210729071821.9857-1-jbe@pengutronix.de Signed-off-by: Vinod Koul --- drivers/dma/imx-dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 7f116bbcfad2..2ddc31e64db0 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -812,6 +812,8 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg( dma_length += sg_dma_len(sg); } + imxdma_config_write(chan, &imxdmac->config, direction); + switch (imxdmac->word_size) { case DMA_SLAVE_BUSWIDTH_4_BYTES: if (sg_dma_len(sgl) & 3 || sgl->dma_address & 3) From eda80d7c9c4db0f55f130e38c682e19b58d5add7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 1 Aug 2021 13:38:01 +0200 Subject: [PATCH 664/794] ALSA: memalloc: Fix regression with SNDRV_DMA_TYPE_CONTINUOUS The recent code refactoring made the mmap of continuous pages to be done via the own helper snd_dma_continuous_mmap() with remap_pfn_range(). There I overlooked that dmab->addr isn't set for the allocation with SNDRV_DMA_TYPE_CONTINUOUS. This resulted always in an error at mmap with this buffer type on the system such as Intel SST Baytrail driver. This patch fixes the regression by passing the correct address. 
Fixes: 30b7ba6972d5 ("ALSA: core: Add continuous and vmalloc mmap ops") Reported-by: Hans de Goede Link: https://lore.kernel.org/r/8d6674da-7d7b-803e-acc9-7de6cb1223fa@redhat.com Link: https://lore.kernel.org/r/20210801113801.31290-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/memalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 83b79edfa52d..439a358ecfe9 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -215,7 +215,7 @@ static int snd_dma_continuous_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { return remap_pfn_range(area, area->vm_start, - dmab->addr >> PAGE_SHIFT, + page_to_pfn(virt_to_page(dmab->area)), area->vm_end - area->vm_start, area->vm_page_prot); } From 1159e25c137422bdc48ee96e3fb014bd942092c6 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Thu, 29 Jul 2021 14:43:06 +0300 Subject: [PATCH 665/794] qede: fix crash in rmmod qede while automatic debug collection A crash has been observed if rmmod is done while automatic debug collection in progress. It is due to a race condition between both of them. To fix stop the sp_task during unload to avoid running qede_sp_task even if they are schedule during removal process. Signed-off-by: Alok Prasad Signed-off-by: Shai Malin Signed-off-by: Ariel Elior Signed-off-by: Prabhakar Kushwaha Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 1 + drivers/net/ethernet/qlogic/qede/qede_main.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 2e62a2c4eb63..5630008f38b7 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -501,6 +501,7 @@ struct qede_fastpath { #define QEDE_SP_HW_ERR 4 #define QEDE_SP_ARFS_CONFIG 5 #define QEDE_SP_AER 7 +#define QEDE_SP_DISABLE 8 #ifdef CONFIG_RFS_ACCEL int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 01ac1e93d27a..7c6064baeba2 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1009,6 +1009,13 @@ static void qede_sp_task(struct work_struct *work) struct qede_dev *edev = container_of(work, struct qede_dev, sp_task.work); + /* Disable execution of this deferred work once + * qede removal is in progress, this stop any future + * scheduling of sp_task. + */ + if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags)) + return; + /* The locking scheme depends on the specific flag: * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to * ensure that ongoing flows are ended and new ones are not started. 
@@ -1300,6 +1307,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY)); if (mode != QEDE_REMOVE_RECOVERY) { + set_bit(QEDE_SP_DISABLE, &edev->sp_flags); unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); From d51c5907e9809a803b276883d203f45849abd4d6 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 29 Jul 2021 15:48:20 +0200 Subject: [PATCH 666/794] net, gro: Set inner transport header offset in tcp/udp GRO hook GSO expects inner transport header offset to be valid when skb->encapsulation flag is set. GSO uses this value to calculate the length of an individual segment of a GSO packet in skb_gso_transport_seglen(). However, tcp/udp gro_complete callbacks don't update the skb->inner_transport_header when processing an encapsulated TCP/UDP segment. As a result a GRO skb has ->inner_transport_header set to a value carried over from earlier skb processing. This can have mild to tragic consequences. From miscalculating the GSO segment length to triggering a page fault [1], when trying to read TCP/UDP header at an address past the skb->data page. The latter scenario leads to an oops report like so: BUG: unable to handle page fault for address: ffff9fa7ec00d008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 123f201067 P4D 123f201067 PUD 123f209067 PMD 0 Oops: 0000 [#1] SMP NOPTI CPU: 44 PID: 0 Comm: swapper/44 Not tainted 5.4.53-cloudflare-2020.7.21 #1 Hardware name: HYVE EDGE-METAL-GEN10/HS-1811DLite1, BIOS V2.15 02/21/2020 RIP: 0010:skb_gso_transport_seglen+0x44/0xa0 Code: c0 41 83 e0 11 f6 87 81 00 00 00 20 74 30 0f b7 87 aa 00 00 00 0f [...] 
RSP: 0018:ffffad8640bacbb8 EFLAGS: 00010202 RAX: 000000000000feda RBX: ffff9fcc8d31bc00 RCX: ffff9fa7ec00cffc RDX: ffff9fa7ebffdec0 RSI: 000000000000feda RDI: 0000000000000122 RBP: 00000000000005c4 R08: 0000000000000001 R09: 0000000000000000 R10: ffff9fe588ae3800 R11: ffff9fe011fc92f0 R12: ffff9fcc8d31bc00 R13: ffff9fe0119d4300 R14: 00000000000005c4 R15: ffff9fba57d70900 FS: 0000000000000000(0000) GS:ffff9fe68df00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff9fa7ec00d008 CR3: 0000003e99b1c000 CR4: 0000000000340ee0 Call Trace: skb_gso_validate_network_len+0x11/0x70 __ip_finish_output+0x109/0x1c0 ip_sublist_rcv_finish+0x57/0x70 ip_sublist_rcv+0x2aa/0x2d0 ? ip_rcv_finish_core.constprop.0+0x390/0x390 ip_list_rcv+0x12b/0x14f __netif_receive_skb_list_core+0x2a9/0x2d0 netif_receive_skb_list_internal+0x1b5/0x2e0 napi_complete_done+0x93/0x140 veth_poll+0xc0/0x19f [veth] ? mlx5e_napi_poll+0x221/0x610 [mlx5_core] net_rx_action+0x1f8/0x790 __do_softirq+0xe1/0x2bf irq_exit+0x8e/0xc0 do_IRQ+0x58/0xe0 common_interrupt+0xf/0xf The bug can be observed in a simple setup where we send IP/GRE/IP/TCP packets into a netns over a veth pair. Inside the netns, packets are forwarded to dummy device: trafgen -> [veth A]--[veth B] -forward-> [dummy] For veth B to GRO aggregate packets on receive, it needs to have an XDP program attached (for example, a trivial XDP_PASS). 
Additionally, for UDP, we need to enable GSO_UDP_L4 feature on the device: ip netns exec A ethtool -K AB rx-udp-gro-forwarding on The last component is an artificial delay to increase the chances of GRO batching happening: ip netns exec A tc qdisc add dev AB root \ netem delay 200us slot 5ms 10ms packets 2 bytes 64k With such a setup in place, the bug can be observed by tracing the skb outer and inner offsets when GSO skb is transmitted from the dummy device: tcp: FUNC DEV SKB_LEN NH TH ENC INH ITH GSO_SIZE GSO_TYPE ip_finish_output dumB 2830 270 290 1 294 254 1383 (tcpv4,gre,) ^^^ udp: FUNC DEV SKB_LEN NH TH ENC INH ITH GSO_SIZE GSO_TYPE ip_finish_output dumB 2818 270 290 1 294 254 1383 (gre,udp_l4,) ^^^ Fix it by updating the inner transport header offset in tcp/udp gro_complete callbacks, similar to how {inet,ipv6}_gro_complete callbacks update the inner network header offset, when skb->encapsulation flag is set. [1] https://lore.kernel.org/netdev/CAKxSbF01cLpZem2GFaUaifh0S-5WYViZemTicAg7FCHOnh6kug@mail.gmail.com/ Fixes: bf296b125b21 ("tcp: Add GRO support") Fixes: f993bc25e519 ("net: core: handle encapsulation offloads when computing segment lengths") Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Reported-by: Alex Forster Signed-off-by: Jakub Sitnicki Signed-off-by: David S. 
Miller --- net/ipv4/tcp_offload.c | 3 +++ net/ipv4/udp_offload.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index e09147ac9a99..fc61cd3fea65 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -298,6 +298,9 @@ int tcp_gro_complete(struct sk_buff *skb) if (th->cwr) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } EXPORT_SYMBOL(tcp_gro_complete); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 9dde1e5fb449..1380a6b6f4ff 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -624,6 +624,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb) skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; + + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } From 85b1ebfea2b0d8797266bcc6f04b6cc87e38290a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 30 Jul 2021 08:54:08 +0100 Subject: [PATCH 667/794] interconnect: Fix undersized devress_alloc allocation The expression sizeof(**ptr) for the void **ptr is just 1 rather than the size of a pointer. Fix this by using sizeof(*ptr). 
Addresses-Coverity: ("Wrong sizeof argument") Fixes: e145d9a184f2 ("interconnect: Add devm_of_icc_get() as exported API for users") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210730075408.19945-1-colin.king@canonical.com Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 1b2c564eaa99..7887941730db 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -403,7 +403,7 @@ struct icc_path *devm_of_icc_get(struct device *dev, const char *name) { struct icc_path **ptr, *path; - ptr = devres_alloc(devm_icc_release, sizeof(**ptr), GFP_KERNEL); + ptr = devres_alloc(devm_icc_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); From ebca25ead0711729e0aeeec45062e7ac4df3e158 Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Fri, 30 Jul 2021 18:53:21 +0200 Subject: [PATCH 668/794] net/sched: taprio: Fix init procedure Commit 13511704f8d759 ("net: taprio offload: enforce qdisc to netdev queue mapping") resulted in duplicate entries in the qdisc hash. While this did not impact the overall operation of the qdisc and taprio code paths, it did result in an infinite loop when dumping the qdisc properties, at least on one target (NXP LS1028 ARDB). Removing the duplicate call to qdisc_hash_add() solves the problem. Fixes: 13511704f8d759 ("net: taprio offload: enforce qdisc to netdev queue mapping") Signed-off-by: Yannick Vignon Signed-off-by: David S. 
Miller --- net/sched/sch_taprio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 07b30d0601d7..9c79374457a0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1739,8 +1739,6 @@ static void taprio_attach(struct Qdisc *sch) if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc, false); } else { old = dev_graft_qdisc(qdisc->dev_queue, sch); qdisc_refcount_inc(sch); From 0d5c3954b35eddff0da0436c31e8d721eceb7dc2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Aug 2021 20:00:23 -0700 Subject: [PATCH 669/794] spi: mediatek: Fix fifo transfer Commit 3a70dd2d0503 ("spi: mediatek: fix fifo rx mode") claims that fifo RX mode was never handled, and adds the presumably missing code to the FIFO transfer function. However, the claim that receive data was not handled is incorrect. It was handled as part of interrupt handling after the transfer was complete. The code added with the above mentioned commit reads data from the receive FIFO before the transfer is started, which is wrong. This results in an actual transfer error on a Hayato Chromebook. Remove the code trying to handle receive data before the transfer is started to fix the problem. 
Fixes: 3a70dd2d0503 ("spi: mediatek: fix fifo rx mode") Cc: Peter Hess Cc: Frank Wunderlich Cc: Tzung-Bi Shih Cc: Hsin-Yi Wang Signed-off-by: Guenter Roeck Tested-by: Hsin-Yi Wang Tested-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20210802030023.1748777-1-linux@roeck-us.net Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 68dca8ceb3ad..7914255521c3 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -426,24 +426,15 @@ static int mtk_spi_fifo_transfer(struct spi_master *master, mtk_spi_prepare_transfer(master, xfer); mtk_spi_setup_packet(master); - cnt = xfer->len / 4; - if (xfer->tx_buf) + if (xfer->tx_buf) { + cnt = xfer->len / 4; iowrite32_rep(mdata->base + SPI_TX_DATA_REG, xfer->tx_buf, cnt); - - if (xfer->rx_buf) - ioread32_rep(mdata->base + SPI_RX_DATA_REG, xfer->rx_buf, cnt); - - remainder = xfer->len % 4; - if (remainder > 0) { - reg_val = 0; - if (xfer->tx_buf) { + remainder = xfer->len % 4; + if (remainder > 0) { + reg_val = 0; memcpy(&reg_val, xfer->tx_buf + (cnt * 4), remainder); writel(reg_val, mdata->base + SPI_TX_DATA_REG); } - if (xfer->rx_buf) { - reg_val = readl(mdata->base + SPI_RX_DATA_REG); - memcpy(xfer->rx_buf + (cnt * 4), &reg_val, remainder); - } } mtk_spi_enable_transfer(master); From 40e159403896f7d55c98f858d0b20fee1d941fa4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Aug 2021 12:21:30 +0100 Subject: [PATCH 670/794] mhi: Fix networking tree build. Signed-off-by: David S.
Miller --- include/linux/mhi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 944aa3aa3035..5e08468854db 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -719,8 +719,13 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, + unsigned int flags); + +/* Automatically allocate and queue inbound buffers */ +#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. From 47091f473b364c98207c4def197a0ae386fc9af1 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Sat, 26 Jun 2021 02:01:03 +0200 Subject: [PATCH 671/794] ARM: dts: nomadik: Fix up interrupt controller node names Once the new schema interrupt-controller/arm,vic.yaml is added, we get the below warnings: arch/arm/boot/dts/ste-nomadik-nhk15.dt.yaml: intc@10140000: $nodename:0: 'intc@10140000' does not match '^interrupt-controller(@[0-9a-f,]+)*$' Fix the node names for the interrupt controller to conform to the standard node name interrupt-controller@.. 
Signed-off-by: Sudeep Holla Signed-off-by: Linus Walleij Cc: Linus Walleij Link: https://lore.kernel.org/r/20210617210825.3064367-2-sudeep.holla@arm.com Link: https://lore.kernel.org/r/20210626000103.830184-1-linus.walleij@linaro.org Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index c9b906432341..1815361fe73c 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -755,14 +755,14 @@ status = "disabled"; }; - vica: intc@10140000 { + vica: interrupt-controller@10140000 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; reg = <0x10140000 0x20>; }; - vicb: intc@10140020 { + vicb: interrupt-controller@10140020 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; From a4282f66d90e93aacfe1b19509fd5851bf95be68 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 22 Jul 2021 02:26:16 +0300 Subject: [PATCH 672/794] soc/tegra: Make regulator couplers depend on CONFIG_REGULATOR The regulator coupler drivers now use regulator-driver API function that isn't available during compile-testing. Make regulator coupler drivers dependent on CONFIG_REGULATOR in Kconfig.
Fixes: 03978d42ed0d ("soc/tegra: regulators: Bump voltages on system reboot") Reported-by: kernel test robot Signed-off-by: Dmitry Osipenko Acked-by: Jon Hunter Signed-off-by: Arnd Bergmann --- drivers/soc/tegra/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig index 20ace654553a..8b53ed1cc67e 100644 --- a/drivers/soc/tegra/Kconfig +++ b/drivers/soc/tegra/Kconfig @@ -15,7 +15,7 @@ config ARCH_TEGRA_2x_SOC select PL310_ERRATA_769419 if CACHE_L2X0 select SOC_TEGRA_FLOWCTRL select SOC_TEGRA_PMC - select SOC_TEGRA20_VOLTAGE_COUPLER + select SOC_TEGRA20_VOLTAGE_COUPLER if REGULATOR select TEGRA_TIMER help Support for NVIDIA Tegra AP20 and T20 processors, based on the @@ -29,7 +29,7 @@ config ARCH_TEGRA_3x_SOC select PL310_ERRATA_769419 if CACHE_L2X0 select SOC_TEGRA_FLOWCTRL select SOC_TEGRA_PMC - select SOC_TEGRA30_VOLTAGE_COUPLER + select SOC_TEGRA30_VOLTAGE_COUPLER if REGULATOR select TEGRA_TIMER help Support for NVIDIA Tegra T30 processor family, based on the @@ -155,7 +155,9 @@ config SOC_TEGRA_POWERGATE_BPMP config SOC_TEGRA20_VOLTAGE_COUPLER bool "Voltage scaling support for Tegra20 SoCs" depends on ARCH_TEGRA_2x_SOC || COMPILE_TEST + depends on REGULATOR config SOC_TEGRA30_VOLTAGE_COUPLER bool "Voltage scaling support for Tegra30 SoCs" depends on ARCH_TEGRA_3x_SOC || COMPILE_TEST + depends on REGULATOR From 7f94b69ece515ac82defa60ef7cba2cf26180216 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 23:13:43 +0200 Subject: [PATCH 673/794] ARM: ixp4xx: fix compile-testing soc drivers Randconfig builds on the ixp4xx ethernet driver showed that the qmgr and npe drivers are not actually built even when compile testing is enabled: ERROR: modpost: "qmgr_stat_empty" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "qmgr_enable_irq" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! 
ERROR: modpost: "qmgr_set_irq" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "__qmgr_request_queue" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "npe_send_recv_message" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "npe_recv_message" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "npe_load_firmware" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "npe_running" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "qmgr_disable_irq" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "qmgr_stat_below_low_watermark" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! Fix it by always entering the drivers/soc/ixp4xx/ directory, and fix the resulting compile test failures by removing the #include statements that prevent building on most other platforms. Fixes: 7a6c9dbb36a4 ("soc: ixp4xx: Protect IXP4xx SoC drivers by ARCH_IXP4XX || COMPILE_TEST") Fixes: fcf2d8978cd5 ("ARM: ixp4xx: Move NPE and QMGR to drivers/soc") Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20210721211412.3537004-1-arnd@kernel.org Signed-off-by: Arnd Bergmann --- drivers/soc/Makefile | 2 +- drivers/soc/ixp4xx/ixp4xx-npe.c | 1 - drivers/soc/ixp4xx/ixp4xx-qmgr.c | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile index f678e4d9e585..a05e9fbcd3e0 100644 --- a/drivers/soc/Makefile +++ b/drivers/soc/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MACH_DOVE) += dove/ obj-y += fsl/ obj-$(CONFIG_ARCH_GEMINI) += gemini/ obj-y += imx/ -obj-$(CONFIG_ARCH_IXP4XX) += ixp4xx/ +obj-y += ixp4xx/ obj-$(CONFIG_SOC_XWAY) += lantiq/ obj-$(CONFIG_LITEX_SOC_CONTROLLER) += litex/ obj-y += mediatek/ diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index 7bd19354982a..fea50e04d5a1 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++
b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #define DEBUG_MSG 0 diff --git a/drivers/soc/ixp4xx/ixp4xx-qmgr.c b/drivers/soc/ixp4xx/ixp4xx-qmgr.c index 7149510b307e..c6bf6ef257c0 100644 --- a/drivers/soc/ixp4xx/ixp4xx-qmgr.c +++ b/drivers/soc/ixp4xx/ixp4xx-qmgr.c @@ -12,7 +12,6 @@ #include #include #include -#include #include static struct qmgr_regs __iomem *qmgr_regs; From 796a8c85b1216618258e08b463d3bef0d7123760 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:16:04 +0200 Subject: [PATCH 674/794] ARM: ixp4xx: goramo_mlr depends on old PCI driver When this driver is disabled, the board file fails to build, so add a dependency: arch/arm/mach-ixp4xx/goramo_mlr.c: In function 'gmlr_pci_preinit': arch/arm/mach-ixp4xx/goramo_mlr.c:472:9: error: implicit declaration of function 'ixp4xx_pci_preinit'; did you mean 'iop3xx_pci_preinit'? [-Werror=implicit-function-declaration] 472 | ixp4xx_pci_preinit(); | ^~~~~~~~~~~~~~~~~~ | iop3xx_pci_preinit arch/arm/mach-ixp4xx/goramo_mlr.c: In function 'gmlr_pci_postinit': arch/arm/mach-ixp4xx/goramo_mlr.c:481:22: error: implicit declaration of function 'ixp4xx_pci_read' [-Werror=implicit-function-declaration] 481 | if (!ixp4xx_pci_read(addr, NP_CMD_CONFIGREAD, &value)) { | ^~~~~~~~~~~~~~~ arch/arm/mach-ixp4xx/goramo_mlr.c:231:35: error: 'IXP4XX_UART1_BASE_PHYS' undeclared here (not in a function) 231 | .start = IXP4XX_UART1_BASE_PHYS, | ^~~~~~~~~~~~~~~~~~~~~~ arch/arm/mach-ixp4xx/goramo_mlr.c: In function 'gmlr_init': arch/arm/mach-ixp4xx/goramo_mlr.c:376:9: error: implicit declaration of function 'ixp4xx_sys_init' [-Werror=implicit-function-declaration] 376 | ixp4xx_sys_init(); | ^~~~~~~~~~~~~~~ Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij Cc: Linus Walleij Cc: soc@kernel.org Link: https://lore.kernel.org/r/20210721151620.2373500-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/mach-ixp4xx/Kconfig | 1 + 1 file changed, 1 insertion(+) 
diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index bf14d65120b9..34a1c7742088 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -91,6 +91,7 @@ config MACH_IXDP465 config MACH_GORAMO_MLR bool "GORAMO Multi Link Router" + depends on IXP4XX_PCI_LEGACY help Say 'Y' here if you want your kernel to support GORAMO MultiLink router. From cb81698fddbcc9a3ee75857e99dfc29caa96135b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:10 +0300 Subject: [PATCH 675/794] net: dsa: sja1105: fix static FDB writes for SJA1110 The blamed commit made FDB access on SJA1110 functional only as far as dumping the existing entries goes, but anything having to do with an entry's index (adding, deleting) is still broken. There are in fact 2 problems, all caused by improperly inheriting the code from SJA1105P/Q/R/S: - An entry size is SJA1110_SIZE_L2_LOOKUP_ENTRY (24) bytes and not SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY (20) bytes - The "index" field within an FDB entry is at bits 10:1 for SJA1110 and not 15:6 as in SJA1105P/Q/R/S This patch moves the packing function for the cmd->index outside of sja1105pqrs_common_l2_lookup_cmd_packing() and into the device specific functions sja1105pqrs_l2_lookup_cmd_packing and sja1110_l2_lookup_cmd_packing. Fixes: 74e7feff0e22 ("net: dsa: sja1105: fix dynamic access to L2 Address Lookup table for SJA1110") Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- .../net/dsa/sja1105/sja1105_dynamic_config.c | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index 56fead68ea9f..147709131c13 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -304,6 +304,15 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, hostcmd = SJA1105_HOSTCMD_INVALIDATE; } sja1105_packing(p, &hostcmd, 25, 23, size, op); +} + +static void +sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + int entry_size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; + + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); /* Hack - The hardware takes the 'index' field within * struct sja1105_l2_lookup_entry as the index on which this command @@ -313,26 +322,18 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, * such that our API doesn't need to ask for a full-blown entry * structure when e.g. a delete is requested. 
*/ - sja1105_packing(buf, &cmd->index, 15, 6, - SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op); -} - -static void -sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, - enum packing_op op) -{ - int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; - - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105_packing(buf, &cmd->index, 15, 6, entry_size, op); } static void sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, enum packing_op op) { - int size = SJA1110_SIZE_L2_LOOKUP_ENTRY; + int entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY; - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); + + sja1105_packing(buf, &cmd->index, 10, 1, entry_size, op); } /* The switch is so retarded that it makes our command/entry abstraction From e11e865bf84e3c6ea91563ff3e858cfe0e184bd2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:11 +0300 Subject: [PATCH 676/794] net: dsa: sja1105: overwrite dynamic FDB entries with static ones in .port_fdb_add The SJA1105 switch family leaves it up to software to decide where within the FDB to install a static entry, and to concatenate destination ports for already existing entries (the FDB is also used for multicast entries), it is not as simple as just saying "please add this entry". This means we first need to search for an existing FDB entry before adding a new one. The driver currently manages to fool itself into thinking that if an FDB entry already exists, there is nothing to be done. But that FDB entry might be dynamically learned, case in which it should be replaced with a static entry, but instead it is left alone. This patch checks the LOCKEDS ("locked/static") bit from found FDB entries, and lets the code "goto skip_finding_an_index;" if the FDB entry was not static. 
So we also need to move the place where we set LOCKEDS = true, to cover the new case where an FDB entry existed but was dynamic. Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management") Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index e2dc997580a8..cc4a22ee1474 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1333,7 +1333,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, * mask? If yes, we need to do nothing. If not, we need * to rewrite the entry by adding this port to it. */ - if (l2_lookup.destports & BIT(port)) + if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds) return 0; l2_lookup.destports |= BIT(port); } else { @@ -1364,6 +1364,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, index, NULL, false); } } + l2_lookup.lockeds = true; l2_lookup.index = sja1105et_fdb_index(bin, way); rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, @@ -1434,10 +1435,10 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, SJA1105_SEARCH, &l2_lookup); if (rc == 0) { - /* Found and this port is already in the entry's + /* Found a static entry and this port is already in the entry's * port mask => job done */ - if (l2_lookup.destports & BIT(port)) + if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds) return 0; /* l2_lookup.index is populated by the switch in case it * found something.
@@ -1460,10 +1461,11 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, dev_err(ds->dev, "FDB is full, cannot add entry.\n"); return -EINVAL; } - l2_lookup.lockeds = true; l2_lookup.index = i; skip_finding_an_index: + l2_lookup.lockeds = true; + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, l2_lookup.index, &l2_lookup, true); From 6c5fc159e0927531707895709eee1f8bfa04289f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:12 +0300 Subject: [PATCH 677/794] net: dsa: sja1105: invalidate dynamic FDB entries learned concurrently with statically added ones The procedure to add a static FDB entry in sja1105 is concurrent with dynamic learning performed on all bridge ports and the CPU port. The switch looks up the FDB from left to right, and also learns dynamically from left to right, so it is possible that between the moment when we pick up a free slot to install an FDB entry, another slot to the left of that one becomes free due to an address ageing out, and that other slot is then immediately used by the switch to learn dynamically the same address as we're trying to add statically. The result is that we succeeded to add our static FDB entry, but it is being shadowed by a dynamic FDB entry to its left, and the switch will behave as if our static FDB entry did not exist. We cannot really prevent this from happening unless we make the entire process to add a static FDB entry a huge critical section where address learning is temporarily disabled on _all_ ports, and then re-enabled according to the configuration done by sja1105_port_set_learning. However, that is kind of disruptive for the operation of the network. What we can do alternatively is to simply read back the FDB for dynamic entries located before our newly added static one, and delete them. This will guarantee that our static FDB entry is now operational. 
It will still not guarantee that there aren't dynamic FDB entries to the _right_ of that static FDB entry, but at least those entries will age out by themselves since they aren't hit, and won't bother anyone. Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management") Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 57 +++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index cc4a22ee1474..5a4c7789ca43 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1318,10 +1318,11 @@ static int sja1105et_is_fdb_entry_in_bin(struct sja1105_private *priv, int bin, int sja1105et_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; struct device *dev = ds->dev; int last_unused = -1; + int start, end, i; int bin, way, rc; bin = sja1105et_fdb_hash(priv, addr, vid); @@ -1373,6 +1374,29 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, if (rc < 0) return rc; + /* Invalidate a dynamically learned entry if that exists */ + start = sja1105et_fdb_index(bin, 0); + end = sja1105et_fdb_index(bin, way); + + for (i = start; i < end; i++) { + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + i, &tmp); + if (rc == -ENOENT) + continue; + if (rc) + return rc; + + if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid) + continue; + + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + i, NULL, false); + if (rc) + return rc; + + break; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } @@ -1414,7 +1438,7 @@ int sja1105et_fdb_del(struct dsa_switch *ds, 
int port, int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; int rc, i; @@ -1472,6 +1496,35 @@ skip_finding_an_index: if (rc < 0) return rc; + /* The switch learns dynamic entries and looks up the FDB left to + * right. It is possible that our addition was concurrent with the + * dynamic learning of the same address, so now that the static entry + * has been installed, we are certain that address learning for this + * particular address has been turned off, so the dynamic entry either + * is in the FDB at an index smaller than the static one, or isn't (it + * can also be at a larger index, but in that case it is inactive + * because the static FDB entry will match first, and the dynamic one + * will eventually age out). Search for a dynamically learned address + * prior to our static one and invalidate it. 
+ */ + tmp = l2_lookup; + + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + SJA1105_SEARCH, &tmp); + if (rc < 0) { + dev_err(ds->dev, + "port %d failed to read back entry for %pM vid %d: %pe\n", + port, addr, vid, ERR_PTR(rc)); + return rc; + } + + if (tmp.index < l2_lookup.index) { + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + tmp.index, NULL, false); + if (rc < 0) + return rc; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } From 728db843df88753aeb7224314807a203afa8eb32 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:13 +0300 Subject: [PATCH 678/794] net: dsa: sja1105: ignore the FDB entry for unknown multicast when adding a new address Currently, when sja1105pqrs_fdb_add() is called for a host-joined IPv6 MDB entry such as 33:33:00:00:00:6a, the search for that address will return the FDB entry for SJA1105_UNKNOWN_MULTICAST, which has a destination MAC of 01:00:00:00:00:00 and a mask of 01:00:00:00:00:00. It returns that entry because, well, it matches, in the sense that unknown multicast is supposed by design to match it... But the issue is that we then proceed to overwrite this entry with the one for our precise host-joined multicast address, and the unknown multicast entry is no longer there - unknown multicast is now flooded to the same group of ports as broadcast, which does not look up the FDB. To solve this problem, we should ignore searches that return the unknown multicast address as the match, and treat them as "no match" which will result in the entry being installed to hardware. For this to work properly, we need to put the result of the FDB search in a temporary variable in order to avoid overwriting the l2_lookup entry we want to program. The l2_lookup entry returned by the search might not have the same set of DESTPORTS and not even the same MACADDR as the entry we're trying to add. 
Fixes: 4d9423549501 ("net: dsa: sja1105: offload bridge port flags to device") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 5a4c7789ca43..5d8739b30d8c 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1456,14 +1456,19 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, } l2_lookup.destports = BIT(port); + tmp = l2_lookup; + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, - SJA1105_SEARCH, &l2_lookup); - if (rc == 0) { + SJA1105_SEARCH, &tmp); + if (rc == 0 && tmp.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) { /* Found a static entry and this port is already in the entry's * port mask => job done */ - if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds) + if ((tmp.destports & BIT(port)) && tmp.lockeds) return 0; + + l2_lookup = tmp; + /* l2_lookup.index is populated by the switch in case it * found something. */ From 589918df93226a1e5f104306c185b6dcf2bd8051 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:14 +0300 Subject: [PATCH 679/794] net: dsa: sja1105: be stateless with FDB entries on SJA1105P/Q/R/S/SJA1110 too Similar but not quite the same with what was done in commit b11f0a4c0c81 ("net: dsa: sja1105: be stateless when installing FDB entries") for SJA1105E/T, it is desirable to drop the priv->vlan_aware check and simply go ahead and install FDB entries in the VLAN that was given by the bridge. As opposed to SJA1105E/T, in SJA1105P/Q/R/S and SJA1110, the FDB is a maskable TCAM, and we are installing VLAN-unaware FDB entries with the VLAN ID masked off. 
However, such FDB entries might completely obscure VLAN-aware entries where the VLAN ID is included in the search mask, because the switch looks up the FDB from left to right and picks the first entry which results in a masked match. So it depends on whether the bridge installs first the VLAN-unaware or the VLAN-aware FDB entries. Anyway, if we had a VLAN-unaware FDB entry towards one set of DESTPORTS and a VLAN-aware one towards other set of DESTPORTS, the result is that the packets in VLAN-aware mode will be forwarded towards the DESTPORTS specified by the VLAN-unaware entry. To solve this, simply do not use the masked matching ability of the FDB for VLAN ID, and always match precisely on it. In VLAN-unaware mode, we configure the switch for shared VLAN learning, so the VLAN ID will be ignored anyway during lookup, so it is redundant to mask it off in the TCAM. This patch conflicts with net-next commit 0fac6aa098ed ("net: dsa: sja1105: delete the best_effort_vlan_filtering mode") which changed this line: if (priv->vlan_state != SJA1105_VLAN_UNAWARE) { into: if (priv->vlan_aware) { When merging with net-next, the lines added by this patch should take precedence in the conflict resolution (i.e. the "if" condition should be deleted in both cases). Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series") Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 5d8739b30d8c..335b608bad11 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1447,13 +1447,8 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, l2_lookup.vlanid = vid; l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_state != SJA1105_VLAN_UNAWARE) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; + l2_lookup.mask_iotag = BIT(0); l2_lookup.destports = BIT(port); tmp = l2_lookup; @@ -1545,13 +1540,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, l2_lookup.vlanid = vid; l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_state != SJA1105_VLAN_UNAWARE) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; + l2_lookup.mask_iotag = BIT(0); l2_lookup.destports = BIT(port); rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, From 47c2c0c2312118a478f738503781de1d1a6020d2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:15 +0300 Subject: [PATCH 680/794] net: dsa: sja1105: match FDB entries regardless of inner/outer VLAN tag On SJA1105P/Q/R/S and SJA1110, the L2 Lookup Table entries contain a maskable "inner/outer tag" bit which means: - when set to 1: match single-outer and double tagged frames - when set to 0: match untagged and single-inner tagged frames - when masked off: match all frames regardless of the type of tag This driver does not make any meaningful 
distinction between inner tags (matches on TPID) and outer tags (matches on TPID2). In fact, all VLAN table entries are installed as SJA1110_VLAN_D_TAG, which means that they match on both inner and outer tags. So it does not make sense that we install FDB entries with the IOTAG bit set to 1. In VLAN-unaware mode, we set both TPID and TPID2 to 0xdadb, so the switch will see frames as outer-tagged or double-tagged (never inner). So the FDB entries will match if IOTAG is set to 1. In VLAN-aware mode, we set TPID to 0x8100 and TPID2 to 0x88a8. So the switch will see untagged and 802.1Q-tagged packets as inner-tagged, and 802.1ad-tagged packets as outer-tagged. So untagged and 802.1Q-tagged packets will not match FDB entries if IOTAG is set to 1, but 802.1ad tagged packets will. Strange. To fix this, simply mask off the IOTAG bit from FDB entries, and make them match regardless of whether the VLAN tag is inner or outer. Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series") Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 335b608bad11..8667c9754330 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1445,10 +1445,8 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, /* Search for an existing entry in the FDB table */ l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); l2_lookup.destports = BIT(port); tmp = l2_lookup; @@ -1538,10 +1536,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); l2_lookup.destports = BIT(port); rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, From 4c156084daa8ee70978e4b150b5eb5fc7b1f15be Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Thu, 29 Jul 2021 11:16:44 +0800 Subject: [PATCH 681/794] selinux: correct the return value when loads initial sids It should not return 0 when SID 0 is assigned to isids. This patch fixes it. 
Cc: stable@vger.kernel.org Fixes: e3e0b582c321a ("selinux: remove unused initial SIDs and improve handling") Signed-off-by: Xiu Jianfeng [PM: remove changelog from description] Signed-off-by: Paul Moore --- security/selinux/ss/policydb.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index defc5ef35c66..0ae1b718194a 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -874,7 +874,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) rc = sidtab_init(s); if (rc) { pr_err("SELinux: out of memory on SID table init\n"); - goto out; + return rc; } head = p->ocontexts[OCON_ISID]; @@ -885,7 +885,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) if (sid == SECSID_NULL) { pr_err("SELinux: SID 0 was assigned a context.\n"); sidtab_destroy(s); - goto out; + return -EINVAL; } /* Ignore initial SIDs unused by this kernel. */ @@ -897,12 +897,10 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) pr_err("SELinux: unable to load initial SID %s.\n", name); sidtab_destroy(s); - goto out; + return rc; } } - rc = 0; -out: - return rc; + return 0; } int policydb_class_isvalid(struct policydb *p, unsigned int class) From a5e63c7d38d548b8dab6c6205e0b6af76899dbf5 Mon Sep 17 00:00:00 2001 From: Steve Bennett Date: Sat, 31 Jul 2021 08:57:50 +1000 Subject: [PATCH 682/794] net: phy: micrel: Fix detection of ksz87xx switch The logic for discerning between KSZ8051 and KSZ87XX PHYs is incorrect such that the KSZ87XX switch is not identified correctly. ksz8051_ksz8795_match_phy_device() uses the parameter ksz_phy_id to discriminate whether it was called from ksz8051_match_phy_device() or from ksz8795_match_phy_device() but since PHY_ID_KSZ87XX is the same value as PHY_ID_KSZ8051, this doesn't work. Instead use a bool to discriminate the caller.
Without this patch, the KSZ8795 switch port identifies as: ksz8795-switch spi3.1 ade1 (uninitialized): PHY [dsa-0.1:03] driver [Generic PHY] With the patch, it identifies correctly: ksz8795-switch spi3.1 ade1 (uninitialized): PHY [dsa-0.1:03] driver [Micrel KSZ87XX Switch] Fixes: 8b95599c55ed24b36cf4 ("net: phy: micrel: Discern KSZ8051 and KSZ8795 PHYs") Signed-off-by: Steve Bennett Reviewed-by: Marek Vasut Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 4d53886f7d51..53bdd673ae56 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -401,11 +401,11 @@ static int ksz8041_config_aneg(struct phy_device *phydev) } static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, - const u32 ksz_phy_id) + const bool ksz_8051) { int ret; - if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id) + if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051) return 0; ret = phy_read(phydev, MII_BMSR); @@ -418,7 +418,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, * the switch does not. 
*/ ret &= BMSR_ERCAP; - if (ksz_phy_id == PHY_ID_KSZ8051) + if (ksz_8051) return ret; else return !ret; @@ -426,7 +426,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, static int ksz8051_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051); + return ksz8051_ksz8795_match_phy_device(phydev, true); } static int ksz8081_config_init(struct phy_device *phydev) @@ -535,7 +535,7 @@ static int ksz8061_config_init(struct phy_device *phydev) static int ksz8795_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX); + return ksz8051_ksz8795_match_phy_device(phydev, false); } static int ksz9021_load_values_from_of(struct phy_device *phydev, From 7fe74dfd41c428afb24e2e615470832fa997ff14 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Sat, 31 Jul 2021 14:38:01 +0800 Subject: [PATCH 683/794] net: natsemi: Fix missing pci_disable_device() in probe and remove Replace pci_enable_device() with pcim_enable_device(), pci_disable_device() and pci_release_regions() will be called in release automatically. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/natsemi/natsemi.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 51b4b25d15ad..84f7dbe9edff 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -819,7 +819,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) printk(version); #endif - i = pci_enable_device(pdev); + i = pcim_enable_device(pdev); if (i) return i; /* natsemi has a non-standard PM control register @@ -852,7 +852,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) ioaddr = ioremap(iostart, iosize); if (!ioaddr) { i = -ENOMEM; - goto err_ioremap; + goto err_pci_request_regions; } /* Work around the dropped serial bit. */ @@ -974,9 +974,6 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) err_register_netdev: iounmap(ioaddr); - err_ioremap: - pci_release_regions(pdev); - err_pci_request_regions: free_netdev(dev); return i; @@ -3241,7 +3238,6 @@ static void natsemi_remove1(struct pci_dev *pdev) NATSEMI_REMOVE_FILE(pdev, dspcfg_workaround); unregister_netdev (dev); - pci_release_regions (pdev); iounmap(ioaddr); free_netdev (dev); } From 6387f65e2acb9a63044bd64464401771b8cf1acc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 31 Jul 2021 07:39:17 -0700 Subject: [PATCH 684/794] net: sparx5: fix compiletime_assert for GCC 4.9 Stephen reports sparx5 broke GCC 4.9 build. Move the compiletime_assert() out of the static function. Compile-tested only, no object code changes. Reported-by: Stephen Rothwell Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- .../ethernet/microchip/sparx5/sparx5_netdev.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 9d485a9d1f1f..1a240e6bddd0 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -13,7 +13,19 @@ */ #define VSTAX 73 -static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) +#define ifh_encode_bitfield(ifh, value, pos, _width) \ + ({ \ + u32 width = (_width); \ + \ + /* Max width is 5 bytes - 40 bits. In worst case this will + * spread over 6 bytes - 48 bits + */ \ + compiletime_assert(width <= 40, \ + "Unsupported width, must be <= 40"); \ + __ifh_encode_bitfield((ifh), (value), (pos), width); \ + }) + +static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) { u8 *ifh_hdr = ifh; /* Calculate the Start IFH byte position of this IFH bit position */ @@ -22,11 +34,6 @@ static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) u32 bit = (pos % 8); u64 encode = GENMASK(bit + width - 1, bit) & (value << bit); - /* Max width is 5 bytes - 40 bits. In worst case this will - * spread over 6 bytes - 48 bits - */ - compiletime_assert(width <= 40, "Unsupported width, must be <= 40"); - /* The b0-b7 goes into the start IFH byte */ if (encode & 0xFF) ifh_hdr[byte] |= (u8)((encode & 0xFF)); From 66e0da21728343bd3e75230a53d909e045fb9dd7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 31 Jul 2021 07:40:07 -0700 Subject: [PATCH 685/794] docs: operstates: fix typo TVL -> TLV Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- Documentation/networking/operstates.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst index 9c918f7cb0e8..f6b9cce5b201 100644 --- a/Documentation/networking/operstates.rst +++ b/Documentation/networking/operstates.rst @@ -111,7 +111,7 @@ it as lower layer. Note that for certain kind of soft-devices, which are not managing any real hardware, it is possible to set this bit from userspace. One -should use TVL IFLA_CARRIER to do so. +should use TLV IFLA_CARRIER to do so. netif_carrier_ok() can be used to query that bit. From 7a7b8635b622add64d98cff84bf3ee71eac36237 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 31 Jul 2021 07:40:52 -0700 Subject: [PATCH 686/794] docs: operstates: document IF_OPER_TESTING IF_OPER_TESTING is in fact used today. Signed-off-by: Jakub Kicinski Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/networking/operstates.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst index f6b9cce5b201..1ee2141e8ef1 100644 --- a/Documentation/networking/operstates.rst +++ b/Documentation/networking/operstates.rst @@ -73,7 +73,9 @@ IF_OPER_LOWERLAYERDOWN (3): state (f.e. VLAN). IF_OPER_TESTING (4): - Unused in current kernel. + Interface is in testing mode, for example executing driver self-tests + or media (cable) test. It can't be used for normal traffic until tests + complete. IF_OPER_DORMANT (5): Interface is L1 up, but waiting for an external event, f.e. for a From 1c69d7cf4a8b6b6cfd920a1e809f1cd33ae4369c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Aug 2021 07:30:29 -0700 Subject: [PATCH 687/794] Revert "mhi: Fix networking tree build." This reverts commit 40e159403896f7d55c98f858d0b20fee1d941fa4. Looks like this commit breaks the build for me. 
Signed-off-by: Jakub Kicinski --- include/linux/mhi.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 5e08468854db..944aa3aa3035 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -719,13 +719,8 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels - * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, - unsigned int flags); - -/* Automatically allocate and queue inbound buffers */ -#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. From 9b87f43537acfa24b95c236beba0f45901356eb2 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 16 Jul 2021 12:00:47 +0200 Subject: [PATCH 688/794] gpio: tqmx86: really make IRQ optional The tqmx86 MFD driver was passing IRQ 0 for "no IRQ" in the past. This causes warnings with newer kernels. Prepare the gpio-tqmx86 driver for the fixed MFD driver by handling a missing IRQ properly. 
Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller") Signed-off-by: Matthias Schiffer Reviewed-by: Andrew Lunn Acked-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tqmx86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 5022e0ad0fae..0f5d17f343f1 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -238,8 +238,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) struct resource *res; int ret, irq; - irq = platform_get_irq(pdev, 0); - if (irq < 0) + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) return irq; res = platform_get_resource(pdev, IORESOURCE_IO, 0); @@ -278,7 +278,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - if (irq) { + if (irq > 0) { struct irq_chip *irq_chip = &gpio->irq_chip; u8 irq_status; From d6793ca97b76642b77629dd0783ec64782a50bdb Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 27 Jul 2021 10:16:06 +0300 Subject: [PATCH 689/794] RDMA/mlx5: Delay emptying a cache entry when a new MR is added to it recently Fixing a typo that causes a cache entry to shrink immediately after adding to it new MRs if the entry size exceeds the high limit. In doing so, the cache misses its purpose to prevent the creation of new mkeys on the runtime by using the cached ones. 
Fixes: b9358bdbc713 ("RDMA/mlx5: Fix locking in MR cache work queue") Link: https://lore.kernel.org/r/fcb546986be346684a016f5ca23a0567399145fa.1627370131.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3263851ea574..3f1c5a4f158b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -531,8 +531,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) */ spin_unlock_irq(&ent->lock); need_delay = need_resched() || someone_adding(cache) || - time_after(jiffies, - READ_ONCE(cache->last_add) + 300 * HZ); + !time_after(jiffies, + READ_ONCE(cache->last_add) + 300 * HZ); spin_lock_irq(&ent->lock); if (ent->disabled) goto out; From db4657afd10e45855ac1d8437fcc9a86bd3d741d Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 29 Jul 2021 14:26:22 -0400 Subject: [PATCH 690/794] RDMA/cma: Revert INIT-INIT patch The net/sunrpc/xprtrdma module creates its QP using rdma_create_qp() and immediately posts receives, implicitly assuming the QP is in the INIT state and thus valid for ib_post_recv(). The patch noted in Fixes: removed the RESET->INIT modify from rdma_create_qp(), breaking NFS rdma for verbs providers that fail the ib_post_recv() for a bad state. This situation was proven using kprobes in rvt_post_recv() and rvt_modify_qp(). The traces showed that the rvt_post_recv() failed before ANY modify QP and that the current state was RESET. Fix by reverting the patch below.
Fixes: dc70f7c3ed34 ("RDMA/cma: Remove unnecessary INIT->INIT transition") Link: https://lore.kernel.org/r/1627583182-81330-1-git-send-email-mike.marciniszyn@cornelisnetworks.com Cc: Haakon Bugge Cc: Chuck Lever III Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 515a7e95a421..5d3b8b8d163d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -926,12 +926,25 @@ static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) return ret; } +static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(qp, &qp_attr, qp_attr_mask); +} + int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { struct rdma_id_private *id_priv; struct ib_qp *qp; - int ret = 0; + int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id->device != pd->device) { @@ -948,6 +961,8 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); + else + ret = cma_init_conn_qp(id_priv, qp); if (ret) goto out_destroy; From e2a05339fa1188b6b37540f4611893ac4c534fa2 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 29 Jul 2021 17:00:38 -0500 Subject: [PATCH 691/794] RDMA/rxe: Use the correct size of wqe when processing SRQ The memcpy() that copies a WQE from an SRQ to the QP uses an incorrect size. The size should have been the size of the rxe_send_wqe struct not the size of a pointer to it. The result is that IO operations using a SRQ on the responder side will fail.
Fixes: ec0fa2445c18 ("RDMA/rxe: Fix over copying in get_srq_wqe") Link: https://lore.kernel.org/r/20210729220039.18549-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 3743dc39b60c..360ec67cb9e1 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -318,7 +318,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); return RESPST_ERR_MALFORMED_WQE; } - size = sizeof(wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); + size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); memcpy(&qp->resp.srq_wqe, wqe, size); qp->resp.wqe = &qp->resp.srq_wqe.wqe; From ef4b96a5773d7f6568363b3d0c3c3f371fb690bd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 29 Jul 2021 17:00:39 -0500 Subject: [PATCH 692/794] RDMA/rxe: Restore setting tot_len in the IPv4 header An earlier patch removed setting of tot_len in IPv4 headers because it was also set in ip_local_out. However, this change resulted in an incorrect ICRC being computed because the tot_len field is not masked out. This patch restores that line. This fixes the bug reported by Zhu Yanjun. This bug affects anyone using rxe which is currently broken. 
Fixes: 230bb836ee88 ("RDMA/rxe: Fix redundant call to ip_send_check") Link: https://lore.kernel.org/r/20210729220039.18549-3-rpearsonhpe@gmail.com Reported-by: Zhu Yanjun Signed-off-by: Bob Pearson Reviewed-and-tested-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index dec92928a1cd..5ac27f28ace1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -259,6 +259,7 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, iph->version = IPVERSION; iph->ihl = sizeof(struct iphdr) >> 2; + iph->tot_len = htons(skb->len); iph->frag_off = df; iph->protocol = proto; iph->tos = tos; From e89afb51f97ae03ee246c1fd0b47e3e491266aef Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 15 Jun 2021 14:23:34 -0400 Subject: [PATCH 693/794] drm/vmwgfx: Fix a 64bit regression on svga3 Register accesses are always 4 bytes; accidentally this was changed to a void pointer, which badly breaks 64bit archs when running on top of svga3.
Fixes: 2cd80dbd3551 ("drm/vmwgfx: Add basic support for SVGA3") Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20210615182336.995192-3-zackr@vmware.com (cherry picked from commit 87360168759879d68550b0c052bbcc2a0339ff74) Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d1cef3b69e9d..5652d982b1ce 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -492,7 +492,7 @@ struct vmw_private { resource_size_t vram_start; resource_size_t vram_size; resource_size_t prim_bb_mem; - void __iomem *rmmio; + u32 __iomem *rmmio; u32 *fifo_mem; resource_size_t fifo_mem_size; uint32_t fb_max_width; From 0541a6293298fb52789de389dfb27ef54df81f73 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 2 Aug 2021 02:17:30 +0300 Subject: [PATCH 694/794] net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry Currently it is possible to add broken extern_learn FDB entries to the bridge in two ways: 1. Entries pointing towards the bridge device that are not local/permanent: ip link add br0 type bridge bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static 2. Entries pointing towards the bridge device or towards a port that are marked as local/permanent, however the bridge does not process the 'permanent' bit in any way, therefore they are recorded as though they aren't permanent: ip link add br0 type bridge bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the same as entries towards the bridge"), these incorrect FDB entries can even trigger NULL pointer dereferences inside the kernel. 
This is because that commit made the assumption that all FDB entries that are not local/permanent have a valid destination port. For context, local / permanent FDB entries either have fdb->dst == NULL, and these point towards the bridge device and are therefore local and not to be used for forwarding, or have fdb->dst == a net_bridge_port structure (but are to be treated in the same way, i.e. not for forwarding). That assumption _is_ correct as long as things are working correctly in the bridge driver, i.e. we cannot logically have fdb->dst == NULL under any circumstance for FDB entries that are not local. However, the extern_learn code path where FDB entries are managed by a user space controller show that it is possible for the bridge kernel driver to misinterpret the NUD flags of an entry transmitted by user space, and end up having fdb->dst == NULL while not being a local entry. This is invalid and should be rejected. Before, the two commands listed above both crashed the kernel in this check from br_switchdev_fdb_notify: struct net_device *dev = info.is_local ? br->dev : dst->dev; info.is_local == false, dst == NULL. After this patch, the invalid entry added by the first command is rejected: ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0 Error: bridge: FDB entry towards bridge must be permanent. 
and the valid entry added by the second command is properly treated as a local address and does not crash br_switchdev_fdb_notify anymore: ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0 Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space") Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/bridge/br.c | 3 ++- net/bridge/br_fdb.c | 30 ++++++++++++++++++++++++------ net/bridge/br_private.h | 2 +- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/net/bridge/br.c b/net/bridge/br.c index ef743f94254d..bbab9984f24e 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -166,7 +166,8 @@ static int br_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, - fdb_info->vid, false); + fdb_info->vid, + fdb_info->is_local, false); if (err) { err = notifier_from_errno(err); break; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a16191dcaed1..835cec1e5a03 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1019,7 +1019,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, - u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[]) + u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[], + struct netlink_ext_ack *extack) { int err = 0; @@ -1038,7 +1039,15 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, rcu_read_unlock(); local_bh_enable(); } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { - err = br_fdb_external_learn_add(br, p, addr, vid, true); + if (!p && !(ndm->ndm_state & NUD_PERMANENT)) { + 
NL_SET_ERR_MSG_MOD(extack, + "FDB entry towards bridge must be permanent"); + return -EINVAL; + } + + err = br_fdb_external_learn_add(br, p, addr, vid, + ndm->ndm_state & NUD_PERMANENT, + true); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb); @@ -1110,9 +1119,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* VID was specified, so use it. */ - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb, + extack); } else { - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb, + extack); if (err || !vg || !vg->num_vlans) goto out; @@ -1124,7 +1135,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (!br_vlan_should_use(v)) continue; err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, - nfea_tb); + nfea_tb, extack); if (err) goto out; } @@ -1264,7 +1275,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool is_local, bool swdev_notify) { struct net_bridge_fdb_entry *fdb; @@ -1281,6 +1292,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) flags |= BIT(BR_FDB_ADDED_BY_USER); + + if (is_local) + flags |= BIT(BR_FDB_LOCAL); + fdb = fdb_create(br, p, addr, vid, flags); if (!fdb) { err = -ENOMEM; @@ -1307,6 +1322,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (is_local) + set_bit(BR_FDB_LOCAL, &fdb->flags); + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2b48b204205e..aa64d8d63ca3 100644 --- a/net/bridge/br_private.h +++ 
b/net/bridge/br_private.h @@ -711,7 +711,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool is_local, bool swdev_notify); int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, From 8861452b2097bb0b5d0081a1c137fb3870b0a31f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Nov 2019 09:43:06 +0100 Subject: [PATCH 695/794] soc: ixp4xx: fix printing resources When compile-testing with 64-bit resource_size_t, gcc reports an invalid printk format string: In file included from include/linux/dma-mapping.h:7, from drivers/soc/ixp4xx/ixp4xx-npe.c:15: drivers/soc/ixp4xx/ixp4xx-npe.c: In function 'ixp4xx_npe_probe': drivers/soc/ixp4xx/ixp4xx-npe.c:694:18: error: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t' {aka 'long long unsigned int'} [-Werror=format=] dev_info(dev, "NPE%d at 0x%08x-0x%08x not available\n", Use the special %pR format string to print the resources. 
Fixes: 0b458d7b10f8 ("soc: ixp4xx: npe: Pass addresses as resources") Signed-off-by: Arnd Bergmann --- drivers/soc/ixp4xx/ixp4xx-npe.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index fea50e04d5a1..f490c4ca51f5 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++ b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -693,8 +693,8 @@ static int ixp4xx_npe_probe(struct platform_device *pdev) if (!(ixp4xx_read_feature_bits() & (IXP4XX_FEATURE_RESET_NPEA << i))) { - dev_info(dev, "NPE%d at 0x%08x-0x%08x not available\n", - i, res->start, res->end); + dev_info(dev, "NPE%d at %pR not available\n", + i, res); continue; /* NPE already disabled or not present */ } npe->regs = devm_ioremap_resource(dev, res); @@ -702,13 +702,12 @@ static int ixp4xx_npe_probe(struct platform_device *pdev) return PTR_ERR(npe->regs); if (npe_reset(npe)) { - dev_info(dev, "NPE%d at 0x%08x-0x%08x does not reset\n", - i, res->start, res->end); + dev_info(dev, "NPE%d at %pR does not reset\n", + i, res); continue; } npe->valid = 1; - dev_info(dev, "NPE%d at 0x%08x-0x%08x registered\n", - i, res->start, res->end); + dev_info(dev, "NPE%d at %pR registered\n", i, res); found++; } From a8eee86317f11e97990d755d4615c1c0db203d08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Aug 2021 10:12:34 +0200 Subject: [PATCH 696/794] soc: ixp4xx/qmgr: fix invalid __iomem access Sparse reports a compile time warning when dereferencing an __iomem pointer: drivers/soc/ixp4xx/ixp4xx-qmgr.c:149:37: warning: dereference of noderef expression drivers/soc/ixp4xx/ixp4xx-qmgr.c:153:40: warning: dereference of noderef expression drivers/soc/ixp4xx/ixp4xx-qmgr.c:154:40: warning: dereference of noderef expression drivers/soc/ixp4xx/ixp4xx-qmgr.c:174:38: warning: dereference of noderef expression drivers/soc/ixp4xx/ixp4xx-qmgr.c:174:44: warning: dereference of noderef expression Use __raw_readl() here for consistency with the rest of the 
file. This should really get converted to some proper accessor, as the __raw functions are not meant to be used in drivers, but the driver has used these since the start, so for the moment, let's only fix the warning. Reported-by: kernel test robot Fixes: d4c9e9fc9751 ("IXP42x: Add QMgr support for IXP425 rev. A0 processors.") Signed-off-by: Arnd Bergmann --- drivers/soc/ixp4xx/ixp4xx-qmgr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/soc/ixp4xx/ixp4xx-qmgr.c b/drivers/soc/ixp4xx/ixp4xx-qmgr.c index c6bf6ef257c0..9154c7029b05 100644 --- a/drivers/soc/ixp4xx/ixp4xx-qmgr.c +++ b/drivers/soc/ixp4xx/ixp4xx-qmgr.c @@ -146,12 +146,12 @@ static irqreturn_t qmgr_irq1_a0(int irq, void *pdev) /* ACK - it may clear any bits so don't rely on it */ __raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[0]); - en_bitmap = qmgr_regs->irqen[0]; + en_bitmap = __raw_readl(&qmgr_regs->irqen[0]); while (en_bitmap) { i = __fls(en_bitmap); /* number of the last "low" queue */ en_bitmap &= ~BIT(i); - src = qmgr_regs->irqsrc[i >> 3]; - stat = qmgr_regs->stat1[i >> 3]; + src = __raw_readl(&qmgr_regs->irqsrc[i >> 3]); + stat = __raw_readl(&qmgr_regs->stat1[i >> 3]); if (src & 4) /* the IRQ condition is inverted */ stat = ~stat; if (stat & BIT(src & 3)) { @@ -171,7 +171,8 @@ static irqreturn_t qmgr_irq2_a0(int irq, void *pdev) /* ACK - it may clear any bits so don't rely on it */ __raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[1]); - req_bitmap = qmgr_regs->irqen[1] & qmgr_regs->statne_h; + req_bitmap = __raw_readl(&qmgr_regs->irqen[1]) & + __raw_readl(&qmgr_regs->statne_h); while (req_bitmap) { i = __fls(req_bitmap); /* number of the last "high" queue */ req_bitmap &= ~BIT(i); From e30e8d46cf605d216a799a28c77b8a41c328613a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Aug 2021 11:42:00 +0100 Subject: [PATCH 697/794] arm64: fix compat syscall return truncation Due to inconsistencies in the way we manipulate compat GPRs, we have a few issues today: * For 
audit and tracing, where error codes are handled as a (native) long, negative error codes are expected to be sign-extended to the native 64-bits, or they may fail to be matched correctly. Thus a syscall which fails with an error may erroneously be identified as failing. * For ptrace, *all* compat return values should be sign-extended for consistency with 32-bit arm, but we currently only do this for negative return codes. * As we may transiently set the upper 32 bits of some compat GPRs while in the kernel, these can be sampled by perf, which is somewhat confusing. This means that where a syscall returns a pointer above 2G, this will be sign-extended, but will not be mistaken for an error as error codes are constrained to the inclusive range [-4096, -1] where no user pointer can exist. To fix all of these, we must consistently use helpers to get/set the compat GPRs, ensuring that we never write the upper 32 bits of the return code, and always sign-extend when reading the return code. This patch does so, with the following changes: * We re-organise syscall_get_return_value() to always sign-extend for compat tasks, and reimplement syscall_get_error() atop. We update syscall_trace_exit() to use syscall_get_return_value(). * We consistently use syscall_set_return_value() to set the return value, ensuring the upper 32 bits are never set unexpectedly. * As the core audit code currently uses regs_return_value() rather than syscall_get_return_value(), we special-case this for compat_user_mode(regs) such that this will do the right thing. Going forward, we should try to move the core audit code over to syscall_get_return_value().
Cc: Reported-by: He Zhe Reported-by: weiyuchen Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210802104200.21390-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 12 +++++++++++- arch/arm64/include/asm/syscall.h | 21 +++++++++++---------- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 3 ++- arch/arm64/kernel/syscall.c | 9 +++------ 5 files changed, 28 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index e58bca832dff..41b332c054ab 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -320,7 +320,17 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) static inline unsigned long regs_return_value(struct pt_regs *regs) { - return regs->regs[0]; + unsigned long val = regs->regs[0]; + + /* + * Audit currently uses regs_return_value() instead of + * syscall_get_return_value(). Apply the same sign-extension here until + * audit is updated to use syscall_get_return_value(). 
+ */ + if (compat_user_mode(regs)) + val = sign_extend64(val, 31); + + return val; } static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index cfc0672013f6..03e20895453a 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -29,24 +29,25 @@ static inline void syscall_rollback(struct task_struct *task, regs->regs[0] = regs->orig_x0; } +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long val = regs->regs[0]; + + if (is_compat_thread(task_thread_info(task))) + val = sign_extend64(val, 31); + + return val; +} static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - unsigned long error = regs->regs[0]; - - if (is_compat_thread(task_thread_info(task))) - error = sign_extend64(error, 31); + unsigned long error = syscall_get_return_value(task, regs); return IS_ERR_VALUE(error) ? 
error : 0; } -static inline long syscall_get_return_value(struct task_struct *task, - struct pt_regs *regs) -{ - return regs->regs[0]; -} - static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 499b6b2f9757..b381a1ee9ea7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1862,7 +1862,7 @@ void syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(regs); if (flags & _TIF_SYSCALL_TRACEPOINT) - trace_sys_exit(regs, regs_return_value(regs)); + trace_sys_exit(regs, syscall_get_return_value(current, regs)); if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f8192f4ae0b8..23036334f4dc 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -890,7 +891,7 @@ static void do_signal(struct pt_regs *regs) retval == -ERESTART_RESTARTBLOCK || (retval == -ERESTARTSYS && !(ksig.ka.sa.sa_flags & SA_RESTART)))) { - regs->regs[0] = -EINTR; + syscall_set_return_value(current, regs, -EINTR, 0); regs->pc = continue_addr; } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 263d6c1a525f..50a0f1a38e84 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -54,10 +54,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, ret = do_ni_syscall(regs, scno); } - if (is_compat_task()) - ret = lower_32_bits(ret); - - regs->regs[0] = ret; + syscall_set_return_value(current, regs, 0, ret); /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), @@ -115,7 +112,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * syscall. do_notify_resume() will send a signal to userspace * before the syscall is restarted. 
*/ - regs->regs[0] = -ERESTARTNOINTR; + syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0); return; } @@ -136,7 +133,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * anyway. */ if (scno == NO_SYSCALL) - regs->regs[0] = -ENOSYS; + syscall_set_return_value(current, regs, -ENOSYS, 0); scno = syscall_trace_enter(regs); if (scno == NO_SYSCALL) goto trace_exit; From 64ee84c75b5f75132eec97f2c7a201a056d53698 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 1 Aug 2021 14:35:25 +0900 Subject: [PATCH 698/794] arm64: move warning about toolchains to archprepare Commit 987fdfec2410 ("arm64: move --fix-cortex-a53-843419 linker test to Kconfig") fixed the false-positive warning in the installation step. Yet, there are some cases where this false-positive is shown. For example, you can see it when you cross 987fdfec2410 during git-bisect. $ git checkout 987fdfec2410^ [ snip ] $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- defconfig all [ snip ] $ git checkout v5.13 [ snip] $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- defconfig all [ snip ] arch/arm64/Makefile:25: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum In the stale include/config/auto.config, CONFIG_ARM64_ERRATUM_843419=y is set without CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419, so the warning is displayed while parsing the Makefiles. Make will restart with the updated include/config/auto.config, hence CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419 will be set eventually, but this warning is a surprise for users. Commit 25896d073d8a ("x86/build: Fix compiler support check for CONFIG_RETPOLINE") addressed a similar issue. Move $(warning ...) out of the parse stage of Makefiles. The same applies to CONFIG_ARM64_USE_LSE_ATOMICS. 
Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210801053525.105235-1-masahiroy@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7bc37d0a1b68..7b668db43261 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -21,19 +21,11 @@ LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ endif ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) - ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) -$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum) - else + ifeq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) - ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) -$(warning LSE atomics not supported by binutils) - endif -endif - cc_has_k_constraint := $(call try-run,echo \ 'int main(void) { \ asm volatile("and w0, w0, %w0" :: "K" (4294967295)); \ @@ -176,6 +168,17 @@ vdso_install: archprepare: $(Q)$(MAKE) $(build)=arch/arm64/tools kapi +ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) + ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) + @echo "warning: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum" >&2 + endif +endif +ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS),y) + ifneq ($(CONFIG_ARM64_LSE_ATOMICS),y) + @echo "warning: LSE atomics not supported by binutils" >&2 + endif +endif + # We use MRPROPER_FILES and CLEAN_FILES now archclean: From f9c4ff2ab9fe433d44ebbc2e3c2368a49df44798 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 31 Jul 2021 00:51:31 +1200 Subject: [PATCH 699/794] arm64: fix the doc of RANDOMIZE_MODULE_REGION_FULL Obviously kaslr is setting the module region to 2GB rather than 4GB since commit b2eed9b588112 ("arm64/kernel: kaslr: reduce module randomization range to 2 GB"). So fix the size of region in Kconfig. 
On the other hand, even though RANDOMIZE_MODULE_REGION_FULL is not set, module_alloc() can fall back to a 2GB window if ARM64_MODULE_PLTS is set. In this case, veneers are still needed. !RANDOMIZE_MODULE_REGION_FULL doesn't necessarily mean veneers are not needed. So fix the doc to be more precise to avoid any confusion to the readers of the code. Cc: Masami Hiramatsu Cc: Ard Biesheuvel Cc: Qi Liu Signed-off-by: Barry Song Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210730125131.13724-1-song.bao.hua@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 9 ++++++--- arch/arm64/kernel/kaslr.c | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b5b13a932561..fdcd54d39c1e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1800,11 +1800,11 @@ config RANDOMIZE_BASE If unsure, say N. config RANDOMIZE_MODULE_REGION_FULL - bool "Randomize the module region over a 4 GB range" + bool "Randomize the module region over a 2 GB range" depends on RANDOMIZE_BASE default y help - Randomizes the location of the module region inside a 4 GB window + Randomizes the location of the module region inside a 2 GB window covering the core kernel. This way, it is less likely for modules to leak information about the location of core kernel data structures but it does imply that function calls between modules and the core @@ -1812,7 +1812,10 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the - core kernel, so branch relocations are always in range. + core kernel, so branch relocations are almost always in range unless + ARM64_MODULE_PLTS is enabled and the region is exhausted. In this + particular case of region exhaustion, modules might be able to fall + back to a larger 2GB area. 
config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index cfa2cfde3019..418b2bba1521 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -162,7 +162,9 @@ u64 __init kaslr_early_init(void) * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, * _stext) . This guarantees that the resulting region still * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers. + * be resolved without veneers unless this region is exhausted + * and we fall back to a larger 2GB window in module_alloc() + * when ARM64_MODULE_PLTS is enabled. */ module_range = MODULES_VSIZE - (u64)(_etext - _stext); module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; From 8d5903f457145e3fcd858578b065d667822d99ac Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Aug 2021 17:48:44 +0100 Subject: [PATCH 700/794] arm64: stacktrace: fix comment Due to a copy-paste error, we describe struct stackframe::pc as a snapshot of the `fp` field rather than the `lr` field. Fix the comment. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210802164845.45506-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 1801399204d7..8aebc00c1718 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -35,7 +35,7 @@ struct stack_info { * accounting information necessary for robust unwinding. 
* * @fp: The fp value in the frame record (or the real fp) - * @pc: The fp value in the frame record (or the real lr) + * @pc: The lr value in the frame record (or the real lr) * * @stacks_done: Stacks which have been entirely unwound, for which it is no * longer valid to unwind to. From 0c32706dac1b0a72713184246952ab0f54327c21 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Aug 2021 17:48:45 +0100 Subject: [PATCH 701/794] arm64: stacktrace: avoid tracing arch_stack_walk() When the function_graph tracer is in use, arch_stack_walk() may unwind the stack incorrectly, erroneously reporting itself, missing the final entry which is being traced, and reporting all traced entries between these off-by-one from where they should be. When ftrace hooks a function return, the original return address is saved to the fgraph ret_stack, and the return address in the LR (or the function's frame record) is replaced with `return_to_handler`. When arm64's unwinder encounters frames returning to `return_to_handler`, it finds the associated original return address from the fgraph ret stack, assuming the most recent `return_to_handler` entry on the stack corresponds to the most recent entry in the fgraph ret stack, and so on. When arch_stack_walk() is used to dump the current task's stack, it starts from the caller of arch_stack_walk(). However, arch_stack_walk() can be traced, and so may push an entry on to the fgraph ret stack, leaving the fgraph ret stack offset by one from the expected position. This can be seen when dumping the stack via /proc/self/stack, where enabling the graph tracer results in an unexpected `stack_trace_save_tsk` entry at the start of the trace, and `el0_svc` missing from the end of the trace. This patch fixes this by marking arch_stack_walk() as notrace, as we do for all other functions on the path to ftrace_graph_get_ret_stack(). 
While a few helper functions are not marked notrace, their calls/returns are balanced, and will have no observable effect when examining the fgraph ret stack. It is possible for an exception boundary to cause a similar offset if the return address of the interrupted context was in the LR. Fixing those cases will require some more substantial rework, and is left for subsequent patches. Before: | # cat /proc/self/stack | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0_svc+0x2c/0x54 | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c | # echo function_graph > /sys/kernel/tracing/current_tracer | # cat /proc/self/stack | [<0>] stack_trace_save_tsk+0xa4/0x110 | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c After: | # cat /proc/self/stack | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0_svc+0x2c/0x54 | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c | # echo function_graph > /sys/kernel/tracing/current_tracer | # cat /proc/self/stack | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] 
proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0_svc+0x2c/0x54 | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c Cc: Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210802164845.45506-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index b83c8d911930..8982a2b78acf 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -218,7 +218,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) #ifdef CONFIG_STACKTRACE -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, +noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { From ce78ffa3ef1681065ba451cfd545da6126f5ca88 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 Aug 2021 11:14:03 +0100 Subject: [PATCH 702/794] net: really fix the build... Signed-off-by: David S. 
Miller --- drivers/bus/mhi/core/internal.h | 2 +- drivers/bus/mhi/core/main.c | 9 ++++++--- drivers/net/mhi/net.c | 2 +- drivers/net/wwan/mhi_wwan_ctrl.c | 2 +- include/linux/mhi.h | 7 ++++++- net/qrtr/mhi.c | 16 +++++++++++++++- 6 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index 5b9ea66b92dc..bc239a11aa69 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl, struct image_info *img_info); void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl); int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan); + struct mhi_chan *mhi_chan, unsigned int flags); int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan); void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index fc9196f11cb7..84448233f64c 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -1430,7 +1430,7 @@ exit_unprepare_channel: } int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan) + struct mhi_chan *mhi_chan, unsigned int flags) { int ret = 0; struct device *dev = &mhi_chan->mhi_dev->dev; @@ -1455,6 +1455,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, if (ret) goto error_pm_state; + if (mhi_chan->dir == DMA_FROM_DEVICE) + mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS); + /* Pre-allocate buffer for xfer ring */ if (mhi_chan->pre_alloc) { int nr_el = get_nr_avail_ring_elements(mhi_cntrl, @@ -1610,7 +1613,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan) } /* Move channel to start state */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags) { int ret, dir; struct mhi_controller 
*mhi_cntrl = mhi_dev->mhi_cntrl; @@ -1621,7 +1624,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) if (!mhi_chan) continue; - ret = mhi_prepare_channel(mhi_cntrl, mhi_chan); + ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags); if (ret) goto error_open_chan; } diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c index e60e38c1f09d..11be6bcdd551 100644 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi/net.c @@ -335,7 +335,7 @@ static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, u64_stats_init(&mhi_netdev->stats.tx_syncp); /* Start MHI channels */ - err = mhi_prepare_for_transfer(mhi_dev); + err = mhi_prepare_for_transfer(mhi_dev, 0); if (err) goto out_err; diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c index 1bc6b69aa530..1e18420ce404 100644 --- a/drivers/net/wwan/mhi_wwan_ctrl.c +++ b/drivers/net/wwan/mhi_wwan_ctrl.c @@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port) int ret; /* Start mhi device's channel(s) */ - ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev); + ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0); if (ret) return ret; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 944aa3aa3035..5e08468854db 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -719,8 +719,13 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, + unsigned int flags); + +/* Automatically allocate and queue inbound buffers */ +#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. 
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index fa611678af05..1dc955ca57d3 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -15,6 +15,7 @@ struct qrtr_mhi_dev { struct qrtr_endpoint ep; struct mhi_device *mhi_dev; struct device *dev; + struct completion ready; }; /* From MHI to QRTR */ @@ -50,6 +51,10 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep); int rc; + rc = wait_for_completion_interruptible(&qdev->ready); + if (rc) + goto free_skb; + if (skb->sk) sock_hold(skb->sk); @@ -79,7 +84,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, int rc; /* start channels */ - rc = mhi_prepare_for_transfer(mhi_dev); + rc = mhi_prepare_for_transfer(mhi_dev, 0); if (rc) return rc; @@ -96,6 +101,15 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, if (rc) return rc; + /* start channels */ + rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS); + if (rc) { + qrtr_endpoint_unregister(&qdev->ep); + dev_set_drvdata(&mhi_dev->dev, NULL); + return rc; + } + + complete_all(&qdev->ready); dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); return 0; From 2e2f1e8d0450c561c0c936b4b67e8b5a95975fb7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:22 +0200 Subject: [PATCH 703/794] KVM: x86: hyper-v: Check access to hypercall before reading XMM registers In case guest doesn't have access to the particular hypercall we can avoid reading XMM registers. 
Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index b07592ca92f0..cb7e045905a5 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2173,9 +2173,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; hc.rep = !!(hc.rep_cnt || hc.rep_idx); - if (hc.fast && is_xmm_fast_hypercall(&hc)) - kvm_hv_hypercall_read_xmm(&hc); - trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); @@ -2184,6 +2181,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) goto hypercall_complete; } + if (hc.fast && is_xmm_fast_hypercall(&hc)) + kvm_hv_hypercall_read_xmm(&hc); + switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: if (unlikely(hc.rep)) { From f5714bbb5b3120b33dfbf3d81ffc0b98ae4cd4c1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:23 +0200 Subject: [PATCH 704/794] KVM: x86: Introduce trace_kvm_hv_hypercall_done() Hypercall failures are unusual with potentially far going consequences so it would be useful to see their results when tracing. 
Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 1 + arch/x86/kvm/trace.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index cb7e045905a5..2945b93dbadd 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { + trace_kvm_hv_hypercall_done(result); kvm_hv_hypercall_set_result(vcpu, result); ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b484141ea15b..03ebe368333e 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall, __entry->outgpa) ); +TRACE_EVENT(kvm_hv_hypercall_done, + TP_PROTO(u64 result), + TP_ARGS(result), + + TP_STRUCT__entry( + __field(__u64, result) + ), + + TP_fast_assign( + __entry->result = result; + ), + + TP_printk("result 0x%llx", __entry->result) +); + /* * Tracepoint for Xen hypercall. */ From 4e62aa96d6e55c1b2a4e841f1f8601eae81e81ae Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:24 +0200 Subject: [PATCH 705/794] KVM: x86: hyper-v: Check if guest is allowed to use XMM registers for hypercall input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TLFS states that "Availability of the XMM fast hypercall interface is indicated via the “Hypervisor Feature Identification” CPUID Leaf (0x40000003, see section 2.4.4) ... Any attempt to use this interface when the hypervisor does not indicate availability will result in a #UD fault." Implement the check for 'strict' mode (KVM_CAP_HYPERV_ENFORCE_CPUID). 
Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2945b93dbadd..0b38f944c6b6 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2140,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code) int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_hv_hcall hc; u64 ret = HV_STATUS_SUCCESS; @@ -2177,13 +2178,21 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); - if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) { + if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) { ret = HV_STATUS_ACCESS_DENIED; goto hypercall_complete; } - if (hc.fast && is_xmm_fast_hypercall(&hc)) + if (hc.fast && is_xmm_fast_hypercall(&hc)) { + if (unlikely(hv_vcpu->enforce_cpuid && + !(hv_vcpu->cpuid_cache.features_edx & + HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + kvm_hv_hypercall_read_xmm(&hc); + } switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: From 2476b5a1b16ced78a80629da8ff87538d5c95073 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:25 +0200 Subject: [PATCH 706/794] KVM: selftests: Test access to XMM fast hypercalls Check that #UD is raised if bit 16 is clear in HYPERV_CPUID_FEATURES.EDX and an 'XMM fast' hypercall is issued. 
Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-5-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/hyperv.h | 5 ++- .../selftests/kvm/x86_64/hyperv_features.c | 41 +++++++++++++++++-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index 412eaee7884a..b66910702c0a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -117,7 +117,7 @@ #define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) #define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) #define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) @@ -182,4 +182,7 @@ #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* hypercall options */ +#define HV_HYPERCALL_FAST_BIT BIT(16) + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index af27c7e829c1..91d88aaa9899 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val) } static int nr_gp; +static int nr_ud; static inline u64 hypercall(u64 control, vm_vaddr_t input_address, vm_vaddr_t output_address) @@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs) regs->rip = (uint64_t)&wrmsr_end; } +static void guest_ud_handler(struct ex_regs *regs) +{ + nr_ud++; + regs->rip += 3; +} + struct msr_data { uint32_t idx; bool available; @@ -90,6 +97,7 @@ 
struct msr_data { struct hcall_data { uint64_t control; uint64_t expect; + bool ud_expected; }; static void guest_msr(struct msr_data *msr) @@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr) static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) { int i = 0; + u64 res, input, output; wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); while (hcall->control) { - GUEST_ASSERT(hypercall(hcall->control, pgs_gpa, - pgs_gpa + 4096) == hcall->expect); + nr_ud = 0; + if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { + input = pgs_gpa; + output = pgs_gpa + 4096; + } else { + input = output = 0; + } + + res = hypercall(hcall->control, input, output); + if (hcall->ud_expected) + GUEST_ASSERT(nr_ud == 1); + else + GUEST_ASSERT(res == hcall->expect); + GUEST_SYNC(i++); } @@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall recomm.ebx = 0xfff; hcall->expect = HV_STATUS_SUCCESS; break; - case 17: + /* XMM fast hypercall */ + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; + hcall->ud_expected = true; + break; + case 18: + feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; + hcall->ud_expected = false; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 19: /* END */ hcall->control = 0; break; @@ -625,6 +656,10 @@ int main(void) /* Test hypercalls */ vm = vm_create_default(VCPU_ID, 0, guest_hcall); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); From ae954bbc451d267f7d60d7b49db811d5a68ebd7b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Aug 2021 02:25:31 -0400 Subject: [PATCH 707/794] sctp: move the active_key update after sh_keys is added In commit 58acd1009226 ("sctp: update active_key for asoc when old key is 
being replaced"), sctp_auth_asoc_init_active_key() is called to update the active_key right after the old key is deleted and before the new key is added, and it caused that the active_key could not be found with the key_id. In Ying Xu's testing, the BUG_ON in sctp_auth_asoc_init_active_key() was triggered: [ ] kernel BUG at net/sctp/auth.c:416! [ ] RIP: 0010:sctp_auth_asoc_init_active_key.part.8+0xe7/0xf0 [sctp] [ ] Call Trace: [ ] sctp_auth_set_key+0x16d/0x1b0 [sctp] [ ] sctp_setsockopt.part.33+0x1ba9/0x2bd0 [sctp] [ ] __sys_setsockopt+0xd6/0x1d0 [ ] __x64_sys_setsockopt+0x20/0x30 [ ] do_syscall_64+0x5b/0x1a0 So fix it by moving the active_key update after sh_keys is added. Fixes: 58acd1009226 ("sctp: update active_key for asoc when old key is being replaced") Reported-by: Ying Xu Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/auth.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index fe74c5f95630..db6b7373d16c 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -857,14 +857,18 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); cur_key->key = key; - if (replace) { - list_del_init(&shkey->key_list); - sctp_auth_shkey_release(shkey); - if (asoc && asoc->active_key_id == auth_key->sca_keynumber) - sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + if (!replace) { + list_add(&cur_key->key_list, sh_keys); + return 0; } + + list_del_init(&shkey->key_list); + sctp_auth_shkey_release(shkey); list_add(&cur_key->key_list, sh_keys); + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + return 0; } From f41e57af926ad840d114439d34cafc0533bf25f0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 Aug 2021 17:21:53 +0200 Subject: [PATCH 708/794] net: sparx5: fix bitmask on 32-bit targets I saw the build failure that was fixed in commit 6387f65e2acb ("net: 
sparx5: fix compiletime_assert for GCC 4.9") and noticed another issue that was introduced in the same patch: Using GENMASK() to create a 64-bit mask does not work on 32-bit architectures. This probably won't ever happen on this driver since it's specific to a 64-bit SoC, but it's better to write it portably, so use GENMASK_ULL() instead. Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 1a240e6bddd0..cb68eaaac881 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -32,7 +32,7 @@ static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) u32 byte = (35 - (pos / 8)); /* Calculate the Start bit position in the Start IFH byte */ u32 bit = (pos % 8); - u64 encode = GENMASK(bit + width - 1, bit) & (value << bit); + u64 encode = GENMASK_ULL(bit + width - 1, bit) & (value << bit); /* The b0-b7 goes into the start IFH byte */ if (encode & 0xFF) From 9c9c6d0ab08acfe41c9f7efa72c4ad3f133a266b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 16:34:11 -0700 Subject: [PATCH 709/794] drm/i915: Correct SFC_DONE register offset The register offset for SFC_DONE was missing a '0' at the end, causing us to read from a non-existent register address. We only use this register in error state dumps so the mistake hasn't caused any real problems, but fixing it will hopefully make the error state dumps a bit more useful for debugging. 
Fixes: e50dbdbfd9fb ("drm/i915/tgl: Add SFC instdone to error state") Cc: Mika Kuoppala Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210728233411.2365788-1-matthew.d.roper@intel.com Reviewed-by: Mika Kuoppala (cherry picked from commit 82929a2140eb99f1f1d21855f3f580e70d7abdd8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 94fde5ca26ae..41186c1f771e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -422,7 +422,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_HCP_SFC_LOCK_ACK_BIT REG_BIT(1) #define GEN12_HCP_SFC_USAGE_BIT REG_BIT(0) -#define GEN12_SFC_DONE(n) _MMIO(0x1cc00 + (n) * 0x100) +#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) #define GEN12_SFC_DONE_MAX 4 #define RING_PP_DIR_BASE(base) _MMIO((base) + 0x228) From 1354d830cb8f9be966cc07fc61368af27ffb7c4a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Jul 2021 10:23:54 -0500 Subject: [PATCH 710/794] drm/i915: Call i915_globals_exit() if pci_register_device() fails In the unlikely event that pci_register_device() fails, we were tearing down our PMU setup but not globals. This leaves a bunch of memory slabs lying around. Signed-off-by: Jason Ekstrand Fixes: 32eb6bcfdda9 ("drm/i915: Make request allocation caches global") [danvet: Fix conflicts against removal of the globals_flush infrastructure.] 
Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-3-jason@jlekstrand.net (cherry picked from commit db484889d1ff0645e07e360d3e3ad306c0515821) Signed-off-by: Rodrigo Vivi [Fixed small conflict while cherry picking] --- drivers/gpu/drm/i915/i915_globals.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 77f1911c463b..2db90e770616 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -148,7 +148,7 @@ static void __exit __i915_globals_flush(void) atomic_dec(&active); } -void __exit i915_globals_exit(void) +void i915_globals_exit(void) { GEM_BUG_ON(atomic_read(&active)); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 83b500bb170c..2880ec57c97d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1195,6 +1195,7 @@ static int __init i915_init(void) err = pci_register_driver(&i915_pci_driver); if (err) { i915_pmu_exit(); + i915_globals_exit(); return err; } From 97367c97226aab8b298ada954ce12659ee3ad2a4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 3 Aug 2021 13:43:12 +0200 Subject: [PATCH 711/794] ALSA: seq: Fix racy deletion of subscriber It turned out that the current implementation of the port subscription is racy. The subscription contains two linked lists, and we have to add to or delete from both lists. Since both connection and disconnection procedures perform the same order for those two lists (i.e. src list, then dest list), when a deletion happens during a connection procedure, the src list may be deleted before the dest list addition completes, and this may lead to a use-after-free or an Oops, even though the access to both lists are protected via mutex. 
The simple workaround for this race is to change the access order for the disconnection, namely, dest list, then src list. This assures that the connection has been established when disconnecting, and also the concurrent deletion can be avoided. Reported-and-tested-by: folkert Cc: Link: https://lore.kernel.org/r/20210801182754.GP890690@belle.intranet.vanheusden.com Link: https://lore.kernel.org/r/20210803114312.2536-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ports.c | 39 ++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index b9c2ce2b8d5a..84d78630463e 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -514,10 +514,11 @@ static int check_and_subscribe_port(struct snd_seq_client *client, return err; } -static void delete_and_unsubscribe_port(struct snd_seq_client *client, - struct snd_seq_client_port *port, - struct snd_seq_subscribers *subs, - bool is_src, bool ack) +/* called with grp->list_mutex held */ +static void __delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack) { struct snd_seq_port_subs_info *grp; struct list_head *list; @@ -525,7 +526,6 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client, grp = is_src ? &port->c_src : &port->c_dest; list = is_src ? &subs->src_list : &subs->dest_list; - down_write(&grp->list_mutex); write_lock_irq(&grp->list_lock); empty = list_empty(list); if (!empty) @@ -535,6 +535,18 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client, if (!empty) unsubscribe_port(client, port, grp, &subs->info, ack); +} + +static void delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack) +{ + struct snd_seq_port_subs_info *grp; + + grp = is_src ? 
&port->c_src : &port->c_dest; + down_write(&grp->list_mutex); + __delete_and_unsubscribe_port(client, port, subs, is_src, ack); up_write(&grp->list_mutex); } @@ -590,27 +602,30 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector, struct snd_seq_client_port *dest_port, struct snd_seq_port_subscribe *info) { - struct snd_seq_port_subs_info *src = &src_port->c_src; + struct snd_seq_port_subs_info *dest = &dest_port->c_dest; struct snd_seq_subscribers *subs; int err = -ENOENT; - down_write(&src->list_mutex); + /* always start from deleting the dest port for avoiding concurrent + * deletions + */ + down_write(&dest->list_mutex); /* look for the connection */ - list_for_each_entry(subs, &src->list_head, src_list) { + list_for_each_entry(subs, &dest->list_head, dest_list) { if (match_subs_info(info, &subs->info)) { - atomic_dec(&subs->ref_count); /* mark as not ready */ + __delete_and_unsubscribe_port(dest_client, dest_port, + subs, false, + connector->number != dest_client->number); err = 0; break; } } - up_write(&src->list_mutex); + up_write(&dest->list_mutex); if (err < 0) return err; delete_and_unsubscribe_port(src_client, src_port, subs, true, connector->number != src_client->number); - delete_and_unsubscribe_port(dest_client, dest_port, subs, false, - connector->number != dest_client->number); kfree(subs); return 0; } From c87a4c542b5a796f795fec2b7a909c7d3067b11c Mon Sep 17 00:00:00 2001 From: Bijie Xu Date: Tue, 3 Aug 2021 11:40:18 +0200 Subject: [PATCH 712/794] net: flow_offload: correct comments mismatch with code Correct mismatch between the name of flow_offload_has_one_action() and its kdoc entry. Found using ./scripts/kernel-doc -Werror -none include/net/flow_offload.h Signed-off-by: Bijie Xu Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/flow_offload.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 69c9eabf8325..f3c2841566a0 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -293,7 +293,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action) } /** - * flow_action_has_one_action() - check if exactly one action is present + * flow_offload_has_one_action() - check if exactly one action is present * @action: tc filter flow offload action * * Returns true if exactly one action is present. From 0161d151f3e36306219f5aa6f5f6b3877038afd3 Mon Sep 17 00:00:00 2001 From: Bijie Xu Date: Tue, 3 Aug 2021 11:40:19 +0200 Subject: [PATCH 713/794] net: sched: provide missing kdoc for tcf_pkt_info and tcf_ematch_ops Provide missing kdoc of fields of struct tcf_pkt_info and tcf_ematch_ops. Found using ./scripts/kernel-doc -none -Werror include/net/pkt_cls.h Signed-off-by: Bijie Xu Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ec7823921bd2..298a8d10168b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -337,6 +337,9 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); /** * struct tcf_pkt_info - packet information + * + * @ptr: start of the pkt data + * @nexthdr: offset of the next header */ struct tcf_pkt_info { unsigned char * ptr; @@ -355,6 +358,7 @@ struct tcf_ematch_ops; * @ops: the operations lookup table of the corresponding ematch module * @datalen: length of the ematch specific configuration data * @data: ematch specific data + * @net: the network namespace */ struct tcf_ematch { struct tcf_ematch_ops * ops; From 9fdc5d85a8fe684cdf24dc31c6bc4a727decfe87 Mon Sep 17 00:00:00 2001 From: Fei Qin Date: Tue, 3 Aug 2021 12:39:11 +0200 Subject: [PATCH 714/794] nfp: update ethtool reporting of pauseframe control Pauseframe control is set to symmetric mode by default on the NFP. Pause frames can not be configured through ethtool now, but ethtool can report the supported mode. Fixes: 265aeb511bd5 ("nfp: add support for .get_link_ksettings()") Signed-off-by: Fei Qin Signed-off-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1b482446536d..8803faadd302 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -286,6 +286,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev, /* Init to unknowns */ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; cmd->base.duplex = DUPLEX_UNKNOWN; From 4039146777a91e1576da2bf38e0d8a1061a1ae47 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 3 Aug 2021 12:00:16 +0200 Subject: [PATCH 715/794] net: ipv6: fix returned variable type in ip6_skb_dst_mtu The patch fixing the returned value of ip6_skb_dst_mtu (int -> unsigned int) was rebased between its initial review and the version applied. In the meantime fade56410c22 was applied, which added a new variable (int) used as the returned value. This led to a mismatch between the function prototype and the variable used as the return value. Fixes: 40fc3054b458 ("net: ipv6: fix return value of ip6_skb_dst_mtu") Cc: Vadim Fedorenko Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 625a38ccb5d9..0bf09a9bca4e 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -265,7 +265,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { - int mtu; + unsigned int mtu; struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? 
inet6_sk(skb->sk) : NULL; From ecd92e2167c30faa18df21e3ec3dbec510ddebaa Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 31 Jul 2021 13:13:40 +0200 Subject: [PATCH 716/794] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/defconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 7de253f766e8..b88184019af9 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -335,7 +335,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b671642967ba..1667a3cdcf0a 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -325,7 +325,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y From c2ec772b87408259cb01209a22fb4e1ae7d346de Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 30 Jul 2021 16:38:07 +0200 Subject: [PATCH 717/794] cpuidle: teo: Fix alternative idle state lookup There are three mistakes in the loop in teo_select() that is looking for an alternative candidate idle state. First, it should walk all of the idle states shallower than the current candidate one, including all of the disabled ones, but it terminates after the first enabled idle state. Second, it should not terminate its last step if idle state 0 is disabled (which is related to the first issue). Finally, it may return the current alternative candidate idle state prematurely if the time span criterion is not met by the idle state under consideration at the moment. 
To address the issues mentioned above, make the loop in question walk all of the idle states shallower than the current candidate idle state all the way down to idle state 0 and rearrange the checks in it. Fixes: 77577558f25d ("cpuidle: teo: Rework most recent idle duration values treatment") Reported-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/teo.c | 44 ++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 7b91060e82f6..8c59050ba419 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -397,32 +397,46 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, intercept_sum = 0; recent_sum = 0; - for (i = idx - 1; i >= idx0; i--) { + for (i = idx - 1; i >= 0; i--) { struct teo_bin *bin = &cpu_data->state_bins[i]; s64 span_ns; intercept_sum += bin->intercepts; recent_sum += bin->recent; - if (dev->states_usage[i].disable) - continue; - span_ns = teo_middle_of_bin(i, drv); - if (!teo_time_ok(span_ns)) { - /* - * The current state is too shallow, so select - * the first enabled deeper state. - */ - duration_ns = last_enabled_span_ns; - idx = last_enabled_idx; - break; - } if ((!alt_recent || 2 * recent_sum > idx_recent_sum) && (!alt_intercepts || 2 * intercept_sum > idx_intercept_sum)) { - idx = i; - duration_ns = span_ns; + if (teo_time_ok(span_ns) && + !dev->states_usage[i].disable) { + idx = i; + duration_ns = span_ns; + } else { + /* + * The current state is too shallow or + * disabled, so take the first enabled + * deeper state with suitable time span. 
+ */ + idx = last_enabled_idx; + duration_ns = last_enabled_span_ns; + } + break; + } + + if (dev->states_usage[i].disable) + continue; + + if (!teo_time_ok(span_ns)) { + /* + * The current state is too shallow, but if an + * alternative candidate state has been found, + * it may still turn out to be a better choice. + */ + if (last_enabled_idx != idx) + continue; + break; } From 4adae7dd10db10f20f51833dc11b3cf7a342ad38 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 30 Jul 2021 16:38:52 +0200 Subject: [PATCH 718/794] cpuidle: teo: Rename two local variables in teo_select() Rename two local variables in teo_select() so that their names better reflect their purpose. No functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/teo.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 8c59050ba419..d9262db79cae 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -382,8 +382,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum; alt_recent = idx_recent_sum > NR_RECENT / 2; if (alt_recent || alt_intercepts) { - s64 last_enabled_span_ns = duration_ns; - int last_enabled_idx = idx; + s64 first_suitable_span_ns = duration_ns; + int first_suitable_idx = idx; /* * Look for the deepest idle state whose target residency had @@ -419,8 +419,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * disabled, so take the first enabled * deeper state with suitable time span. 
*/ - idx = last_enabled_idx; - duration_ns = last_enabled_span_ns; + idx = first_suitable_idx; + duration_ns = first_suitable_span_ns; } break; } @@ -434,14 +434,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * alternative candidate state has been found, * it may still turn out to be a better choice. */ - if (last_enabled_idx != idx) + if (first_suitable_idx != idx) continue; break; } - last_enabled_span_ns = span_ns; - last_enabled_idx = i; + first_suitable_span_ns = span_ns; + first_suitable_idx = i; } } From 6511a8b5b7a65037340cd8ee91a377811effbc83 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Aug 2021 18:14:44 +0200 Subject: [PATCH 719/794] Revert "ACPICA: Fix memory leak caused by _CID repair function" Revert commit c27bac0314131 ("ACPICA: Fix memory leak caused by _CID repair function") which is reported to cause a boot issue on Acer Swift 3 (SF314-51). Reported-by: Adrien Precigout Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsrepair2.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 38e10ab976e6..14b71b41e845 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -379,13 +379,6 @@ acpi_ns_repair_CID(struct acpi_evaluate_info *info, (*element_ptr)->common.reference_count = original_ref_count; - - /* - * The original_element holds a reference from the package object - * that represents _HID. Since a new element was created by _HID, - * remove the reference from the _CID package. 
- */ - acpi_ut_remove_reference(original_element); } element_ptr++; From 8b436a99cd708bd158231a0630ffa49b1d6175e4 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Mon, 2 Aug 2021 14:56:14 +0800 Subject: [PATCH 720/794] RDMA/hns: Fix the double unlock problem of poll_sem If hns_roce_cmd_use_events() fails then it means that the poll_sem is not obtained, but the poll_sem is released in hns_roce_cmd_use_polling(), this will cause an unlock problem. This is the static checker warning: drivers/infiniband/hw/hns/hns_roce_main.c:926 hns_roce_init() error: double unlocked '&hr_dev->cmd.poll_sem' (orig line 879) Event mode and polling mode are mutually exclusive and resources are separated, so there is no need to process polling mode resources in event mode. The initial mode of cmd is polling mode, so even if cmd fails to switch to event mode, it is not necessary to switch to polling mode. Fixes: a389d016c030 ("RDMA/hns: Enable all CMDQ context") Fixes: 3d50503b3b33 ("RDMA/hns: Optimize cmd init and mode selection for hip08") Link: https://lore.kernel.org/r/1627887374-20019-1-git-send-email-liangwenpeng@huawei.com Reported-by: Dan Carpenter Signed-off-by: Yangyang Li Signed-off-by: Wenpeng Liang Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cmd.c | 7 +++---- drivers/infiniband/hw/hns/hns_roce_main.c | 4 +--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 8f68cc3ff193..84f3f2b5f097 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -213,8 +213,10 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) hr_cmd->context = kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); - if (!hr_cmd->context) + if (!hr_cmd->context) { + hr_dev->cmd_mod = 0; return -ENOMEM; + } for (i = 0; i < hr_cmd->max_cmds; ++i) { hr_cmd->context[i].token = i; @@ -228,7 +230,6 @@ 
int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) spin_lock_init(&hr_cmd->context_lock); hr_cmd->use_events = 1; - down(&hr_cmd->poll_sem); return 0; } @@ -239,8 +240,6 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) kfree(hr_cmd->context); hr_cmd->use_events = 0; - - up(&hr_cmd->poll_sem); } struct hns_roce_cmd_mailbox * diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 078a97193f0e..cc6eab14a222 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -873,11 +873,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (hr_dev->cmd_mod) { ret = hns_roce_cmd_use_events(hr_dev); - if (ret) { + if (ret) dev_warn(dev, "Cmd event mode failed, set back to poll!\n"); - hns_roce_cmd_use_polling(hr_dev); - } } ret = hns_roce_init_hem(hr_dev); From abc7285d89ffd089739a1a3059ddd843dd019637 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 2 Aug 2021 16:19:14 -0700 Subject: [PATCH 721/794] mptcp: drop unused rcu member in mptcp_pm_addr_entry kfree_rcu() had been removed from pm_netlink.c, so this rcu field in struct mptcp_pm_addr_entry became useless. Let's drop it. 
Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20210802231914.54709-1-mathew.j.martineau@linux.intel.com Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d2591ebf01d9..56263c2c4014 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry { struct mptcp_addr_info addr; u8 flags; int ifindex; - struct rcu_head rcu; struct socket *lsk; }; From e3ea110d6e796146920e1be0108464ebcf283ef7 Mon Sep 17 00:00:00 2001 From: Harshavardhan Unnibhavi Date: Mon, 2 Aug 2021 19:35:06 +0200 Subject: [PATCH 722/794] VSOCK: handle VIRTIO_VSOCK_OP_CREDIT_REQUEST The original implementation of the virtio-vsock driver does not handle a VIRTIO_VSOCK_OP_CREDIT_REQUEST as required by the virtio-vsock specification. The vsock device emulated by vhost-vsock and the virtio-vsock driver never uses this request, which was probably why nobody noticed it. However, another implementation of the device may use this request type. Hence, this commit introduces a way to handle an explicit credit request by responding with a corresponding credit update as required by the virtio-vsock specification. Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Signed-off-by: Harshavardhan Unnibhavi Reviewed-by: Stefano Garzarella Acked-by: Michael S. 
Tsirkin Link: https://lore.kernel.org/r/20210802173506.2383-1-harshanavkis@gmail.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 169ba8b72a63..081e7ae93cb1 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1079,6 +1079,9 @@ virtio_transport_recv_connected(struct sock *sk, virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk); + break; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; From d1a58c013a5837451e3213e7a426d350fa524ead Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 3 Aug 2021 08:37:46 +0200 Subject: [PATCH 723/794] net: dsa: qca: ar9331: reorder MDIO write sequence In case of this switch we work with 32bit registers on top of 16bit bus. Some registers (for example access to forwarding database) have trigger bit on the first 16bit half of request and the result + configuration of request in the second half. Without this patch, we would trigger database operation and overwrite result in one run. To make it work properly, we should do the second part of transfer before the first one is done. So far, this rule seems to work for all registers on this switch. 
Fixes: ec6698c272de ("net: dsa: add support for Atheros AR9331 built-in switch") Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210803063746.3600-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/qca/ar9331.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index ca2ad77b71f1..6686192e1883 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -837,16 +837,24 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val) return 0; } - ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); - if (ret < 0) - goto error; - + /* In case of this switch we work with 32bit registers on top of 16bit + * bus. Some registers (for example access to forwarding database) have + * trigger bit on the first 16bit half of request, the result and + * configuration of request in the second half. + * To make it work properly, we should do the second part of transfer + * before the first one is done. + */ ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, val >> 16); if (ret < 0) goto error; + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); + if (ret < 0) + goto error; + return 0; + error: dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n"); return ret; From d09560435cb712c9ec1e62b8a43a79b0af69fe77 Mon Sep 17 00:00:00 2001 From: Qiu Wenbo Date: Sun, 4 Jul 2021 16:34:41 +0800 Subject: [PATCH 724/794] riscv: dts: fix memory size for the SiFive HiFive Unmatched The production version of HiFive Unmatched has 16GB of memory. 
Signed-off-by: Qiu Wenbo Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index b1c3c596578f..2e4ea84f27e7 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -24,7 +24,7 @@ memory@80000000 { device_type = "memory"; - reg = <0x0 0x80000000 0x2 0x00000000>; + reg = <0x0 0x80000000 0x4 0x00000000>; }; soc { From a18b14d8886614b3c7d290c4cfc33389822b0535 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 6 Jul 2021 09:26:21 -0700 Subject: [PATCH 725/794] riscv: Disable STACKPROTECTOR_PER_TASK if GCC_PLUGIN_RANDSTRUCT is enabled riscv uses the value of TSK_STACK_CANARY to set stack-protector-guard-offset. With GCC_PLUGIN_RANDSTRUCT enabled, that value is non-deterministic, and with riscv:allmodconfig often results in build errors such as cc1: error: '8120' is not a valid offset in '-mstack-protector-guard-offset=' Enable STACKPROTECTOR_PER_TASK only if GCC_PLUGIN_RANDSTRUCT is disabled to fix the problem. 
Fixes: fea2fed201ee5 ("riscv: Enable per-task stack canaries") Signed-off-by: Guenter Roeck Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8fcceb8eda07..31f9e92f1402 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -492,6 +492,7 @@ config CC_HAVE_STACKPROTECTOR_TLS config STACKPROTECTOR_PER_TASK def_bool y + depends on !GCC_PLUGIN_RANDSTRUCT depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS config PHYS_RAM_BASE From 5648c073c33d33a0a19d0cb1194a4eb88efe2b71 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Tue, 3 Aug 2021 21:47:11 +0200 Subject: [PATCH 726/794] USB: serial: option: add Telit FD980 composition 0x1056 Add the following Telit FD980 composition 0x1056: Cfg #1: mass storage Cfg #2: rndis, tty, adb, tty, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20210803194711.3036-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0fbe253dc570..039450069ca4 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1203,6 +1203,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1055, 0xff), /* Telit FN980 (PCIe) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 06f5553e0f0c2182268179b93856187d9cb86dd5 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 3 Aug 2021 18:58:21 +0800 Subject: [PATCH 727/794] net: sched: fix lockdep_set_class() typo error for sch->seqlock 
According to the comment in qdisc_alloc(), sch->seqlock's lockdep class key should be set to qdisc_tx_busylock. However, due to a probable typo, sch->busylock's lockdep class key is set to qdisc_tx_busylock instead, which is redundant because sch->busylock's lockdep class key is already set in qdisc_alloc(). So fix it by replacing sch->busylock with sch->seqlock. Fixes: 96009c7d500e ("sched: replace __QDISC_STATE_RUNNING bit with a spin lock") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d9ac60ffe927..a8dd06c74e31 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -913,7 +913,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); - lockdep_set_class(&sch->busylock, + lockdep_set_class(&sch->seqlock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); seqcount_init(&sch->running); From 13a9c4ac319a23c792e2e03ac73777b6710132c3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 3 Aug 2021 15:00:43 +0300 Subject: [PATCH 728/794] net/prestera: Fix devlink groups leakage in error flow Devlink trap group is registered but not released in error flow, add the missing devlink_trap_groups_unregister() call. Fixes: 0a9003f45e91 ("net: marvell: prestera: devlink: add traps/groups implementation") Signed-off-by: Leon Romanovsky Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/prestera/prestera_devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index d12e21db9fd6..fa7a0682ad1e 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -530,6 +530,8 @@ err_trap_register: prestera_trap = &prestera_trap_items_arr[i]; devlink_traps_unregister(devlink, &prestera_trap->trap, 1); } + devlink_trap_groups_unregister(devlink, prestera_trap_groups_arr, + groups_count); err_groups_register: kfree(trap_data->trap_items_arr); err_trap_items_alloc: From 3212a99349cee5fb611d3ffcf0e65bc3cd6dcf2f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Aug 2021 11:31:00 +0200 Subject: [PATCH 729/794] USB: serial: pl2303: fix GT type detection At least some PL2303GT have a bcdDevice of 0x305 instead of 0x100 as the datasheet claims. Add it to the list of known release numbers for the HXN (G) type. Fixes: 894758d0571d ("USB: serial: pl2303: tighten type HXN (G) detection") Reported-by: Vasily Khoruzhick Tested-by: Vasily Khoruzhick Cc: stable@vger.kernel.org # 5.13 Link: https://lore.kernel.org/r/20210804093100.24811-1-johan@kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 17601e32083e..930b3d50a330 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -432,6 +432,7 @@ static int pl2303_detect_type(struct usb_serial *serial) case 0x200: switch (bcdDevice) { case 0x100: + case 0x305: /* * Assume it's an HXN-type if the device doesn't * support the old read request value. 
From 8a160e2e9aeb8318159b48701ad8a6e22274372d Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Tue, 3 Aug 2021 20:25:23 +0300 Subject: [PATCH 730/794] net: usb: pegasus: Check the return value of get_registers() and friends; Certain call sites of get_registers() did not do proper error handling. This could be a problem as get_registers() typically returns the data via a pointer to a buffer. If an error occurs, the code carelessly manipulates the wrong data. Signed-off-by: Petko Manolov Reviewed-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 108 ++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 33 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 9a907182569c..22353bab76c8 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -132,9 +132,15 @@ static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, const void *data) { - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, + int ret; + + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0, indx, data, size, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } /* @@ -145,10 +151,15 @@ static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data) { void *buf = &data; + int ret; - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data, indx, buf, 1, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } static int update_eth_regs_async(pegasus_t *pegasus) @@ -188,10 +199,9 @@ static int update_eth_regs_async(pegasus_t *pegasus) static int 
__mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) { - int i; - __u8 data[4] = { phy, 0, 0, indx }; + int i, ret; __le16 regdi; - int ret = -ETIMEDOUT; + __u8 data[4] = { phy, 0, 0, indx }; if (cmd & PHY_WRITE) { __le16 *t = (__le16 *) & data[1]; @@ -207,12 +217,15 @@ static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) if (data[0] & PHY_DONE) break; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } if (cmd & PHY_READ) { ret = get_registers(p, PhyData, 2, &regdi); + if (ret < 0) + goto fail; *regd = le16_to_cpu(regdi); - return ret; } return 0; fail: @@ -235,9 +248,13 @@ static int write_mii_word(pegasus_t *pegasus, __u8 phy, __u8 indx, __u16 *regd) static int mdio_read(struct net_device *dev, int phy_id, int loc) { pegasus_t *pegasus = netdev_priv(dev); + int ret; u16 res; - read_mii_word(pegasus, phy_id, loc, &res); + ret = read_mii_word(pegasus, phy_id, loc, &res); + if (ret < 0) + return ret; + return (int)res; } @@ -251,10 +268,9 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val) static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) { - int i; - __u8 tmp = 0; + int ret, i; __le16 retdatai; - int ret; + __u8 tmp = 0; set_register(pegasus, EpromCtrl, 0); set_register(pegasus, EpromOffset, index); @@ -262,21 +278,25 @@ static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) for (i = 0; i < REG_TIMEOUT; i++) { ret = get_registers(pegasus, EpromCtrl, 1, &tmp); + if (ret < 0) + goto fail; if (tmp & EPROM_DONE) break; - if (ret == -ESHUTDOWN) - goto fail; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } ret = get_registers(pegasus, EpromData, 2, &retdatai); + if (ret < 0) + goto fail; *retdata = le16_to_cpu(retdatai); return ret; fail: - netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); - return -ETIMEDOUT; + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + 
return ret; } #ifdef PEGASUS_WRITE_EEPROM @@ -324,10 +344,10 @@ static int write_eprom_word(pegasus_t *pegasus, __u8 index, __u16 data) return ret; fail: - netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return -ETIMEDOUT; } -#endif /* PEGASUS_WRITE_EEPROM */ +#endif /* PEGASUS_WRITE_EEPROM */ static inline int get_node_id(pegasus_t *pegasus, u8 *id) { @@ -367,19 +387,21 @@ static void set_ethernet_addr(pegasus_t *pegasus) return; err: eth_hw_addr_random(pegasus->net); - dev_info(&pegasus->intf->dev, "software assigned MAC address.\n"); + netif_dbg(pegasus, drv, pegasus->net, "software assigned MAC address.\n"); return; } static inline int reset_mac(pegasus_t *pegasus) { + int ret, i; __u8 data = 0x8; - int i; set_register(pegasus, EthCtrl1, data); for (i = 0; i < REG_TIMEOUT; i++) { - get_registers(pegasus, EthCtrl1, 1, &data); + ret = get_registers(pegasus, EthCtrl1, 1, &data); + if (ret < 0) + goto fail; if (~data & 0x08) { if (loopback) break; @@ -402,22 +424,29 @@ static inline int reset_mac(pegasus_t *pegasus) } if (usb_dev_id[pegasus->dev_index].vendor == VENDOR_ELCON) { __u16 auxmode; - read_mii_word(pegasus, 3, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 3, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 3, 0x1b, &auxmode); } return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + return ret; } static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) { - __u16 linkpart; - __u8 data[4]; pegasus_t *pegasus = netdev_priv(dev); int ret; + __u16 linkpart; + __u8 data[4]; - read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + ret = read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + if (ret < 0) + goto fail; data[0] = 0xc8; /* TX & RX enable, append status, no CRC */ data[1] = 0; if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL)) @@ -435,11 +464,16 @@ static int 
enable_net_traffic(struct net_device *dev, struct usb_device *usb) usb_dev_id[pegasus->dev_index].vendor == VENDOR_LINKSYS2 || usb_dev_id[pegasus->dev_index].vendor == VENDOR_DLINK) { u16 auxmode; - read_mii_word(pegasus, 0, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 0, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 0, 0x1b, &auxmode); } + return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return ret; } @@ -447,9 +481,9 @@ static void read_bulk_callback(struct urb *urb) { pegasus_t *pegasus = urb->context; struct net_device *net; + u8 *buf = urb->transfer_buffer; int rx_status, count = urb->actual_length; int status = urb->status; - u8 *buf = urb->transfer_buffer; __u16 pkt_len; if (!pegasus) @@ -998,8 +1032,7 @@ static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd) data[0] = pegasus->phy; fallthrough; case SIOCDEVPRIVATE + 1: - read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); - res = 0; + res = read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); break; case SIOCDEVPRIVATE + 2: if (!capable(CAP_NET_ADMIN)) @@ -1033,22 +1066,25 @@ static void pegasus_set_multicast(struct net_device *net) static __u8 mii_phy_probe(pegasus_t *pegasus) { - int i; + int i, ret; __u16 tmp; for (i = 0; i < 32; i++) { - read_mii_word(pegasus, i, MII_BMSR, &tmp); + ret = read_mii_word(pegasus, i, MII_BMSR, &tmp); + if (ret < 0) + goto fail; if (tmp == 0 || tmp == 0xffff || (tmp & BMSR_MEDIA) == 0) continue; else return i; } - +fail: return 0xff; } static inline void setup_pegasus_II(pegasus_t *pegasus) { + int ret; __u8 data = 0xa5; set_register(pegasus, Reg1d, 0); @@ -1060,7 +1096,9 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg7b, 2); set_register(pegasus, 0x83, data); - get_registers(pegasus, 0x83, 1, &data); + ret = get_registers(pegasus, 0x83, 1, &data); + if (ret < 0) + goto fail; if (data == 0xa5) pegasus->chip = 0x8513; @@ -1075,6 
+1113,10 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg81, 6); else set_register(pegasus, Reg81, 2); + + return; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); } static void check_carrier(struct work_struct *work) From bc65bacf239d0bc1d00d92cd535a4031921dd78a Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Tue, 3 Aug 2021 20:25:24 +0300 Subject: [PATCH 731/794] net: usb: pegasus: Remove the changelog and DRIVER_VERSION. These are now deemed redundant. Signed-off-by: Petko Manolov Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 22353bab76c8..f18b03be1b87 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1,31 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 1999-2013 Petko Manolov (petkan@nucleusys.com) + * Copyright (c) 1999-2021 Petko Manolov (petkan@nucleusys.com) * - * ChangeLog: - * .... Most of the time spent on reading sources & docs. - * v0.2.x First official release for the Linux kernel. - * v0.3.0 Beutified and structured, some bugs fixed. - * v0.3.x URBifying bulk requests and bugfixing. First relatively - * stable release. Still can touch device's registers only - * from top-halves. - * v0.4.0 Control messages remained unurbified are now URBs. - * Now we can touch the HW at any time. - * v0.4.9 Control urbs again use process context to wait. Argh... - * Some long standing bugs (enable_net_traffic) fixed. - * Also nasty trick about resubmiting control urb from - * interrupt context used. Please let me know how it - * behaves. Pegasus II support added since this version. - * TODO: suppressing HCD warnings spewage on disconnect. - * v0.4.13 Ethernet address is now set at probe(), not at open() - * time as this seems to break dhcpd. 
- * v0.5.0 branch to 2.5.x kernels - * v0.5.1 ethtool support added - * v0.5.5 rx socket buffers are in a pool and the their allocation - * is out of the interrupt routine. - * ... - * v0.9.3 simplified [get|set]_register(s), async update registers - * logic revisited, receive skb_pool removed. */ #include @@ -45,7 +21,6 @@ /* * Version Information */ -#define DRIVER_VERSION "v0.9.3 (2013/04/25)" #define DRIVER_AUTHOR "Petko Manolov " #define DRIVER_DESC "Pegasus/Pegasus II USB Ethernet driver" @@ -914,7 +889,6 @@ static void pegasus_get_drvinfo(struct net_device *dev, pegasus_t *pegasus = netdev_priv(dev); strlcpy(info->driver, driver_name, sizeof(info->driver)); - strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); usb_make_path(pegasus->usb, info->bus_info, sizeof(info->bus_info)); } @@ -1338,7 +1312,7 @@ static void __init parse_id(char *id) static int __init pegasus_init(void) { - pr_info("%s: %s, " DRIVER_DESC "\n", driver_name, DRIVER_VERSION); + pr_info("%s: " DRIVER_DESC "\n", driver_name); if (devid) parse_id(devid); return usb_register(&pegasus_driver); From 85cd39af14f498f791d8aab3fbd64cd175787f1a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 4 Aug 2021 05:28:52 -0400 Subject: [PATCH 732/794] KVM: Do not leak memory for duplicate debugfs directories KVM creates a debugfs directory for each VM in order to store statistics about the virtual machine. The directory name is built from the process pid and a VM fd. While generally unique, it is possible to keep a file descriptor alive in a way that causes duplicate directories, which manifests as these messages: [ 471.846235] debugfs: Directory '20245-4' with parent 'kvm' already present! Even though this should not happen in practice, it is more or less expected in the case of KVM for testcases that call KVM_CREATE_VM and close the resulting file descriptor repeatedly and in parallel. 
When this happens, debugfs_create_dir() returns an error but kvm_create_vm_debugfs() goes on to allocate stat data structs which are later leaked. The slow memory leak was spotted by syzkaller, where it caused OOM reports. Since the issue only affects debugfs, do a lookup before calling debugfs_create_dir, so that the message is downgraded and rate-limited. While at it, ensure kvm->debugfs_dentry is NULL rather than an error if it is not created. This fixes kvm_destroy_vm_debugfs, which was not checking IS_ERR_OR_NULL correctly. Cc: stable@vger.kernel.org Fixes: 536a6f88c49d ("KVM: Create debugfs dir and stat files for each VM") Reported-by: Alexey Kardashevskiy Suggested-by: Greg Kroah-Hartman Acked-by: Greg Kroah-Hartman Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d20fba0fc290..b50dbe269f4b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -892,6 +892,8 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) { + static DEFINE_MUTEX(kvm_debugfs_lock); + struct dentry *dent; char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; const struct _kvm_stats_desc *pdesc; @@ -903,8 +905,20 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return 0; snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); - kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_lock(&kvm_debugfs_lock); + dent = debugfs_lookup(dir_name, kvm_debugfs_dir); + if (dent) { + pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name); + dput(dent); + mutex_unlock(&kvm_debugfs_lock); + return 0; + } + dent = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_unlock(&kvm_debugfs_lock); + if (IS_ERR(dent)) + return 0; + kvm->debugfs_dentry = dent; kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, 
sizeof(*kvm->debugfs_stat_data), GFP_KERNEL_ACCOUNT); @@ -5201,7 +5215,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { + if (kvm->debugfs_dentry) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); if (p) { From 179c6c27bf487273652efc99acd3ba512a23c137 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 3 Aug 2021 09:27:46 -0700 Subject: [PATCH 733/794] KVM: SVM: Fix off-by-one indexing when nullifying last used SEV VMCB Use the raw ASID, not ASID-1, when nullifying the last used VMCB when freeing an SEV ASID. The consumer, pre_sev_run(), indexes the array by the raw ASID, thus KVM could get a false negative when checking for a different VMCB if KVM manages to reallocate the same ASID+VMCB combo for a new VM. Note, this cannot cause a functional issue _in the current code_, as pre_sev_run() also checks which pCPU last did VMRUN for the vCPU, and last_vmentry_cpu is initialized to -1 during vCPU creation, i.e. is guaranteed to mismatch on the first VMRUN. However, prior to commit 8a14fe4f0c54 ("kvm: x86: Move last_cpu into kvm_vcpu_arch as last_vmentry_cpu"), SVM tracked pCPU on its own and zero-initialized the last_cpu variable. Thus it's theoretically possible that older versions of KVM could miss a TLB flush if the first VMRUN is on pCPU0 and the ASID and VMCB exactly match those of a prior VM. 
Fixes: 70cd94e60c73 ("KVM: SVM: VMRUN should use associated ASID when SEV is enabled") Cc: Tom Lendacky Cc: Brijesh Singh Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6710d9ee2e4b..4d0aba185412 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -189,7 +189,7 @@ static void sev_asid_free(struct kvm_sev_info *sev) for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); - sd->sev_vmcbs[pos] = NULL; + sd->sev_vmcbs[sev->asid] = NULL; } mutex_unlock(&sev_bitmap_lock); From 396492b4c5f249f616002bb5de787d060d2b2974 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Aug 2021 16:14:15 -0700 Subject: [PATCH 734/794] docs: networking: netdevsim rules There are aspects of netdevsim which are commonly misunderstood and pointed out in review. Cong suggested we document them. Suggested-by: Cong Wang Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 91b2cf712801..e26532f49760 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -228,6 +228,23 @@ before posting to the mailing list. The patchwork build bot instance gets overloaded very easily and netdev@vger really doesn't need more traffic if we can help it. +netdevsim is great, can I extend it for my out-of-tree tests? +------------------------------------------------------------- + +No, `netdevsim` is a test vehicle solely for upstream tests. +(Please add your tests under tools/testing/selftests/.) + +We also give no guarantees that `netdevsim` won't change in the future +in a way which would break what would normally be considered uAPI. 
+ +Is netdevsim considered a "user" of an API? +------------------------------------------- + +Linux kernel has a long standing rule that no API should be added unless +it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are +strongly encouraged when adding new APIs, but `netdevsim` in itself +is **not** considered a use case/user. + Any other tips to help ensure my net/net-next patch gets OK'd? -------------------------------------------------------------- Attention to detail. Re-read your own work as if you were the From 6b67d4d63edece1033972214704c04f36c5be89a Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Wed, 4 Aug 2021 11:13:39 +0300 Subject: [PATCH 735/794] net: usb: lan78xx: don't modify phy_device state concurrently Currently phy_device state could be left in inconsistent state shown by following alert message[1]. This is because phy_read_status could be called concurrently from lan78xx_delayedwork, phy_state_machine and __ethtool_get_link. Fix this by making sure that phy_device state is updated atomically. [1] lan78xx 1-1.1.1:1.0 eth0: No phy led trigger registered for speed(-1) Signed-off-by: Ivan T. Ivanov Signed-off-by: David S. 
Miller --- drivers/net/usb/lan78xx.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 25489389ea49..6d092d78e0cb 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1154,7 +1154,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) { struct phy_device *phydev = dev->net->phydev; struct ethtool_link_ksettings ecmd; - int ladv, radv, ret; + int ladv, radv, ret, link; u32 buf; /* clear LAN78xx interrupt status */ @@ -1162,9 +1162,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) if (unlikely(ret < 0)) return -EIO; + mutex_lock(&phydev->lock); phy_read_status(phydev); + link = phydev->link; + mutex_unlock(&phydev->lock); - if (!phydev->link && dev->link_on) { + if (!link && dev->link_on) { dev->link_on = false; /* reset MAC */ @@ -1177,7 +1180,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) return -EIO; del_timer(&dev->stat_monitor); - } else if (phydev->link && !dev->link_on) { + } else if (link && !dev->link_on) { dev->link_on = true; phy_ethtool_ksettings_get(phydev, &ecmd); @@ -1466,9 +1469,14 @@ static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata) static u32 lan78xx_get_link(struct net_device *net) { - phy_read_status(net->phydev); + u32 link; - return net->phydev->link; + mutex_lock(&net->phydev->lock); + phy_read_status(net->phydev); + link = net->phydev->link; + mutex_unlock(&net->phydev->lock); + + return link; } static void lan78xx_get_drvinfo(struct net_device *net, From f558c2b834ec27e75d37b1c860c139e7b7c3a8e4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Aug 2021 12:45:01 +0200 Subject: [PATCH 736/794] sched/rt: Fix double enqueue caused by rt_effective_prio Double enqueues in rt runqueues (list) have been reported while running a simple test that spawns a number of threads doing a short sleep/run pattern while being concurrently setscheduled between rt and fair class. 
WARNING: CPU: 3 PID: 2825 at kernel/sched/rt.c:1294 enqueue_task_rt+0x355/0x360 CPU: 3 PID: 2825 Comm: setsched__13 RIP: 0010:enqueue_task_rt+0x355/0x360 Call Trace: __sched_setscheduler+0x581/0x9d0 _sched_setscheduler+0x63/0xa0 do_sched_setscheduler+0xa0/0x150 __x64_sys_sched_setscheduler+0x1a/0x30 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xae list_add double add: new=ffff9867cb629b40, prev=ffff9867cb629b40, next=ffff98679fc67ca0. kernel BUG at lib/list_debug.c:31! invalid opcode: 0000 [#1] PREEMPT_RT SMP PTI CPU: 3 PID: 2825 Comm: setsched__13 RIP: 0010:__list_add_valid+0x41/0x50 Call Trace: enqueue_task_rt+0x291/0x360 __sched_setscheduler+0x581/0x9d0 _sched_setscheduler+0x63/0xa0 do_sched_setscheduler+0xa0/0x150 __x64_sys_sched_setscheduler+0x1a/0x30 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xae __sched_setscheduler() uses rt_effective_prio() to handle proper queuing of priority boosted tasks that are setscheduled while being boosted. rt_effective_prio() is however called twice per each __sched_setscheduler() call: first directly by __sched_setscheduler() before dequeuing the task and then by __setscheduler() to actually do the priority change. If the priority of the pi_top_task is concurrently being changed however, it might happen that the two calls return different results. If, for example, the first call returned the same rt priority the task was running at and the second one a fair priority, the task won't be removed by the rt list (on_list still set) and then enqueued in the fair runqueue. When eventually setscheduled back to rt it will be seen as enqueued already and the WARNING/BUG be issued. Fix this by calling rt_effective_prio() only once and then reusing the return value. While at it refactor code as well for clarity. Concurrent priority inheritance handling is still safe and will eventually converge to a new state by following the inheritance chain(s). 
Fixes: 0782e63bc6fe ("sched: Handle priority boosted tasks proper in setscheduler()") [squashed Peterz changes; added changelog] Reported-by: Mark Simmons Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210803104501.38333-1-juri.lelli@redhat.com --- kernel/sched/core.c | 90 ++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 55 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d9ff40f4661..20ffcc044134 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1981,12 +1981,18 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) dequeue_task(rq, p, flags); } -/* - * __normal_prio - return the priority that is based on the static prio - */ -static inline int __normal_prio(struct task_struct *p) +static inline int __normal_prio(int policy, int rt_prio, int nice) { - return p->static_prio; + int prio; + + if (dl_policy(policy)) + prio = MAX_DL_PRIO - 1; + else if (rt_policy(policy)) + prio = MAX_RT_PRIO - 1 - rt_prio; + else + prio = NICE_TO_PRIO(nice); + + return prio; } /* @@ -1998,15 +2004,7 @@ static inline int __normal_prio(struct task_struct *p) */ static inline int normal_prio(struct task_struct *p) { - int prio; - - if (task_has_dl_policy(p)) - prio = MAX_DL_PRIO-1; - else if (task_has_rt_policy(p)) - prio = MAX_RT_PRIO-1 - p->rt_priority; - else - prio = __normal_prio(p); - return prio; + return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); } /* @@ -4099,7 +4097,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) } else if (PRIO_TO_NICE(p->static_prio) < 0) p->static_prio = NICE_TO_PRIO(0); - p->prio = p->normal_prio = __normal_prio(p); + p->prio = p->normal_prio = p->static_prio; set_load_weight(p, false); /* @@ -6341,6 +6339,18 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag } EXPORT_SYMBOL(default_wake_function); +static void __setscheduler_prio(struct 
task_struct *p, int prio) +{ + if (dl_prio(prio)) + p->sched_class = &dl_sched_class; + else if (rt_prio(prio)) + p->sched_class = &rt_sched_class; + else + p->sched_class = &fair_sched_class; + + p->prio = prio; +} + #ifdef CONFIG_RT_MUTEXES static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) @@ -6456,22 +6466,19 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) } else { p->dl.pi_se = &p->dl; } - p->sched_class = &dl_sched_class; } else if (rt_prio(prio)) { if (dl_prio(oldprio)) p->dl.pi_se = &p->dl; if (oldprio < prio) queue_flag |= ENQUEUE_HEAD; - p->sched_class = &rt_sched_class; } else { if (dl_prio(oldprio)) p->dl.pi_se = &p->dl; if (rt_prio(oldprio)) p->rt.timeout = 0; - p->sched_class = &fair_sched_class; } - p->prio = prio; + __setscheduler_prio(p, prio); if (queued) enqueue_task(rq, p, queue_flag); @@ -6824,35 +6831,6 @@ static void __setscheduler_params(struct task_struct *p, set_load_weight(p, true); } -/* Actually do priority change: must hold pi & rq lock. */ -static void __setscheduler(struct rq *rq, struct task_struct *p, - const struct sched_attr *attr, bool keep_boost) -{ - /* - * If params can't change scheduling class changes aren't allowed - * either. - */ - if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) - return; - - __setscheduler_params(p, attr); - - /* - * Keep a potential priority boosting if called from - * sched_setscheduler(). - */ - p->prio = normal_prio(p); - if (keep_boost) - p->prio = rt_effective_prio(p, p->prio); - - if (dl_prio(p->prio)) - p->sched_class = &dl_sched_class; - else if (rt_prio(p->prio)) - p->sched_class = &rt_sched_class; - else - p->sched_class = &fair_sched_class; -} - /* * Check the target process has a UID that matches the current process's: */ @@ -6873,10 +6851,8 @@ static int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi) { - int newprio = dl_policy(attr->sched_policy) ? 
MAX_DL_PRIO - 1 : - MAX_RT_PRIO - 1 - attr->sched_priority; - int retval, oldprio, oldpolicy = -1, queued, running; - int new_effective_prio, policy = attr->sched_policy; + int oldpolicy = -1, policy = attr->sched_policy; + int retval, oldprio, newprio, queued, running; const struct sched_class *prev_class; struct callback_head *head; struct rq_flags rf; @@ -7074,6 +7050,7 @@ change: p->sched_reset_on_fork = reset_on_fork; oldprio = p->prio; + newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); if (pi) { /* * Take priority boosted tasks into account. If the new @@ -7082,8 +7059,8 @@ change: * the runqueue. This will be done when the task deboost * itself. */ - new_effective_prio = rt_effective_prio(p, newprio); - if (new_effective_prio == oldprio) + newprio = rt_effective_prio(p, newprio); + if (newprio == oldprio) queue_flags &= ~DEQUEUE_MOVE; } @@ -7096,7 +7073,10 @@ change: prev_class = p->sched_class; - __setscheduler(rq, p, attr, pi); + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { + __setscheduler_params(p, attr); + __setscheduler_prio(p, newprio); + } __setscheduler_uclamp(p, attr); if (queued) { From f4b4b45652578357031fbbef7f7a1b04f6fa2dc3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Jul 2021 11:14:57 +0200 Subject: [PATCH 737/794] perf/x86: Fix out of bound MSR access On Wed, Jul 28, 2021 at 12:49:43PM -0400, Vince Weaver wrote: > [32694.087403] unchecked MSR access error: WRMSR to 0x318 (tried to write 0x0000000000000000) at rIP: 0xffffffff8106f854 (native_write_msr+0x4/0x20) > [32694.101374] Call Trace: > [32694.103974] perf_clear_dirty_counters+0x86/0x100 The problem being that it doesn't filter out all fake counters, in specific the above (erroneously) tries to use FIXED_BTS. Limit the fixed counters indexes to the hardware supplied number. 
Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Tested-by: Vince Weaver Tested-by: Like Xu Link: https://lkml.kernel.org/r/YQJxka3dxgdIdebG@hirez.programming.kicks-ass.net --- arch/x86/events/core.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1eb45139fcc6..3092fbf9dbe4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2489,13 +2489,15 @@ void perf_clear_dirty_counters(void) return; for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { - /* Metrics and fake events don't have corresponding HW counters. */ - if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR)) - continue; - else if (i >= INTEL_PMC_IDX_FIXED) + if (i >= INTEL_PMC_IDX_FIXED) { + /* Metrics and fake events don't have corresponding HW counters. */ + if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed)) + continue; + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); - else + } else { wrmsrl(x86_pmu_event_addr(i), 0); + } } bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX); From df51fe7ea1c1c2c3bfdb81279712fdd2e4ea6c27 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 2 Aug 2021 15:08:50 +0800 Subject: [PATCH 738/794] perf/x86/amd: Don't touch the AMD64_EVENTSEL_HOSTONLY bit inside the guest If we use "perf record" in an AMD Milan guest, dmesg reports a #GP warning from an unchecked MSR access error on MSR_F15H_PERF_CTLx: [] unchecked MSR access error: WRMSR to 0xc0010200 (tried to write 0x0000020000110076) at rIP: 0xffffffff8106ddb4 (native_write_msr+0x4/0x20) [] Call Trace: [] amd_pmu_disable_event+0x22/0x90 [] x86_pmu_stop+0x4c/0xa0 [] x86_pmu_del+0x3a/0x140 The AMD64_EVENTSEL_HOSTONLY bit is defined and used on the host, while the guest perf driver should avoid such use. 
Fixes: 1018faa6cf23 ("perf/x86/kvm: Fix Host-Only/Guest-Only counting with SVM disabled") Signed-off-by: Like Xu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Liam Merwick Tested-by: Kim Phillips Tested-by: Liam Merwick Link: https://lkml.kernel.org/r/20210802070850.35295-1-likexu@tencent.com --- arch/x86/events/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2bf1c7ea2758..2938c902ffbe 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1115,9 +1115,10 @@ void x86_pmu_stop(struct perf_event *event, int flags); static inline void x86_pmu_disable_event(struct perf_event *event) { + u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrl(hwc->config_base, hwc->config & ~disable_mask); if (is_counter_pair(hwc)) wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); From bb2baeb214a71cda47d50dce80414016117ddda0 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 2 Aug 2021 11:09:03 -0700 Subject: [PATCH 739/794] KVM: SVM: improve the code readability for ASID management KVM SEV code uses bitmaps to manage ASID states. ASID 0 was always skipped because it is never used by VM. Thus, in existing code, ASID value and its bitmap position always have an 'offset-by-1' relationship. Both SEV and SEV-ES share the ASID space, thus KVM uses a dynamic range [min_asid, max_asid] to handle SEV and SEV-ES ASIDs separately. Existing code mixes the usage of ASID value and its bitmap position by using the same variable called 'min_asid'. Fix the min_asid usage: ensure that its usage is consistent with its name; allocate extra size for ASID 0 to ensure that each ASID has the same value with its bitmap position. Add comments on ASID bitmap allocation to clarify the size change.
Signed-off-by: Mingwei Zhang Cc: Tom Lendacky Cc: Marc Orr Cc: David Rientjes Cc: Alper Gun Cc: Dionna Glaze Cc: Sean Christopherson Cc: Vipin Sharma Cc: Peter Gonda Cc: Joerg Roedel Message-Id: <20210802180903.159381-1-mizhang@google.com> [Fix up sev_asid_free to also index by ASID, as suggested by Sean Christopherson, and use nr_asids in sev_cpu_init. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 43 +++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 4d0aba185412..7fbce342eec4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock); unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long sev_me_mask; +static unsigned int nr_asids; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; @@ -78,11 +79,11 @@ struct enc_region { /* Called with the sev_bitmap_lock held, or on shutdown */ static int sev_flush_asids(int min_asid, int max_asid) { - int ret, pos, error = 0; + int ret, asid, error = 0; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid); - if (pos >= max_asid) + asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); + if (asid > max_asid) return -EBUSY; /* @@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid) /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, - max_sev_asid); - bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); + nr_asids); + bitmap_zero(sev_reclaim_asid_bitmap, nr_asids); return true; } static int sev_asid_new(struct kvm_sev_info *sev) { - int pos, min_asid, max_asid, ret; + int asid, min_asid, max_asid, ret; bool retry = true; enum misc_res_type type; @@ -143,11 +144,11 @@ 
static int sev_asid_new(struct kvm_sev_info *sev) * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ - min_asid = sev->es_active ? 0 : min_sev_asid - 1; + min_asid = sev->es_active ? 1 : min_sev_asid; max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); - if (pos >= max_asid) { + asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); + if (asid > max_asid) { if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; @@ -157,11 +158,11 @@ again: goto e_uncharge; } - __set_bit(pos, sev_asid_bitmap); + __set_bit(asid, sev_asid_bitmap); mutex_unlock(&sev_bitmap_lock); - return pos + 1; + return asid; e_uncharge: misc_cg_uncharge(type, sev->misc_cg, 1); put_misc_cg(sev->misc_cg); @@ -179,13 +180,12 @@ static int sev_get_asid(struct kvm *kvm) static void sev_asid_free(struct kvm_sev_info *sev) { struct svm_cpu_data *sd; - int cpu, pos; + int cpu; enum misc_res_type type; mutex_lock(&sev_bitmap_lock); - pos = sev->asid - 1; - __set_bit(pos, sev_reclaim_asid_bitmap); + __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); @@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void) min_sev_asid = edx; sev_me_mask = 1UL << (ebx & 0x3f); - /* Initialize SEV ASID bitmaps */ - sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + /* + * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, + * even though it's never used, so that the bitmap is indexed by the + * actual ASID. 
+ */ + nr_asids = max_sev_asid + 1; + sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_asid_bitmap) goto out; - sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) { bitmap_free(sev_asid_bitmap); sev_asid_bitmap = NULL; @@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void) return; /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ - sev_flush_asids(0, max_sev_asid); + sev_flush_asids(1, max_sev_asid); bitmap_free(sev_asid_bitmap); bitmap_free(sev_reclaim_asid_bitmap); @@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) if (!sev_enabled) return 0; - sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); + sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) return -ENOMEM; From 13c2c3cfe01952575b1dd5e24d450fcccff93bc0 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 4 Aug 2021 14:20:57 +0300 Subject: [PATCH 740/794] KVM: selftests: fix hyperv_clock test The test was mistakenly using addr_gpa2hva on a gva and that happened to work accidentally. Commit 106a2e766eae ("KVM: selftests: Lower the min virtual address for misc page allocations") revealed this bug. 
Fixes: 2c7f76b4c42b ("selftests: kvm: Add basic Hyper-V clocksources tests", 2021-03-18) Signed-off-by: Maxim Levitsky Message-Id: <20210804112057.409498-1-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index bab10ae787b6..e0b2bb1339b1 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -215,7 +215,7 @@ int main(void) vcpu_set_hv_cpuid(vm, VCPU_ID); tsc_page_gva = vm_vaddr_alloc_page(vm); - memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); + memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned\n"); vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); From 952835edb4fdad49361d5330da918be8b765b787 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 4 Aug 2021 17:18:00 +0200 Subject: [PATCH 741/794] s390/dasd: fix use after free in dasd path handling When new configuration data is obtained after a path event it is stored in the per path array. The old data needs to be freed. The first valid configuration data is also referenced in the device private structure to identify the device. When the old per path configuration data was freed the device still pointed to the already freed data leading to a use after free. Fix by replacing also the device configuration data with the newly obtained one before the old data gets freed. 
Fixes: 460181217a24 ("s390/dasd: Store path configuration data during path handling") Cc: stable@vger.kernel.org # 5.11+ Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20210804151800.4031761-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 0de1a463c509..fb5d8152652d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1004,15 +1004,23 @@ static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len) static void dasd_eckd_store_conf_data(struct dasd_device *device, struct dasd_conf_data *conf_data, int chp) { + struct dasd_eckd_private *private = device->private; struct channel_path_desc_fmt0 *chp_desc; struct subchannel_id sch_id; + void *cdp; - ccw_device_get_schid(device->cdev, &sch_id); /* * path handling and read_conf allocate data * free it before replacing the pointer + * also replace the old private->conf_data pointer + * with the new one if this points to the same data */ - kfree(device->path[chp].conf_data); + cdp = device->path[chp].conf_data; + if (private->conf_data == cdp) { + private->conf_data = (void *)conf_data; + dasd_eckd_identify_conf_parts(private); + } + ccw_device_get_schid(device->cdev, &sch_id); device->path[chp].conf_data = conf_data; device->path[chp].cssid = sch_id.cssid; device->path[chp].ssid = sch_id.ssid; @@ -1020,6 +1028,7 @@ static void dasd_eckd_store_conf_data(struct dasd_device *device, if (chp_desc) device->path[chp].chpid = chp_desc->chpid; kfree(chp_desc); + kfree(cdp); } static void dasd_eckd_clear_conf_data(struct dasd_device *device) From 402e0b8cd00284a25c6eb8c0a43319bc8430b1c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Aug 2021 11:49:58 +0200 Subject: [PATCH 742/794] n64cart: fix the dma address in n64cart_do_bvec dma_map_bvec already 
takes bv_offset into account. Fixes: 9b2a2bbbb4d0 ("block: Add n64 cart driver") Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/n64cart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 7b4dd10af9ec..c84be0028f63 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -74,7 +74,7 @@ static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos) n64cart_wait_dma(); - n64cart_write_reg(PI_DRAM_REG, dma_addr + bv->bv_offset); + n64cart_write_reg(PI_DRAM_REG, dma_addr); n64cart_write_reg(PI_CART_REG, (bstart | CART_DOMAIN) & CART_MAX); n64cart_write_reg(PI_WRITE_REG, bv->bv_len - 1); From 83d6c39310b6d11199179f6384c2b0a415389597 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 3 Aug 2021 09:14:35 -0600 Subject: [PATCH 743/794] io-wq: fix race between worker exiting and activating free worker Nadav correctly reports that we have a race between a worker exiting, and new work being queued. This can lead to work being queued behind an existing worker that could be sleeping on an event before it can run to completion, and hence introducing potential big latency gaps if we hit this race condition: cpu0 cpu1 ---- ---- io_wqe_worker() schedule_timeout() // timed out io_wqe_enqueue() io_wqe_wake_worker() // work_flags & IO_WQ_WORK_CONCURRENT io_wqe_activate_free_worker() io_worker_exit() Fix this by having the exiting worker go through the normal decrement of a running worker, which will spawn a new one if needed. The free worker activation is modified to only return success if we were able to find a sleeping worker - if not, we keep looking through the list. If we fail, we create a new worker as per usual. 
Cc: stable@vger.kernel.org Link: https://lore.kernel.org/io-uring/BFF746C0-FEDE-4646-A253-3021C57C26C9@gmail.com/ Reported-by: Nadav Amit Tested-by: Nadav Amit Signed-off-by: Jens Axboe --- fs/io-wq.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index cf086b01c6c6..50dc93ffc153 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -130,6 +130,7 @@ struct io_cb_cancel_data { }; static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); +static void io_wqe_dec_running(struct io_worker *worker); static bool io_worker_get(struct io_worker *worker) { @@ -168,26 +169,21 @@ static void io_worker_exit(struct io_worker *worker) { struct io_wqe *wqe = worker->wqe; struct io_wqe_acct *acct = io_wqe_get_acct(worker); - unsigned flags; if (refcount_dec_and_test(&worker->ref)) complete(&worker->ref_done); wait_for_completion(&worker->ref_done); - preempt_disable(); - current->flags &= ~PF_IO_WORKER; - flags = worker->flags; - worker->flags = 0; - if (flags & IO_WORKER_F_RUNNING) - atomic_dec(&acct->nr_running); - worker->flags = 0; - preempt_enable(); - raw_spin_lock_irq(&wqe->lock); - if (flags & IO_WORKER_F_FREE) + if (worker->flags & IO_WORKER_F_FREE) hlist_nulls_del_rcu(&worker->nulls_node); list_del_rcu(&worker->all_list); acct->nr_workers--; + preempt_disable(); + io_wqe_dec_running(worker); + worker->flags = 0; + current->flags &= ~PF_IO_WORKER; + preempt_enable(); raw_spin_unlock_irq(&wqe->lock); kfree_rcu(worker, rcu); @@ -214,15 +210,19 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe) struct hlist_nulls_node *n; struct io_worker *worker; - n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list)); - if (is_a_nulls(n)) - return false; - - worker = hlist_nulls_entry(n, struct io_worker, nulls_node); - if (io_worker_get(worker)) { - wake_up_process(worker->task); + /* + * Iterate free_list and see if we can find an idle worker to + * activate. 
If a given worker is on the free_list but in the process + * of exiting, keep trying. + */ + hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { + if (!io_worker_get(worker)) + continue; + if (wake_up_process(worker->task)) { + io_worker_release(worker); + return true; + } io_worker_release(worker); - return true; } return false; From a07296453bf2778952a09b6244a695bf7607babb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Aug 2021 13:41:47 -0700 Subject: [PATCH 744/794] drm/i915: fix i915_globals_exit() section mismatch error Fix modpost Section mismatch error in i915_globals_exit(). Since both an __init function and an __exit function can call i915_globals_exit(), any function that i915_globals_exit() calls should not be marked as __init or __exit. I.e., it needs to be available for either of them. WARNING: modpost: vmlinux.o(.text+0x8b796a): Section mismatch in reference from the function i915_globals_exit() to the function .exit.text:__i915_globals_flush() The function i915_globals_exit() references a function in an exit section. Often the function __i915_globals_flush() has valid usage outside the exit section and the fix is to remove the __exit annotation of __i915_globals_flush. ERROR: modpost: Section mismatches detected. Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them. 
Fixes: 1354d830cb8f ("drm/i915: Call i915_globals_exit() if pci_register_device() fails") Signed-off-by: Randy Dunlap Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Joonas Lahtinen Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20210804204147.2070-1-rdunlap@infradead.org --- drivers/gpu/drm/i915/i915_globals.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 2db90e770616..3acb0b6be284 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -138,7 +138,7 @@ void i915_globals_unpark(void) atomic_inc(&active); } -static void __exit __i915_globals_flush(void) +static void __i915_globals_flush(void) { atomic_inc(&active); /* skip shrinking */ From 2c05caa7ba8803209769b9e4fe02c38d77ae88d0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 30 Jul 2021 17:19:51 -0400 Subject: [PATCH 745/794] tracing / histogram: Give calculation hist_fields a size When working on my user space applications, I found a bug in the synthetic event code where the automated synthetic event field was not matching the event field calculation it was attached to. Looking deeper into it, it was because the calculation hist_field was not given a size. The synthetic event fields are matched to their hist_fields either by having the field have an identical string type, or if that does not match, then the size and signed values are used to match the fields. The problem arose when I tried to match a calculation where the fields were "unsigned int". My tool created a synthetic event of type "u32". But it failed to match. The string was: diff=field1-field2:onmatch(event).trace(synth,$diff) Adding debugging into the kernel, I found that the size of "diff" was 0. 
And since it was given "unsigned int" as a type, the histogram fallback code used size and signed. The signed matched, but the size of u32 (4) did not match zero, and the event failed to be created. This can be worse if the field you want to match is not one of the acceptable fields for a synthetic event. As event fields can have any type that is supported in Linux, this can cause an issue. For example, if a type is an enum. Then there's no way to use that with any calculations. Have the calculation field simply take on the size of what it is calculating. Link: https://lkml.kernel.org/r/20210730171951.59c7743f@oasis.local.home Cc: Tom Zanussi Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Ingo Molnar Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: 100719dcef447 ("tracing: Add simple expression support to hist triggers") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 34325f41ebc0..362db9b81b8d 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2287,6 +2287,10 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, expr->operands[0] = operand1; expr->operands[1] = operand2; + + /* The operand sizes should be the same, so just pick one */ + expr->size = operand1->size; + expr->operator = field_op; expr->name = expr_str(expr, 0); expr->type = kstrdup(operand1->type, GFP_KERNEL); From a9d10ca4986571bffc19778742d508cc8dd13e02 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 28 Jul 2021 07:55:43 +0900 Subject: [PATCH 746/794] tracing: Reject string operand in the histogram expression Since the string type can not be the target of the addition / subtraction operation, it must be rejected. Without this fix, the string type silently converted to digits. 
Link: https://lkml.kernel.org/r/162742654278.290973.1523000673366456634.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 100719dcef447 ("tracing: Add simple expression support to hist triggers") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 362db9b81b8d..949ef09dc537 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -65,7 +65,8 @@ C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \ C(EMPTY_SORT_FIELD, "Empty sort field"), \ C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ - C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), + C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \ + C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), #undef C #define C(a, b) HIST_ERR_##a @@ -2156,6 +2157,13 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, ret = PTR_ERR(operand1); goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + /* String type can not be the operand of unary operator. */ + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + destroy_hist_field(operand1, 0); + ret = -EINVAL; + goto free; + } expr->flags |= operand1->flags & (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); @@ -2257,6 +2265,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand1 = NULL; goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str)); + ret = -EINVAL; + goto free; + } /* rest of string could be another expression e.g. 
b+c in a+b+c */ operand_flags = 0; @@ -2266,6 +2279,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand2 = NULL; goto free; } + if (operand2->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + ret = -EINVAL; + goto free; + } ret = check_expr_operands(file->tr, operand1, operand2); if (ret) From b18b851ba85a5855cb53865fcff3cd2c17b44b0b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 2 Aug 2021 14:03:07 -0700 Subject: [PATCH 747/794] scripts/recordmcount.pl: Remove check_objcopy() and $can_use_local When building ARCH=riscv allmodconfig with llvm-objcopy, the objcopy version warning from this script appears: WARNING: could not find objcopy version or version is less than 2.17. Local function references are disabled. The check_objcopy() function in scripts/recordmcount.pl is set up to parse GNU objcopy's version string, not llvm-objcopy's, which triggers the warning. Commit 799c43415442 ("kbuild: thin archives make default for all archs") made binutils 2.20 mandatory and commit ba64beb17493 ("kbuild: check the minimum assembler version in Kconfig") enforces this at configuration time so just remove check_objcopy() and $can_use_local instead, assuming --globalize-symbol is always available. 
llvm-objcopy has supported --globalize-symbol since LLVM 7.0.0 in 2018 and the minimum version for building the kernel with LLVM is 10.0.1 so there is no issue introduced: Link: https://github.com/llvm/llvm-project/commit/ee5be798dae30d5f9414b01f76ff807edbc881aa Link: https://lkml.kernel.org/r/20210802210307.3202472-1-nathan@kernel.org Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Steven Rostedt (VMware) --- Makefile | 1 - scripts/recordmcount.pl | 40 ---------------------------------------- 2 files changed, 41 deletions(-) diff --git a/Makefile b/Makefile index e4f5895badb5..d6915f361aa4 100644 --- a/Makefile +++ b/Makefile @@ -546,7 +546,6 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \ PHONY += scripts_basic scripts_basic: $(Q)$(MAKE) $(build)=scripts/basic - $(Q)rm -f .tmp_quiet_recordmcount PHONY += outputmakefile ifdef building_out_of_srctree diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index c17e48020ec3..8f6b13ae46bf 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -173,39 +173,6 @@ my $mcount_regex; # Find the call site to mcount (return offset) my $mcount_adjust; # Address adjustment to mcount offset my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command -my $can_use_local = 0; # If we can use local function references - -# Shut up recordmcount if user has older objcopy -my $quiet_recordmcount = ".tmp_quiet_recordmcount"; -my $print_warning = 1; -$print_warning = 0 if ( -f $quiet_recordmcount); - -## -# check_objcopy - whether objcopy supports --globalize-symbols -# -# --globalize-symbols came out in 2.17, we must test the version -# of objcopy, and if it is less than 2.17, then we can not -# record local functions. 
-sub check_objcopy -{ - open (IN, "$objcopy --version |") or die "error running $objcopy"; - while (<IN>) { - if (/objcopy.*\s(\d+)\.(\d+)/) { - $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17)); - last; - } - } - close (IN); - - if (!$can_use_local && $print_warning) { - print STDERR "WARNING: could not find objcopy version or version " . - "is less than 2.17.\n" . - "\tLocal function references are disabled.\n"; - open (QUIET, ">$quiet_recordmcount"); - printf QUIET "Disables the warning from recordmcount.pl\n"; - close QUIET; - } -} if ($arch =~ /(x86(_64)?)|(i386)/) { if ($bits == 64) { @@ -434,8 +401,6 @@ if ($filename =~ m,^(.*)(\.\S),) { my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s"; my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o"; -check_objcopy(); - # # Step 1: find all the local (static functions) and weak symbols. # 't' is local, 'w/W' is weak @@ -473,11 +438,6 @@ sub update_funcs # is this function static? If so, note this fact. if (defined $locals{$ref_func}) { - - # only use locals if objcopy supports globalize-symbols - if (!$can_use_local) { - return; - } $convert{$ref_func} = 1; } From 1c0cec64a7cc545eb49f374a43e9f7190a14defa Mon Sep 17 00:00:00 2001 From: Hui Su Date: Fri, 11 Jun 2021 10:21:07 +0800 Subject: [PATCH 748/794] scripts/tracing: fix the bug that can't parse raw_trace_func Since commit 77271ce4b2c0 ("tracing: Add irq, preempt-count and need resched info to default trace output"), the default trace output format has been changed to: <idle>-0 [009] d.h. 22420.068695: _raw_spin_lock_irqsave <-hrtimer_interrupt <idle>-0 [000] ..s. 22420.068695: _nohz_idle_balance <-run_rebalance_domains <idle>-0 [011] d.h.
22420.068695: account_process_tick <-update_process_times The original trace output format (before v3.2.0) was: # tracer: nop # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | migration/0-6 [000] 50.025810: rcu_note_context_switch <-__schedule migration/0-6 [000] 50.025812: trace_rcu_utilization <-rcu_note_context_switch migration/0-6 [000] 50.025813: rcu_sched_qs <-rcu_note_context_switch migration/0-6 [000] 50.025815: rcu_preempt_qs <-rcu_note_context_switch migration/0-6 [000] 50.025817: trace_rcu_utilization <-rcu_note_context_switch migration/0-6 [000] 50.025818: debug_lockdep_rcu_enabled <-__schedule migration/0-6 [000] 50.025820: debug_lockdep_rcu_enabled <-__schedule The draw_functrace.py script (introduced in v2.6.28) can't parse trace_func output in the new format, so we need to modify draw_functrace.py to adapt to the new trace output format. Link: https://lkml.kernel.org/r/20210611022107.608787-1-suhui@zeku.com Cc: stable@vger.kernel.org Fixes: 77271ce4b2c0 ("tracing: Add irq, preempt-count and need resched info to default trace output") Signed-off-by: Hui Su Signed-off-by: Steven Rostedt (VMware) --- scripts/tracing/draw_functrace.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index 74f8aadfd4cb..7011fbe003ff 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -17,7 +17,7 @@ Usage: $ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func Wait some times but not too much, the script is a bit slow.
Break the pipe (Ctrl + Z) - $ scripts/draw_functrace.py < raw_trace_func > draw_functrace + $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace Then you have your drawn trace in draw_functrace """ @@ -103,10 +103,10 @@ def parseLine(line): line = line.strip() if line.startswith("#"): raise CommentLineException - m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line) + m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line) if m is None: raise BrokenLineException - return (m.group(1), m.group(2), m.group(3)) + return (m.group(2), m.group(3), m.group(4)) def main(): From d5aaad6f83420efb8357ac8e11c868708b22d0a9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 4 Aug 2021 14:46:09 -0700 Subject: [PATCH 749/794] KVM: x86/mmu: Fix per-cpu counter corruption on 32-bit builds Take a signed 'long' instead of an 'unsigned long' for the number of pages to add/subtract to the total number of pages used by the MMU. This fixes a zero-extension bug on 32-bit kernels that effectively corrupts the per-cpu counter used by the shrinker. Per-cpu counters take a signed 64-bit value on both 32-bit and 64-bit kernels, whereas kvm_mod_used_mmu_pages() takes an unsigned long and thus an unsigned 32-bit value on 32-bit kernels. As a result, the value used to adjust the per-cpu counter is zero-extended (unsigned -> signed), not sign-extended (signed -> signed), and so KVM's intended -1 gets morphed to 4294967295 and effectively corrupts the counter. This was found by a staggering amount of sheer dumb luck when running kvm-unit-tests on a 32-bit KVM build. The shrinker just happened to kick in while running tests and do_shrink_slab() logged an error about trying to free a negative number of objects. The truly lucky part is that the kernel just happened to be a slightly stale build, as the shrinker no longer yells about negative objects as of commit 18bb473e5031 ("mm: vmscan: shrink deferred objects proportional to priority"). 
vmscan: shrink_slab: mmu_shrink_scan+0x0/0x210 [kvm] negative objects to delete nr=-858993460 Fixes: bc8a3d8925a8 ("kvm: mmu: Fix overflow on kvm mmu page limit calculation") Cc: stable@vger.kernel.org Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210804214609.1096003-1-seanjc@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 66f7f5bc3482..c4f4fa23320e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); From 8da0e55c7988ef9f08a708c38e5c75ecd8862cf8 Mon Sep 17 00:00:00 2001 From: David Bauer Date: Thu, 5 Aug 2021 01:25:22 +0200 Subject: [PATCH 750/794] USB: serial: ftdi_sio: add device ID for Auto-M3 OP-COM v2 The Auto-M3 OP-COM v2 is an OBD diagnostic device using an FTD232 for the USB connection.
Signed-off-by: David Bauer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 4a1f3a95d017..33bbb3470ca3 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -219,6 +219,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_MTXORB_6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_R2000KU_TRUE_RNG) }, { USB_DEVICE(FTDI_VID, FTDI_VARDAAN_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_AUTO_M3_OP_COM_V2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0100_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0101_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0102_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index add602bebd82..755858ca20ba 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -159,6 +159,9 @@ /* Vardaan Enterprises Serial Interface VEUSB422R3 */ #define FTDI_VARDAAN_PID 0xF070 +/* Auto-M3 Ltd. - OP-COM USB V2 - OBD interface Adapter */ +#define FTDI_AUTO_M3_OP_COM_V2_PID 0x4f50 + /* * Xsens Technologies BV products (http://www.xsens.com). */ From d25d85061bd856d6be221626605319154f9b5043 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Thu, 29 Jul 2021 00:33:14 -0700 Subject: [PATCH 751/794] usb: dwc3: gadget: Use list_replace_init() before traversing lists The list_for_each_entry_safe() macro saves the current item (n) and the item after (n+1), so that n can be safely removed without corrupting the list. However, when traversing the list and removing items using gadget giveback, the DWC3 lock is briefly released, allowing other routines to execute. 
There is a situation where, while items are being removed from the cancelled_list using dwc3_gadget_ep_cleanup_cancelled_requests(), the pullup disable routine is running in parallel (due to UDC unbind). As the cleanup routine removes n, and the pullup disable removes n+1, once the cleanup retakes the DWC3 lock, it references a request that was already removed/handled. With list debug enabled, this leads to a panic. Ensure all instances of the macro are replaced where gadget giveback is used. Example call stack: Thread#1: __dwc3_gadget_ep_set_halt() - CLEAR HALT -> dwc3_gadget_ep_cleanup_cancelled_requests() ->list_for_each_entry_safe() ->dwc3_gadget_giveback(n) ->dwc3_gadget_del_and_unmap_request()- n deleted[cancelled_list] ->spin_unlock ->Thread#2 executes ... ->dwc3_gadget_giveback(n+1) ->Already removed! Thread#2: dwc3_gadget_pullup() ->waiting for dwc3 spin_lock ... ->Thread#1 released lock ->dwc3_stop_active_transfers() ->dwc3_remove_requests() ->fetches n+1 item from cancelled_list (n removed by Thread#1) ->dwc3_gadget_giveback() ->dwc3_gadget_del_and_unmap_request()- n+1 deleted[cancelled_list] ->spin_unlock Fix this condition by utilizing list_replace_init(), and traversing through a local copy of the current elements in the endpoint lists. This will also set the parent list as empty, so if another thread is also looping through the list, it will be empty on the next iteration.
Fixes: d4f1afe5e896 ("usb: dwc3: gadget: move requests to cancelled_list") Cc: stable Acked-by: Felipe Balbi Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1627543994-20327-1-git-send-email-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 45f2bc0807e8..a1b262669574 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1741,9 +1741,13 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) { struct dwc3_request *req; struct dwc3_request *tmp; + struct list_head local; struct dwc3 *dwc = dep->dwc; - list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) { +restart: + list_replace_init(&dep->cancelled_list, &local); + + list_for_each_entry_safe(req, tmp, &local, list) { dwc3_gadget_ep_skip_trbs(dep, req); switch (req->status) { case DWC3_REQUEST_STATUS_DISCONNECTED: @@ -1761,6 +1765,9 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) break; } } + + if (!list_empty(&dep->cancelled_list)) + goto restart; } static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, @@ -2958,8 +2965,12 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, { struct dwc3_request *req; struct dwc3_request *tmp; + struct list_head local; - list_for_each_entry_safe(req, tmp, &dep->started_list, list) { +restart: + list_replace_init(&dep->started_list, &local); + + list_for_each_entry_safe(req, tmp, &local, list) { int ret; ret = dwc3_gadget_ep_cleanup_completed_request(dep, event, @@ -2967,6 +2978,9 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, if (ret) break; } + + if (!list_empty(&dep->started_list)) + goto restart; } static bool dwc3_gadget_ep_should_continue(struct dwc3_ep *dep) From cb10f68ad8150f243964b19391711aaac5e8ff42 Mon Sep 17 00:00:00 2001 From: Wesley Cheng 
Date: Tue, 3 Aug 2021 23:24:05 -0700 Subject: [PATCH 752/794] usb: dwc3: gadget: Avoid runtime resume if disabling pullup If the device is already in the runtime suspended state, any call to the pullup routine will issue a runtime resume on the DWC3 core device. If the USB gadget is disabling the pullup, then avoid having to issue a runtime resume, as DWC3 gadget has already been halted/stopped. This fixes an issue where the following condition occurs: usb_gadget_remove_driver() -->usb_gadget_disconnect() -->dwc3_gadget_pullup(0) -->pm_runtime_get_sync() -> ret = 0 -->pm_runtime_put() [async] -->usb_gadget_udc_stop() -->dwc3_gadget_stop() -->dwc->gadget_driver = NULL ... dwc3_suspend_common() -->dwc3_gadget_suspend() -->DWC3 halt/stop routine skipped, driver_data == NULL This leads to a situation where the DWC3 gadget is not properly stopped, as the runtime resume would have re-enabled EP0 and event interrupts, and since we avoided the DWC3 gadget suspend, these resources were never disabled. Fixes: 77adb8bdf422 ("usb: dwc3: gadget: Allow runtime suspend if UDC unbinded") Cc: stable Acked-by: Felipe Balbi Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1628058245-30692-1-git-send-email-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index a1b262669574..b8d4b2d327b2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2256,6 +2256,17 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) } } + /* + * Avoid issuing a runtime resume if the device is already in the + * suspended state during gadget disconnect. DWC3 gadget was already + * halted/stopped during runtime suspend. + */ + if (!is_on) { + pm_runtime_barrier(dwc->dev); + if (pm_runtime_suspended(dwc->dev)) + return 0; + } + /* * Check the return value for successful resume, or error. 
For a * successful resume, the DWC3 runtime PM resume routine will handle From 6aa32467299e9e12280a6aec9dbc21bf2db830b0 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Wed, 21 Jul 2021 17:30:45 +0800 Subject: [PATCH 753/794] MIPS: check return value of pgtable_pmd_page_ctor +. According to Documentation/vm/split_page_table_lock, handle failure of pgtable_pmd_page_ctor +. Use GFP_KERNEL_ACCOUNT instead of GFP_KERNEL|__GFP_ACCOUNT +. Adjust coding style Fixes: ed914d48b6a1 ("MIPS: add PMD table accounting into MIPS") Reported-by: Joshua Kinard Signed-off-by: Huang Pei Reviewed-by: Joshua Kinard Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/pgalloc.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 4b2567d6b2df..c7925d0e9874 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -58,15 +58,20 @@ do { \ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = NULL; + pmd_t *pmd; struct page *pg; - pg = alloc_pages(GFP_KERNEL | __GFP_ACCOUNT, PMD_ORDER); - if (pg) { - pgtable_pmd_page_ctor(pg); - pmd = (pmd_t *)page_address(pg); - pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); + pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_ORDER); + if (!pg) + return NULL; + + if (!pgtable_pmd_page_ctor(pg)) { + __free_pages(pg, PMD_ORDER); + return NULL; } + + pmd = (pmd_t *)page_address(pg); + pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; } From 43ad944cd73f2360ec8ff31d29ea44830b3119af Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Tue, 3 Aug 2021 17:13:14 +0800 Subject: [PATCH 754/794] usb: typec: tcpm: Keep other events when receiving FRS and Sourcing_vbus events When receiving FRS and Sourcing_Vbus events from low-level drivers, keep other events which come a bit earlier so that they will not be ignored in the event handler.
Fixes: 8dc4bd073663 ("usb: typec: tcpm: Add support for Sink Fast Role SWAP(FRS)") Cc: stable Cc: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Reviewed-by: Badhri Jagan Sridharan Signed-off-by: Kyle Tso Link: https://lore.kernel.org/r/20210803091314.3051302-1-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 5b22a1c931a9..b9bb63d749ec 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5369,7 +5369,7 @@ EXPORT_SYMBOL_GPL(tcpm_pd_hard_reset); void tcpm_sink_frs(struct tcpm_port *port) { spin_lock(&port->pd_event_lock); - port->pd_events = TCPM_FRS_EVENT; + port->pd_events |= TCPM_FRS_EVENT; spin_unlock(&port->pd_event_lock); kthread_queue_work(port->wq, &port->event_work); } @@ -5378,7 +5378,7 @@ EXPORT_SYMBOL_GPL(tcpm_sink_frs); void tcpm_sourcing_vbus(struct tcpm_port *port) { spin_lock(&port->pd_event_lock); - port->pd_events = TCPM_SOURCING_VBUS; + port->pd_events |= TCPM_SOURCING_VBUS; spin_unlock(&port->pd_event_lock); kthread_queue_work(port->wq, &port->event_work); } From 5a7c1b2a5bb4461967b15f3484a0ff75d3199719 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Wed, 4 Aug 2021 21:39:49 +0530 Subject: [PATCH 755/794] net: wwan: iosm: fix lkp buildbot warning Correct td buffer type casting & format specifier to fix lkp buildbot warning. Reported-by: kernel test robot Signed-off-by: M Chetan Kumar Signed-off-by: David S. 
Miller --- drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c index 91109e27efd3..35d590743d3a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c @@ -412,8 +412,8 @@ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol, } if (p_td->buffer.address != IPC_CB(skb)->mapping) { - dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p", - (void *)p_td->buffer.address, skb->data); + dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p", + (unsigned long long)p_td->buffer.address, skb->data); ipc_pcie_kfree_skb(ipc_protocol->pcie, skb); skb = NULL; goto ret; From b46c5795d641b759eb0f001ab21852fe5df5ef92 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Wed, 4 Aug 2021 21:39:50 +0530 Subject: [PATCH 756/794] net: wwan: iosm: endianness type correction Endianness type correction for nr_of_bytes. This field is exchanged as part of host-device protocol communication. Signed-off-by: M Chetan Kumar Reviewed-by: Loic Poulain Signed-off-by: David S. 
Miller --- drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 4 ++-- drivers/net/wwan/iosm/iosm_ipc_mux_codec.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index 562de275797a..bdb2d32cdb6d 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -320,7 +320,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, return; } - ul_credits = fct->vfl.nr_of_bytes; + ul_credits = le32_to_cpu(fct->vfl.nr_of_bytes); dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d", if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits); @@ -586,7 +586,7 @@ static bool ipc_mux_lite_send_qlt(struct iosm_mux *ipc_mux) qlt->reserved[0] = 0; qlt->reserved[1] = 0; - qlt->vfl.nr_of_bytes = session->ul_list.qlen; + qlt->vfl.nr_of_bytes = cpu_to_le32(session->ul_list.qlen); /* Add QLT to the transfer list. */ skb_queue_tail(&ipc_mux->channel->ul_list, diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h index 4a74e3c9457f..aae83db5cbb8 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h @@ -106,7 +106,7 @@ struct mux_lite_cmdh { * @nr_of_bytes: Number of bytes available to transmit in the queue. */ struct mux_lite_vfl { - u32 nr_of_bytes; + __le32 nr_of_bytes; }; /** From c98f5220e9703db2d73b4e89c07879dc61eeab14 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Wed, 4 Aug 2021 21:39:51 +0530 Subject: [PATCH 757/794] net: wwan: iosm: correct data protocol mask bit Correct ul/dl data protocol mask bit to know which protocol capability does device implement. Signed-off-by: M Chetan Kumar Reviewed-by: Loic Poulain Signed-off-by: David S. 
Miller --- drivers/net/wwan/iosm/iosm_ipc_mmio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.h b/drivers/net/wwan/iosm/iosm_ipc_mmio.h index 45e6923da78f..f861994a6d90 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mmio.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.h @@ -10,10 +10,10 @@ #define IOSM_CP_VERSION 0x0100UL /* DL dir Aggregation support mask */ -#define DL_AGGR BIT(23) +#define DL_AGGR BIT(9) /* UL dir Aggregation support mask */ -#define UL_AGGR BIT(22) +#define UL_AGGR BIT(8) /* UL flow credit support mask */ #define UL_FLOW_CREDIT BIT(21) From 679505baaaabed98359c1dfb78f81600e299af21 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Wed, 4 Aug 2021 21:39:52 +0530 Subject: [PATCH 758/794] net: wwan: iosm: fix recursive lock acquire in unregister Calling unregister_netdevice() inside wwan del link is trying to acquire the held lock in ndo_stop_cb(). Instead, queue net dev to be unregistered later. Signed-off-by: M Chetan Kumar Reviewed-by: Loic Poulain Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index b2357ad5d517..b571d9cedba4 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -228,7 +228,7 @@ static void ipc_wwan_dellink(void *ctxt, struct net_device *dev, RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL); /* unregistering includes synchronize_net() */ - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); unlock: mutex_unlock(&ipc_wwan->if_mutex); From fa953adfad7cf9c7e30d9ea0e4ccfd38cfb5495d Mon Sep 17 00:00:00 2001 From: "H. 
Nikolaus Schaller" Date: Thu, 8 Jul 2021 10:57:09 +0200 Subject: [PATCH 759/794] x86/tools/relocs: Fix non-POSIX regexp Trying to run a cross-compiled x86 relocs tool on a BSD based HOSTCC leads to errors like VOFFSET arch/x86/boot/compressed/../voffset.h - due to: vmlinux CC arch/x86/boot/compressed/misc.o - due to: arch/x86/boot/compressed/../voffset.h OBJCOPY arch/x86/boot/compressed/vmlinux.bin - due to: vmlinux RELOCS arch/x86/boot/compressed/vmlinux.relocs - due to: vmlinux empty (sub)expressionarch/x86/boot/compressed/Makefile:118: recipe for target 'arch/x86/boot/compressed/vmlinux.relocs' failed make[3]: *** [arch/x86/boot/compressed/vmlinux.relocs] Error 1 It turns out that relocs.c uses patterns like "something(|_end)" This is not valid syntax or gives undefined results according to POSIX 9.5.3 ERE Grammar https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html It seems to be silently accepted by the Linux regexp() implementation while a BSD host complains. Such patterns can be replaced by a transformation like "(|p1|p2)" -> "(p1|p2)?" Fixes: fd952815307f ("x86-32, relocs: Whitelist more symbols for ld bug workaround") Signed-off-by: H. 
Nikolaus Schaller Signed-off-by: Masahiro Yamada --- arch/x86/tools/relocs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 04c5a44b9682..9ba700dc47de 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -57,12 +57,12 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { [S_REL] = "^(__init_(begin|end)|" "__x86_cpu_dev_(start|end)|" - "(__parainstructions|__alt_instructions)(|_end)|" - "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" + "(__parainstructions|__alt_instructions)(_end)?|" + "(__iommu_table|__apicdrivers|__smp_locks)(_end)?|" "__(start|end)_pci_.*|" "__(start|end)_builtin_fw|" - "__(start|stop)___ksymtab(|_gpl)|" - "__(start|stop)___kcrctab(|_gpl)|" + "__(start|stop)___ksymtab(_gpl)?|" + "__(start|stop)___kcrctab(_gpl)?|" "__(start|stop)___param|" "__(start|stop)___modver|" "__(start|stop)___bug_table|" From 28bbbb9875a35975904e46f9b06fa689d051b290 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 8 Jul 2021 10:57:10 +0200 Subject: [PATCH 760/794] mips: Fix non-POSIX regexp When cross compiling a MIPS kernel on a BSD based HOSTCC leads to errors like SYNC include/config/auto.conf.cmd - due to: .config egrep: empty (sub)expression UPD include/config/kernel.release HOSTCC scripts/dtc/dtc.o - due to target missing It turns out that egrep uses this egrep pattern: (|MINOR_|PATCHLEVEL_) This is not valid syntax or gives undefined results according to POSIX 9.5.3 ERE Grammar https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html It seems to be silently accepted by the Linux egrep implementation while a BSD host complains. Such patterns can be replaced by a transformation like "(|p1|p2)" -> "(p1|p2)?" Fixes: 48c35b2d245f ("[MIPS] There is no __GNUC_MAJOR__") Signed-off-by: H. 
Nikolaus Schaller Signed-off-by: Masahiro Yamada --- arch/mips/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 4e942b7ef022..653befc1b176 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -321,7 +321,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_MIPS CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \ + egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif From 54eacba0e3bbda9777788b44b45a5186918569f2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 26 Jul 2021 19:57:37 -0700 Subject: [PATCH 761/794] scripts: checkversion: modernize linux/version.h search strings Update scripts/checkversion.pl to recognize the current contents of and both of its current locations. Also update my email address. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- scripts/checkversion.pl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/scripts/checkversion.pl b/scripts/checkversion.pl index f67b125c5269..94cd49eff605 100755 --- a/scripts/checkversion.pl +++ b/scripts/checkversion.pl @@ -1,10 +1,10 @@ #! /usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 # -# checkversion find uses of LINUX_VERSION_CODE or KERNEL_VERSION -# without including , or cases of -# including that don't need it. -# Copyright (C) 2003, Randy Dunlap +# checkversion finds uses of all macros in +# where the source files do not #include ; or cases +# of including where it is not needed. +# Copyright (C) 2003, Randy Dunlap use strict; @@ -13,7 +13,8 @@ $| = 1; my $debugging; foreach my $file (@ARGV) { - next if $file =~ "include/linux/version\.h"; + next if $file =~ "include/generated/uapi/linux/version\.h"; + next if $file =~ "usr/include/linux/version\.h"; # Open this file. 
open( my $f, '<', $file ) or die "Can't open $file: $!\n"; @@ -41,8 +42,11 @@ foreach my $file (@ARGV) { $iLinuxVersion = $. if m/^\s*#\s*include\s*/o; } - # Look for uses: LINUX_VERSION_CODE, KERNEL_VERSION, UTS_RELEASE - if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/)) { + # Look for uses: LINUX_VERSION_CODE, KERNEL_VERSION, + # LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL + if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/) || + ($_ =~ /LINUX_VERSION_MAJOR/) || ($_ =~ /LINUX_VERSION_PATCHLEVEL/) || + ($_ =~ /LINUX_VERSION_SUBLEVEL/)) { $fUseVersion = 1; last if $iLinuxVersion; } From 14ccc638b02f9ec500c17d9e39efe979145a4b61 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 09:12:54 +0900 Subject: [PATCH 762/794] kbuild: cancel sub_make_done for the install target to fix DKMS Since commit bcf637f54f6d ("kbuild: parse C= and M= before changing the working directory"), external module builds invoked by DKMS fail because M= option is not parsed. I wanted to add 'unset sub_make_done' in install.sh but similar scripts, arch/*/boot/install.sh, are duplicated, so I set sub_make_done empty in the top Makefile. Fixes: bcf637f54f6d ("kbuild: parse C= and M= before changing the working directory") Reported-by: John S Gruber Signed-off-by: Masahiro Yamada Tested-by: John S Gruber --- Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Makefile b/Makefile index 27a072cffcb9..efb294568f28 100644 --- a/Makefile +++ b/Makefile @@ -1317,6 +1317,16 @@ PHONY += scripts_unifdef scripts_unifdef: scripts_basic $(Q)$(MAKE) $(build)=scripts scripts/unifdef +# --------------------------------------------------------------------------- +# Install + +# Many distributions have the custom install script, /sbin/installkernel. +# If DKMS is installed, 'make install' will eventually recuses back +# to the this Makefile to build and install external modules. 
+# Cancel sub_make_done so that options such as M=, V=, etc. are parsed. + +install: sub_make_done := + # --------------------------------------------------------------------------- # Tools From fb653827c758725b149b5c924a5eb50ab4812750 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Aug 2021 13:38:26 +0300 Subject: [PATCH 763/794] bnx2x: fix an error code in bnx2x_nic_load() Set the error code if bnx2x_alloc_fw_stats_mem() fails. The current code returns success. Fixes: ad5afc89365e ("bnx2x: Separate VF and PF logic") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1a6ec1a12d53..b5d954cb409a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2669,7 +2669,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } /* Allocated memory for FW statistics */ - if (bnx2x_alloc_fw_stats_mem(bp)) + rc = bnx2x_alloc_fw_stats_mem(bp); + if (rc) LOAD_ERROR_EXIT(bp, load_error0); /* request pf to initialize status blocks */ From ae03d189bae306e1e00aa631feee090ebda6cf63 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 5 Aug 2021 13:14:09 +0300 Subject: [PATCH 764/794] net: ethernet: ti: am65-cpsw: fix crash in am65_cpsw_port_offload_fwd_mark_update() The am65_cpsw_port_offload_fwd_mark_update() causes NULL exception crash when there is at least one disabled port and any other port added to the bridge first time. 
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000858 pc : am65_cpsw_port_offload_fwd_mark_update+0x54/0x68 lr : am65_cpsw_netdevice_event+0x8c/0xf0 Call trace: am65_cpsw_port_offload_fwd_mark_update+0x54/0x68 notifier_call_chain+0x54/0x98 raw_notifier_call_chain+0x14/0x20 call_netdevice_notifiers_info+0x34/0x78 __netdev_upper_dev_link+0x1c8/0x290 netdev_master_upper_dev_link+0x1c/0x28 br_add_if+0x3f0/0x6d0 [bridge] Fix it by adding proper check for port->ndev != NULL. Fixes: 2934db9bcb30 ("net: ti: am65-cpsw-nuss: Add netdevice notifiers") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 718539cdd2f2..67a08cbba859 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2060,8 +2060,12 @@ static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *comm for (i = 1; i <= common->port_num; i++) { struct am65_cpsw_port *port = am65_common_get_port(common, i); - struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev); + struct am65_cpsw_ndev_priv *priv; + if (!port->ndev) + continue; + + priv = am65_ndev_to_priv(port->ndev); priv->offload_fwd_mark = set_val; } } From 51397dc6f283bb570e1cf8226017d300d8ea1f5b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 4 Aug 2021 14:18:48 -0400 Subject: [PATCH 765/794] tracing: Quiet smp_processor_id() use in preemptable warning in hwlat The hardware latency detector (hwlat) has a mode that it runs one thread across CPUs. The logic to move from the currently running CPU to the next one in the list does a smp_processor_id() to find where it currently is. Unfortunately, it's done with preemption enabled, and this triggers a warning for using smp_processor_id() in a preempt enabled section. 
As it is only using smp_processor_id() to get information on where it currently is in order to simply move it to the next CPU, it doesn't really care if it got moved in the mean time. It will simply balance out later if such a case arises. Switch smp_processor_id() to raw_smp_processor_id() to quiet that warning. Link: https://lkml.kernel.org/r/20210804141848.79edadc0@oasis.local.home Acked-by: Daniel Bristot de Oliveira Fixes: 8fa826b7344d ("trace/hwlat: Implement the mode config option") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_hwlat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index a6c0cdaf4b87..14f46aae1981 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -327,7 +327,7 @@ static void move_to_next_cpu(void) get_online_cpus(); cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); - next_cpu = cpumask_next(smp_processor_id(), current_mask); + next_cpu = cpumask_next(raw_smp_processor_id(), current_mask); put_online_cpus(); if (next_cpu >= nr_cpu_ids) From af35fc37354cda3c9c8cc4961b1d24bdc9d27903 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 4 Aug 2021 17:30:05 +0300 Subject: [PATCH 766/794] net: pegasus: fix uninit-value in get_interrupt_interval Syzbot reported uninit value pegasus_probe(). The problem was in missing error handling. get_interrupt_interval() internally calls read_eprom_word() which can fail in some cases. For example: failed to receive usb control message. These cases should be handled to prevent uninit value bug, since read_eprom_word() will not initialize passed stack variable in case of internal failure. Fail log: BUG: KMSAN: uninit-value in get_interrupt_interval drivers/net/usb/pegasus.c:746 [inline] BUG: KMSAN: uninit-value in pegasus_probe+0x10e7/0x4080 drivers/net/usb/pegasus.c:1152 CPU: 1 PID: 825 Comm: kworker/1:1 Not tainted 5.12.0-rc6-syzkaller #0 ... 
Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x24c/0x2e0 lib/dump_stack.c:120 kmsan_report+0xfb/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x5c/0xa0 mm/kmsan/kmsan_instr.c:197 get_interrupt_interval drivers/net/usb/pegasus.c:746 [inline] pegasus_probe+0x10e7/0x4080 drivers/net/usb/pegasus.c:1152 .... Local variable ----data.i@pegasus_probe created at: get_interrupt_interval drivers/net/usb/pegasus.c:1151 [inline] pegasus_probe+0xe57/0x4080 drivers/net/usb/pegasus.c:1152 get_interrupt_interval drivers/net/usb/pegasus.c:1151 [inline] pegasus_probe+0xe57/0x4080 drivers/net/usb/pegasus.c:1152 Reported-and-tested-by: syzbot+02c9f70f3afae308464a@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/20210804143005.439-1-paskripkin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/pegasus.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index f18b03be1b87..652e9fcf0b77 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -744,12 +744,16 @@ static inline void disable_net_traffic(pegasus_t *pegasus) set_registers(pegasus, EthCtrl0, sizeof(tmp), &tmp); } -static inline void get_interrupt_interval(pegasus_t *pegasus) +static inline int get_interrupt_interval(pegasus_t *pegasus) { u16 data; u8 interval; + int ret; + + ret = read_eprom_word(pegasus, 4, &data); + if (ret < 0) + return ret; - read_eprom_word(pegasus, 4, &data); interval = data >> 8; if (pegasus->usb->speed != USB_SPEED_HIGH) { if (interval < 0x80) { @@ -764,6 +768,8 @@ static inline void get_interrupt_interval(pegasus_t *pegasus) } } pegasus->intr_interval = interval; + + return 0; } static void set_carrier(struct net_device *net) @@ -1165,7 +1171,9 @@ static int pegasus_probe(struct usb_interface *intf, | NETIF_MSG_PROBE | NETIF_MSG_LINK); pegasus->features = 
usb_dev_id[dev_index].private; - get_interrupt_interval(pegasus); + res = get_interrupt_interval(pegasus); + if (res) + goto out2; if (reset_mac(pegasus)) { dev_err(&intf->dev, "can't reset MAC\n"); res = -EIO; From 44712965bf12ae1758cec4de53816ed4b914ca1a Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 4 Aug 2021 18:51:51 +0300 Subject: [PATCH 767/794] net: fec: fix use-after-free in fec_drv_remove Smatch says: drivers/net/ethernet/freescale/fec_main.c:3994 fec_drv_remove() error: Using fep after free_{netdev,candev}(ndev); drivers/net/ethernet/freescale/fec_main.c:3995 fec_drv_remove() error: Using fep after free_{netdev,candev}(ndev); Since fep pointer is netdev private data, accessing it after free_netdev() call can cause use-after-free bug. Fix it by moving free_netdev() call at the end of the function Reported-by: Dan Carpenter Fixes: a31eda65ba21 ("net: fec: fix clock count mis-match") Signed-off-by: Pavel Skripkin Reviewed-by: Joakim Zhang Reviewed-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 8aea707a65a7..7e4c4980ced7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3843,13 +3843,13 @@ fec_drv_remove(struct platform_device *pdev) if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); - free_netdev(ndev); clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); + free_netdev(ndev); return 0; } From 942e560a3d3862dd5dee1411dbdd7097d29b8416 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 4 Aug 2021 18:52:20 +0300 Subject: [PATCH 768/794] net: vxge: fix use-after-free in vxge_device_unregister Smatch says: drivers/net/ethernet/neterion/vxge/vxge-main.c:3518 
vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev); drivers/net/ethernet/neterion/vxge/vxge-main.c:3518 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev); drivers/net/ethernet/neterion/vxge/vxge-main.c:3520 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev); drivers/net/ethernet/neterion/vxge/vxge-main.c:3520 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev); Since vdev pointer is netdev private data accessing it after free_netdev() call can cause use-after-free bug. Fix it by moving free_netdev() call at the end of the function Fixes: 6cca200362b4 ("vxge: cleanup probe error paths") Reported-by: Dan Carpenter Signed-off-by: Pavel Skripkin Reviewed-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/neterion/vxge/vxge-main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 82eef4c72f01..7abd13e69471 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3512,13 +3512,13 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev) kfree(vdev->vpaths); - /* we are safe to free it now */ - free_netdev(dev); - vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered", buf); vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d Exiting...", buf, __func__, __LINE__); + + /* we are safe to free it now */ + free_netdev(dev); } /* From 8d75d0eff6887bcac7225e12b9c75595e523d92d Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 5 Aug 2021 20:46:45 +0800 Subject: [PATCH 769/794] blk-iolatency: error out if blk_get_queue() failed in iolatency_set_limit() If queue is dying while iolatency_set_limit() is in progress, blk_get_queue() won't increment the refcount of the queue. 
However, blk_put_queue() will still decrement the refcount later, which will cause the refcount to be unbalanced. Thus error out in such case to fix the problem. Fixes: 8c772a9bfc7c ("blk-iolatency: fix IO hang due to negative inflight counter") Signed-off-by: Yu Kuai Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210805124645.543797-1-yukuai3@huawei.com Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 81be0096411d..d8b0d8bd132b 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -833,7 +833,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, enable = iolatency_set_min_lat_nsec(blkg, lat_val); if (enable) { - WARN_ON_ONCE(!blk_get_queue(blkg->q)); + if (!blk_get_queue(blkg->q)) { + ret = -ENODEV; + goto out; + } + blkg_get(blkg); } From 46c4c9d1beb7f5b4cec4dd90e7728720583ee348 Mon Sep 17 00:00:00 2001 From: "Alex Xu (Hello71)" Date: Thu, 5 Aug 2021 10:40:47 -0400 Subject: [PATCH 770/794] pipe: increase minimum default pipe size to 2 pages This program always prints 4096 and hangs before the patch, and always prints 8192 and exits successfully after: int main() { int pipefd[2]; for (int i = 0; i < 1025; i++) if (pipe(pipefd) == -1) return 1; size_t bufsz = fcntl(pipefd[1], F_GETPIPE_SZ); printf("%zd\n", bufsz); char *buf = calloc(bufsz, 1); write(pipefd[1], buf, bufsz); read(pipefd[0], buf, bufsz-1); write(pipefd[1], buf, 1); } Note that you may need to increase your RLIMIT_NOFILE before running the program.
Fixes: 759c01142a ("pipe: limit the per-user amount of pages allocated in pipes") Cc: Link: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/ Link: https://lore.kernel.org/lkml/1628127094.lxxn016tj7.none@localhost/ Signed-off-by: Alex Xu (Hello71) Signed-off-by: Linus Torvalds --- fs/pipe.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 9ef4231cce61..8e6ef62aeb1c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -31,6 +31,21 @@ #include "internal.h" +/* + * New pipe buffers will be restricted to this size while the user is exceeding + * their pipe buffer quota. The general pipe use case needs at least two + * buffers: one for data yet to be read, and one for new data. If this is less + * than two, then a write to a non-empty pipe may block even if the pipe is not + * full. This can occur with GNU make jobserver or similar uses of pipes as + * semaphores: multiple processes may be waiting to write tokens back to the + * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/. + * + * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their + * own risk, namely: pipe writes to non-full pipes may block until the pipe is + * emptied. + */ +#define PIPE_MIN_DEF_BUFFERS 2 + /* * The max size that a non-root user is allowed to grow the pipe. 
Can * be set by root in /proc/sys/fs/pipe-max-size @@ -781,8 +796,8 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) { - user_bufs = account_pipe_buffers(user, pipe_bufs, 1); - pipe_bufs = 1; + user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS); + pipe_bufs = PIPE_MIN_DEF_BUFFERS; } if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) From 2e9fb2c11e0ec3113fcf0e8e052c99ecd82fcd4e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Aug 2021 10:34:47 -0700 Subject: [PATCH 771/794] block/partitions/ldm.c: Fix a kernel-doc warning Fix the following kernel-doc warning that appears when building with W=1: block/partitions/ldm.c:31: warning: expecting prototype for ldm(). Prototype was for ldm_debug() instead Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20210805173447.3249906-1-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/partitions/ldm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index d333786b5c7e..14b124cdacfc 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) * * Copyright (C) 2001,2002 Richard Russon From e04480920d1eec9c061841399aa6f35b6f987d8b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 4 Aug 2021 19:26:56 +0900 Subject: [PATCH 772/794] Bluetooth: defer cleanup of resources in hci_unregister_dev() syzbot is hitting might_sleep() warning at hci_sock_dev_event() due to calling lock_sock() with rw spinlock held [1]. It seems that history of this locking problem is a trial and error. 
Commit b40df5743ee8 ("[PATCH] bluetooth: fix socket locking in hci_sock_dev_event()") in 2.6.21-rc4 changed bh_lock_sock() to lock_sock() as an attempt to fix lockdep warning. Then, commit 4ce61d1c7a8e ("[BLUETOOTH]: Fix locking in hci_sock_dev_event().") in 2.6.22-rc2 changed lock_sock() to local_bh_disable() + bh_lock_sock_nested() as an attempt to fix the sleep in atomic context warning. Then, commit 4b5dd696f81b ("Bluetooth: Remove local_bh_disable() from hci_sock.c") in 3.3-rc1 removed local_bh_disable(). Then, commit e305509e678b ("Bluetooth: use correct lock to prevent UAF of hdev object") in 5.13-rc5 again changed bh_lock_sock_nested() to lock_sock() as an attempt to fix CVE-2021-3573. This difficulty comes from current implementation that hci_sock_dev_event(HCI_DEV_UNREG) is responsible for dropping all references from sockets because hci_unregister_dev() immediately reclaims resources as soon as returning from hci_sock_dev_event(HCI_DEV_UNREG). But the history suggests that hci_sock_dev_event(HCI_DEV_UNREG) was not doing what it should do. Therefore, instead of trying to detach sockets from device, let's accept not detaching sockets from device at hci_sock_dev_event(HCI_DEV_UNREG), by moving actual cleanup of resources from hci_unregister_dev() to hci_cleanup_dev() which is called by bt_host_release() when all references to this unregistered device (which is a kobject) are gone. Since hci_sock_dev_event(HCI_DEV_UNREG) no longer resets hci_pi(sk)->hdev, we need to check whether this device was unregistered and return an error based on HCI_UNREGISTER flag. There might be subtle behavioral difference in "monitor the hdev" functionality; please report if you found something went wrong due to this patch. 
Link: https://syzkaller.appspot.com/bug?extid=a5df189917e79d5e59c9 [1] Reported-by: syzbot Suggested-by: Linus Torvalds Signed-off-by: Tetsuo Handa Fixes: e305509e678b ("Bluetooth: use correct lock to prevent UAF of hdev object") Acked-by: Luiz Augusto von Dentz Signed-off-by: Linus Torvalds --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 16 +++++------ net/bluetooth/hci_sock.c | 49 +++++++++++++++++++++----------- net/bluetooth/hci_sysfs.c | 3 ++ 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a53e94459ecd..db4312e44d47 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1230,6 +1230,7 @@ struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); +void hci_cleanup_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2560ed2f144d..e1a545c8a69f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3996,14 +3996,10 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { - int id; - BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); hci_dev_set_flag(hdev, HCI_UNREGISTER); - id = hdev->id; - write_lock(&hci_dev_list_lock); list_del(&hdev->list); write_unlock(&hci_dev_list_lock); @@ -4038,7 +4034,14 @@ void hci_unregister_dev(struct hci_dev *hdev) } device_del(&hdev->dev); + /* Actual cleanup is deferred until hci_cleanup_dev(). 
*/ + hci_dev_put(hdev); +} +EXPORT_SYMBOL(hci_unregister_dev); +/* Cleanup HCI device */ +void hci_cleanup_dev(struct hci_dev *hdev) +{ debugfs_remove_recursive(hdev->debugfs); kfree_const(hdev->hw_info); kfree_const(hdev->fw_info); @@ -4063,11 +4066,8 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_blocked_keys_clear(hdev); hci_dev_unlock(hdev); - hci_dev_put(hdev); - - ida_simple_remove(&hci_index_ida, id); + ida_simple_remove(&hci_index_ida, hdev->id); } -EXPORT_SYMBOL(hci_unregister_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b04a5a02ecf3..f1128c2134f0 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -59,6 +59,17 @@ struct hci_pinfo { char comm[TASK_COMM_LEN]; }; +static struct hci_dev *hci_hdev_from_sock(struct sock *sk) +{ + struct hci_dev *hdev = hci_pi(sk)->hdev; + + if (!hdev) + return ERR_PTR(-EBADFD); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return ERR_PTR(-EPIPE); + return hdev; +} + void hci_sock_set_flag(struct sock *sk, int nr) { set_bit(nr, &hci_pi(sk)->flags); @@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - /* Detach sockets from device */ + /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { - hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; - sk->sk_state = BT_OPEN; sk->sk_state_change(sk); - - hci_dev_put(hdev); } - release_sock(sk); } read_unlock(&hci_sk_list.lock); } @@ -930,10 +935,10 @@ static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg) static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { - struct hci_dev *hdev = hci_pi(sk)->hdev; + struct hci_dev *hdev = hci_hdev_from_sock(sk); - if (!hdev) - return -EBADFD; + if (IS_ERR(hdev)) + return PTR_ERR(hdev); if 
(hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; @@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, lock_sock(sk); + /* Allow detaching from dead device and attaching to alive device, if + * the caller wants to re-bind (instead of close) this socket in + * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. + */ + hdev = hci_pi(sk)->hdev; + if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + hci_pi(sk)->hdev = NULL; + sk->sk_state = BT_OPEN; + hci_dev_put(hdev); + } + hdev = NULL; + if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; @@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, lock_sock(sk); - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } @@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto done; } - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 9874844a95a9..b69d88b88d2e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -83,6 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) static void bt_host_release(struct device *dev) { struct hci_dev *hdev = to_hci_dev(dev); + + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + hci_cleanup_dev(hdev); kfree(hdev); module_put(THIS_MODULE); } From 0395be967b067d99494113d78470574e86a02ed4 Mon Sep 17 00:00:00 2001 From: Apurva Nandan Date: Fri, 16 Jul 2021 23:25:03 +0000 Subject: [PATCH 773/794] spi: cadence-quadspi: Fix check condition for DTR ops buswidth and dtr fields in spi_mem_op are only valid when the corresponding spi_mem_op phase has a non-zero length. For example, SPI NAND core doesn't set buswidth when using SPI_MEM_OP_NO_ADDR phase. 
Fix the dtr checks in set_protocol() and supports_mem_op() to ignore empty spi_mem_op phases, as checking for dtr field in empty phase will result in false negatives. Signed-off-by: Apurva Nandan Link: https://lore.kernel.org/r/20210716232504.182-3-a-nandan@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index a2de23516553..101cc71bffa7 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -325,7 +325,15 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE; f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE; f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; - f_pdata->dtr = op->data.dtr && op->cmd.dtr && op->addr.dtr; + + /* + * For an op to be DTR, cmd phase along with every other non-empty + * phase should have dtr field set to 1. If an op phase has zero + * nbytes, ignore its dtr field; otherwise, check its dtr field. + */ + f_pdata->dtr = op->cmd.dtr && + (!op->addr.nbytes || op->addr.dtr) && + (!op->data.nbytes || op->data.dtr); switch (op->data.buswidth) { case 0: @@ -1228,8 +1236,15 @@ static bool cqspi_supports_mem_op(struct spi_mem *mem, { bool all_true, all_false; - all_true = op->cmd.dtr && op->addr.dtr && op->dummy.dtr && - op->data.dtr; + /* + * op->dummy.dtr is required for converting nbytes into ncycles. + * Also, don't check the dtr field of the op phase having zero nbytes.
+ */ + all_true = op->cmd.dtr && + (!op->addr.nbytes || op->addr.dtr) && + (!op->dummy.nbytes || op->dummy.dtr) && + (!op->data.nbytes || op->data.dtr); + all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr && !op->data.dtr; From f7ec4121256393e1d03274acdca73eb18958f27e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 5 Aug 2021 09:27:15 -0400 Subject: [PATCH 774/794] tracepoint: static call: Compare data on transition from 2->1 callees On transition from 2->1 callees, we should be comparing .data rather than .func, because the same callback can be registered twice with different data, and what we care about here is that the data of array element 0 is unchanged to skip rcu sync. Link: https://lkml.kernel.org/r/20210805132717.23813-2-mathieu.desnoyers@efficios.com Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: "Paul E. McKenney" Cc: Stefan Metzmacher Fixes: 547305a64632 ("tracepoint: Fix out of sync data passing by static caller") Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (VMware) --- kernel/tracepoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index fc32821f8240..133b6454b287 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -338,7 +338,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, } else { rcu_assign_pointer(tp->funcs, tp_funcs); tracepoint_update_call(tp, tp_funcs, - tp_funcs[0].func != old[0].func); + tp_funcs[0].data != old[0].data); } release_probes(old); return 0; From 231264d6927f6740af36855a622d0e240be9d94c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 5 Aug 2021 09:27:16 -0400 Subject: [PATCH 775/794] tracepoint: Fix static call function vs data state mismatch On a 1->0->1 callbacks transition, there is an issue with the new callback using the old callback's data. 
Considering __DO_TRACE_CALL: do { \ struct tracepoint_func *it_func_ptr; \ void *__data; \ it_func_ptr = \ rcu_dereference_raw((&__tracepoint_##name)->funcs); \ if (it_func_ptr) { \ __data = (it_func_ptr)->data; \ ----> [ delayed here on one CPU (e.g. vcpu preempted by the host) ] static_call(tp_func_##name)(__data, args); \ } \ } while (0) It has loaded the tp->funcs of the old callback, so it will try to use the old data. This can be fixed by adding a RCU sync anywhere in the 1->0->1 transition chain. On a N->2->1 transition, we need an rcu-sync because you may have a sequence of 3->2->1 (or 1->2->1) where the element 0 data is unchanged between 2->1, but was changed from 3->2 (or from 1->2), which may be observed by the static call. This can be fixed by adding an unconditional RCU sync in transition 2->1. Note, this fixes a correctness issue at the cost of adding a tremendous performance regression to the disabling of tracepoints. Before this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m0.778s user 0m0.000s sys 0m0.061s After this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m10.593s user 0m0.017s sys 0m0.259s A follow up fix will introduce a more lightweight scheme based on RCU get_state and cond_sync, that will return the performance back to what it was. As both this change and the lightweight versions are complex on their own, for bisecting any issues that this may cause, they are kept as two separate changes. Link: https://lkml.kernel.org/r/20210805132717.23813-3-mathieu.desnoyers@efficios.com Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: "Paul E. 
McKenney" Cc: Stefan Metzmacher Fixes: d25e37d89dd2 ("tracepoint: Optimize using static_call()") Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (VMware) --- kernel/tracepoint.c | 102 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 20 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 133b6454b287..8d772bd6894d 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -15,6 +15,13 @@ #include #include +enum tp_func_state { + TP_FUNC_0, + TP_FUNC_1, + TP_FUNC_2, + TP_FUNC_N, +}; + extern tracepoint_ptr_t __start___tracepoints_ptrs[]; extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; @@ -246,26 +253,29 @@ static void *func_remove(struct tracepoint_func **funcs, return old; } -static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync) +/* + * Count the number of functions (enum tp_func_state) in a tp_funcs array. + */ +static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs) +{ + if (!tp_funcs) + return TP_FUNC_0; + if (!tp_funcs[1].func) + return TP_FUNC_1; + if (!tp_funcs[2].func) + return TP_FUNC_2; + return TP_FUNC_N; /* 3 or more */ +} + +static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs) { void *func = tp->iterator; /* Synthetic events do not have static call sites */ if (!tp->static_call_key) return; - - if (!tp_funcs[1].func) { + if (nr_func_state(tp_funcs) == TP_FUNC_1) func = tp_funcs[0].func; - /* - * If going from the iterator back to a single caller, - * we need to synchronize with __DO_TRACE to make sure - * that the data passed to the callback is the one that - * belongs to that callback. - */ - if (sync) - tracepoint_synchronize_unregister(); - } - __static_call_update(tp->static_call_key, tp->static_call_tramp, func); } @@ -299,9 +309,31 @@ static int tracepoint_add_func(struct tracepoint *tp, * a pointer to it. 
This array is referenced by __DO_TRACE from * include/linux/tracepoint.h using rcu_dereference_sched(). */ - tracepoint_update_call(tp, tp_funcs, false); - rcu_assign_pointer(tp->funcs, tp_funcs); - static_key_enable(&tp->key); + switch (nr_func_state(tp_funcs)) { + case TP_FUNC_1: /* 0->1 */ + /* Set static call to first function */ + tracepoint_update_call(tp, tp_funcs); + /* Both iterator and static call handle NULL tp->funcs */ + rcu_assign_pointer(tp->funcs, tp_funcs); + static_key_enable(&tp->key); + break; + case TP_FUNC_2: /* 1->2 */ + /* Set iterator static call */ + tracepoint_update_call(tp, tp_funcs); + /* + * Iterator callback installed before updating tp->funcs. + * Requires ordering between RCU assign/dereference and + * static call update/call. + */ + rcu_assign_pointer(tp->funcs, tp_funcs); + break; + case TP_FUNC_N: /* N->N+1 (N>1) */ + rcu_assign_pointer(tp->funcs, tp_funcs); + break; + default: + WARN_ON_ONCE(1); + break; + } release_probes(old); return 0; @@ -328,17 +360,47 @@ static int tracepoint_remove_func(struct tracepoint *tp, /* Failed allocating new tp_funcs, replaced func with stub */ return 0; - if (!tp_funcs) { + switch (nr_func_state(tp_funcs)) { + case TP_FUNC_0: /* 1->0 */ /* Removed last function */ if (tp->unregfunc && static_key_enabled(&tp->key)) tp->unregfunc(); static_key_disable(&tp->key); + /* Set iterator static call */ + tracepoint_update_call(tp, tp_funcs); + /* Both iterator and static call handle NULL tp->funcs */ + rcu_assign_pointer(tp->funcs, NULL); + /* + * Make sure new func never uses old data after a 1->0->1 + * transition sequence. + * Considering that transition 0->1 is the common case + * and don't have rcu-sync, issue rcu-sync after + * transition 1->0 to break that sequence by waiting for + * readers to be quiescent. 
+ */ + tracepoint_synchronize_unregister(); + break; + case TP_FUNC_1: /* 2->1 */ rcu_assign_pointer(tp->funcs, tp_funcs); - } else { + /* + * On 2->1 transition, RCU sync is needed before setting + * static call to first callback, because the observer + * may have loaded any prior tp->funcs after the last one + * associated with an rcu-sync. + */ + tracepoint_synchronize_unregister(); + /* Set static call to first function */ + tracepoint_update_call(tp, tp_funcs); + break; + case TP_FUNC_2: /* N->N-1 (N>2) */ + fallthrough; + case TP_FUNC_N: rcu_assign_pointer(tp->funcs, tp_funcs); - tracepoint_update_call(tp, tp_funcs, - tp_funcs[0].data != old[0].data); + break; + default: + WARN_ON_ONCE(1); + break; } release_probes(old); return 0; From 23c0ebac20de19e3f54e5e81f4c3fa0caf2f8395 Mon Sep 17 00:00:00 2001 From: Xiaomeng Hou Date: Thu, 29 Jul 2021 15:44:39 +0800 Subject: [PATCH 776/794] drm/amd/pm: update yellow carp pmfw interface version Correct yellow carp driver-PMFW interface version to v4. 
Signed-off-by: Xiaomeng Hou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h index 3fea2430dec0..dc91eb608791 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h @@ -26,7 +26,7 @@ #include "amdgpu_smu.h" #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF -#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x03 +#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x07 /* MP Apertures */ From 5706cb3c910cc8283f344bc37a889a8d523a2c6d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 29 Jul 2021 20:03:47 -0700 Subject: [PATCH 777/794] drm/amdgpu: fix checking pmops when PM_SLEEP is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'pm_suspend_target_state' is only available when CONFIG_PM_SLEEP is set/enabled. OTOH, when both SUSPEND and HIBERNATION are not set, PM_SLEEP is not set, so this variable cannot be used. ../drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c: In function ‘amdgpu_acpi_is_s0ix_active’: ../drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c:1046:11: error: ‘pm_suspend_target_state’ undeclared (first use in this function); did you mean ‘__KSYM_pm_suspend_target_state’? return pm_suspend_target_state == PM_SUSPEND_TO_IDLE; ^~~~~~~~~~~~~~~~~~~~~~~ __KSYM_pm_suspend_target_state Also use shorter IS_ENABLED(CONFIG_foo) notation for checking the 2 config symbols. 
Fixes: 91e273712ab8dd ("drm/amdgpu: Check pmops for desired suspend state") Signed-off-by: Randy Dunlap Cc: Alex Deucher Cc: Christian König Cc: "Pan, Xinhui" Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-next@vger.kernel.org Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 6cc0d4fa4d0a..4137e848f6a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1040,7 +1040,7 @@ void amdgpu_acpi_detect(void) */ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { -#if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) +#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_PM_SLEEP) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) return pm_suspend_target_state == PM_SUSPEND_TO_IDLE; From ffb9ee8eb272ba2b5a7325e69bb98118869637db Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Mon, 19 Jul 2021 13:13:33 -0400 Subject: [PATCH 778/794] drm/amd/display: Assume LTTPR interop for DCN31+ [WHY] For DCN31 onward, LTTPR is to be enabled and set to Transparent by VBIOS. Driver is to assume that VBIOS has done this without needing to check the VBIOS interop bit. [HOW] Add LTTPR enable and interop VBIOS bits into dc->caps, and force-set the interop bit to true for DCN31+. 
Reviewed-by: Jun Lei Acked-by: Aurabindo Pillai Signed-off-by: Wesley Chalmers Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 21 ++----------------- drivers/gpu/drm/amd/display/dc/dc.h | 2 ++ .../drm/amd/display/dc/dcn30/dcn30_resource.c | 20 ++++++++++++++++++ .../drm/amd/display/dc/dcn31/dcn31_resource.c | 16 ++++++++++++++ 4 files changed, 40 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 9fb8c46dc606..a6d0fd24fd02 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3602,29 +3602,12 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link) bool dp_retrieve_lttpr_cap(struct dc_link *link) { uint8_t lttpr_dpcd_data[6]; - bool vbios_lttpr_enable = false; - bool vbios_lttpr_interop = false; - struct dc_bios *bios = link->dc->ctx->dc_bios; + bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable; + bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; enum dc_status status = DC_ERROR_UNEXPECTED; bool is_lttpr_present = false; memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data)); - /* Query BIOS to determine if LTTPR functionality is forced on by system */ - if (bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = bios->funcs->get_lttpr_caps(bios, &is_vbios_lttpr_enable); - vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - if (bios->funcs->get_lttpr_interop) { - enum bp_result bp_query_result; - uint8_t is_vbios_interop_enabled = 0; - - bp_query_result = bios->funcs->get_lttpr_interop(bios, &is_vbios_interop_enabled); - vbios_lttpr_interop = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; - } /* * Logic to determine LTTPR mode diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 
8dcea8ff5c5a..af7b60108e9d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -183,6 +183,8 @@ struct dc_caps { unsigned int cursor_cache_size; struct dc_plane_cap planes[MAX_PLANES]; struct dc_color_caps color; + bool vbios_lttpr_aware; + bool vbios_lttpr_enable; }; struct dc_bug_wa { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 596c97dce67e..253654d605c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2617,6 +2617,26 @@ static bool dcn30_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, + &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 38c010afade1..cd3248dc31d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1968,6 +1968,22 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read 
VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { From 06050a0f01dbac2ca33145ef19a72041206ea983 Mon Sep 17 00:00:00 2001 From: Bing Guo Date: Mon, 19 Jul 2021 18:24:06 -0400 Subject: [PATCH 779/794] drm/amd/display: Fix Dynamic bpp issue with 8K30 with Navi 1X Why: In DCN2x, HW doesn't automatically divide MASTER_UPDATE_LOCK_DB_X by the number of pipes ODM Combined. How: Set MASTER_UPDATE_LOCK_DB_X to the value that is adjusted by the number of pipes ODM Combined. Reviewed-by: Martin Leung Acked-by: Aurabindo Pillai Signed-off-by: Bing Guo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c index 7fa9fc656b0c..f6e747f25ebe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c @@ -464,7 +464,7 @@ void optc2_lock_doublebuffer_enable(struct timing_generator *optc) REG_UPDATE_2(OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_X, - h_blank_start - 200 - 1, + (h_blank_start - 200 - 1) / optc1->opp_count, MASTER_UPDATE_LOCK_DB_Y, v_blank_start - 1); } From cd7b0531a61811429e7907c875e864ab918f3e62 Mon Sep 17 00:00:00 2001 From: Bing Guo Date: Tue, 20 Jul 2021 15:13:38 -0400 Subject: [PATCH 780/794] drm/amd/display: Increase stutter watermark for dcn303 [Why&How] Hardware team suggested to use SRExitTime= 35.5us 
as w/a to prevent underflow in certain modes. Reviewed-by: Martin Leung Acked-by: Aurabindo Pillai Signed-off-by: Bing Guo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 833ab13fa834..dc7823d23ba8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -146,8 +146,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc = { .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ .num_states = 1, - .sr_exit_time_us = 26.5, - .sr_enter_plus_exit_time_us = 31, + .sr_exit_time_us = 35.5, + .sr_enter_plus_exit_time_us = 40, .urgent_latency_us = 4.0, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, From d5c5ac3a7bca35261eb599204cbf1efee0af22cc Mon Sep 17 00:00:00 2001 From: Jude Shih Date: Tue, 6 Jul 2021 18:04:11 +0800 Subject: [PATCH 781/794] drm/amd/display: Fix resetting DCN3.1 HW when resuming from S4 [Why] On S4 resume we also need to fix detection of when to reload DMCUB firmware because we're currently using the VBIOS version which isn't compatible with the driver version. [How] Update the hardware init check for DCN31 since it's the ASIC that has this issue. 
Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Jude Shih Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 973de346410d..27c7fa3110c8 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -267,11 +267,13 @@ void dmub_dcn31_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset) bool dmub_dcn31_is_hw_init(struct dmub_srv *dmub) { - uint32_t is_hw_init; + union dmub_fw_boot_status status; + uint32_t is_enable; - REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_hw_init); + status.all = REG_READ(DMCUB_SCRATCH0); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enable); - return is_hw_init != 0; + return is_enable != 0 && status.bits.dal_fw; } bool dmub_dcn31_is_supported(struct dmub_srv *dmub) From c4152b297d56d3696ad0a9003169bc5b98ad7b72 Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Thu, 22 Jul 2021 14:48:54 -0400 Subject: [PATCH 782/794] drm/amd/display: workaround for hard hang on HPD on native DP [Why] HPD disable and enable sequences are not mutually exclusive on Linux. For HPDs that spans over 1s (i.e. HPD low = 1s), part of the disable sequence (specifically, a request to SMU to lower refclk) could come right before the call to PHY enable, causing DMUB to access an unresponsive PHY and thus a hard hang on the system. [How] Disable 48mhz refclk off on native DP. 
Reviewed-by: Hersen Wu Acked-by: Aurabindo Pillai Signed-off-by: Qingqing Zhuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index c6f494f0dcea..6185f9475fa2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -66,9 +66,11 @@ int rn_get_active_display_cnt_wa( for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; + /* Extend the WA to DP for Linux*/ if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A || stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK || - stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) + stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK || + stream->signal == SIGNAL_TYPE_DISPLAY_PORT) tmds_present = true; } From 0e99e960ce6d5ff586fc0733bc393c087f52c27b Mon Sep 17 00:00:00 2001 From: Shirish S Date: Tue, 3 Aug 2021 14:03:44 +0530 Subject: [PATCH 783/794] drm/amdgpu/display: fix DMUB firmware version info DMUB firmware info is printed before it gets initialized. Correct this order to ensure true value is conveyed. 
Signed-off-by: Shirish S Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b53f49a23ddc..c0ae73b0691c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1548,6 +1548,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) } hdr = (const struct dmcub_firmware_header_v1_0 *)adev->dm.dmub_fw->data; + adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { adev->firmware.ucode[AMDGPU_UCODE_ID_DMCUB].ucode_id = @@ -1561,7 +1562,6 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) adev->dm.dmcub_fw_version); } - adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version); adev->dm.dmub_srv = kzalloc(sizeof(*adev->dm.dmub_srv), GFP_KERNEL); dmub_srv = adev->dm.dmub_srv; From e00f543d3596c71201438d967877138ab33bb3de Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Wed, 24 Feb 2021 11:48:23 +0800 Subject: [PATCH 784/794] drm/amdgpu: add DID for beige goby Add device ids. 
Signed-off-by: Chengming Gui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 361b86b71b56..5ed8381ae0f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1213,6 +1213,13 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + /* BEIGE_GOBY */ + {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0, 0, 0} }; From 2638a32348bbb1c384dbbd515fd2b12c155f0188 Mon Sep 17 00:00:00 2001 From: Dakshaja Uppalapati Date: Thu, 5 Aug 2021 18:13:32 +0530 Subject: [PATCH 785/794] RDMA/iw_cxgb4: Fix refcount underflow while destroying cqs. Previous atomic increment/decrement logic expects the atomic count to be '0' after the final decrement. Replacing atomic count with refcount does not allow that, as refcount_dec() considers count of 1 as underflow and triggers a kernel splat. Fix the current refcount logic by using the usual pattern of decrementing the refcount and test if it is '0' on the final deref in c4iw_destroy_cq(). Use wait_for_completion() instead of wait_event(). 
Fixes: 7183451f846d ("RDMA/cxgb4: Use refcount_t instead of atomic_t for reference counting") Link: https://lore.kernel.org/r/1628167412-12114-1-git-send-email-dakshaja@chelsio.com Signed-off-by: Dakshaja Uppalapati Reviewed-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 12 +++++++++--- drivers/infiniband/hw/cxgb4/ev.c | 6 ++---- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 3 ++- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 6c8c910f4e86..c7e8d7b3baa1 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -967,6 +967,12 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? npolled : err; } +void c4iw_cq_rem_ref(struct c4iw_cq *chp) +{ + if (refcount_dec_and_test(&chp->refcnt)) + complete(&chp->cq_rel_comp); +} + int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; @@ -976,8 +982,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) chp = to_c4iw_cq(ib_cq); xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); - refcount_dec(&chp->refcnt); - wait_event(chp->wait, !refcount_read(&chp->refcnt)); + c4iw_cq_rem_ref(chp); + wait_for_completion(&chp->cq_rel_comp); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); @@ -1081,7 +1087,7 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, spin_lock_init(&chp->lock); spin_lock_init(&chp->comp_handler_lock); refcount_set(&chp->refcnt, 1); - init_waitqueue_head(&chp->wait); + init_completion(&chp->cq_rel_comp); ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL); if (ret) goto err_destroy_cq; diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 7798d090888b..34211a533d5c 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -213,8 +213,7 @@ 
void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) break; } done: - if (refcount_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); + c4iw_cq_rem_ref(chp); c4iw_qp_rem_ref(&qhp->ibqp); out: return; @@ -234,8 +233,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - if (refcount_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); + c4iw_cq_rem_ref(chp); } else { pr_debug("unknown cqid 0x%x\n", qid); xa_unlock_irqrestore(&dev->cqs, flag); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 3883af3d2312..ac5f581aff4c 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -428,7 +428,7 @@ struct c4iw_cq { spinlock_t lock; spinlock_t comp_handler_lock; refcount_t refcnt; - wait_queue_head_t wait; + struct completion cq_rel_comp; struct c4iw_wr_wait *wr_waitp; }; @@ -979,6 +979,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void c4iw_cq_rem_ref(struct c4iw_cq *chp); int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); From acade6379930dfa7987f4bd9b26d1a701cc1b542 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 3 Aug 2021 06:25:28 -0700 Subject: [PATCH 786/794] perf/x86/intel: Apply mid ACK for small core A warning as below may be occasionally triggered in an ADL machine when these conditions occur: - Two perf record commands run one by one. Both record a PEBS event. - Both runs on small cores. - They have different adaptive PEBS configuration (PEBS_DATA_CFG). 
[ ] WARNING: CPU: 4 PID: 9874 at arch/x86/events/intel/ds.c:1743 setup_pebs_adaptive_sample_data+0x55e/0x5b0 [ ] RIP: 0010:setup_pebs_adaptive_sample_data+0x55e/0x5b0 [ ] Call Trace: [ ] [ ] intel_pmu_drain_pebs_icl+0x48b/0x810 [ ] perf_event_nmi_handler+0x41/0x80 [ ] [ ] __perf_event_task_sched_in+0x2c2/0x3a0 Different from the big core, the small core requires the ACK right before re-enabling counters in the NMI handler, otherwise a stale PEBS record may be dumped into the later NMI handler, which trigger the warning. Add a new mid_ack flag to track the case. Add all PMI handler bits in the struct x86_hybrid_pmu to track the bits for different types of PMUs. Apply mid ACK for the small cores on an Alder Lake machine. The existing hybrid() macro has a compile error when taking address of a bit-field variable. Add a new macro hybrid_bit() to get the bit-field value of a given PMU. Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support") Reported-by: Ammy Yi Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Tested-by: Ammy Yi Link: https://lkml.kernel.org/r/1627997128-57891-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 23 +++++++++++++++-------- arch/x86/events/perf_event.h | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fca7a6e2242f..ac6fd2dabf6a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ static int intel_pmu_handle_irq(struct pt_regs *regs) { - struct cpu_hw_events *cpuc; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + bool late_ack = hybrid_bit(cpuc->pmu, late_ack); + bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack); int loops; u64 status; int handled; int pmu_enabled; - cpuc = this_cpu_ptr(&cpu_hw_events); - /* * Save the PMU state. 
* It needs to be restored when leaving the handler. */ pmu_enabled = cpuc->enabled; /* - * No known reason to not always do late ACK, - * but just in case do it opt-in. + * In general, the early ACK is only applied for old platforms. + * For the big core starts from Haswell, the late ACK should be + * applied. + * For the small core after Tremont, we have to do the ACK right + * before re-enabling counters, which is in the middle of the + * NMI handler. */ - if (!x86_pmu.late_ack) + if (!late_ack && !mid_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); intel_bts_disable_local(); cpuc->enabled = 0; @@ -2958,6 +2962,8 @@ again: goto again; done: + if (mid_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); /* Only restore PMU state when it's active. See x86_pmu_disable(). */ cpuc->enabled = pmu_enabled; if (pmu_enabled) @@ -2969,7 +2975,7 @@ done: * have been reset. This avoids spurious NMIs on * Haswell CPUs. */ - if (x86_pmu.late_ack) + if (late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); return handled; } @@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void) static_branch_enable(&perf_is_hybrid); x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; - x86_pmu.late_ack = true; x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; @@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; + pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; @@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; pmu->cpu_type = hybrid_small; + pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; pmu->max_pebs_events = x86_pmu.max_pebs_events; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h 
index 2938c902ffbe..e3ac05c97b5e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -656,6 +656,10 @@ struct x86_hybrid_pmu { struct event_constraint *event_constraints; struct event_constraint *pebs_constraints; struct extra_reg *extra_regs; + + unsigned int late_ack :1, + mid_ack :1, + enabled_ack :1; }; static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu) @@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid; __Fp; \ })) +#define hybrid_bit(_pmu, _field) \ +({ \ + bool __Fp = x86_pmu._field; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = hybrid_pmu(_pmu)->_field; \ + \ + __Fp; \ +}) + enum hybrid_pmu_type { hybrid_big = 0x40, hybrid_small = 0x20, @@ -755,6 +769,7 @@ struct x86_pmu { /* PMI handler bits */ unsigned int late_ack :1, + mid_ack :1, enabled_ack :1; /* * sysfs attrs From 3d4e4face9c1548752a2891e98b38b100feee336 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Thu, 5 Aug 2021 18:05:37 +0800 Subject: [PATCH 787/794] io-wq: fix no lock protection of acct->nr_worker There is an acct->nr_worker visit without lock protection. Think about the case: two callers call io_wqe_wake_worker(), one is the original context and the other one is an io-worker (by calling io_wqe_enqueue(wqe, linked)), on two cpus in parallel, this may cause nr_worker to be larger than max_worker. Let's fix it by adding lock for it, and let's do nr_workers++ before create_io_worker. There may be an edge case that the first caller fails to create an io-worker, but the second caller doesn't know it and then quit creating io-worker as well: say nr_worker = max_worker - 1 cpu 0 cpu 1 io_wqe_wake_worker() io_wqe_wake_worker() nr_worker < max_worker nr_worker++ create_io_worker() nr_worker == max_worker failed return return But the chance of this case is very slim.
Fixes: 685fe7feedb9 ("io-wq: eliminate the need for a manager thread") Signed-off-by: Hao Xu [axboe: fix unconditional create_io_worker() call] Signed-off-by: Jens Axboe --- fs/io-wq.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 50dc93ffc153..64d904ce4f2d 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -247,10 +247,19 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) ret = io_wqe_activate_free_worker(wqe); rcu_read_unlock(); - if (!ret && acct->nr_workers < acct->max_workers) { - atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); - create_io_worker(wqe->wq, wqe, acct->index); + if (!ret) { + bool do_create = false; + + raw_spin_lock_irq(&wqe->lock); + if (acct->nr_workers < acct->max_workers) { + atomic_inc(&acct->nr_running); + atomic_inc(&wqe->wq->worker_refs); + acct->nr_workers++; + do_create = true; + } + raw_spin_unlock_irq(&wqe->lock); + if (do_create) + create_io_worker(wqe->wq, wqe, acct->index); } } @@ -635,6 +644,9 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) kfree(worker); fail: atomic_dec(&acct->nr_running); + raw_spin_lock_irq(&wqe->lock); + acct->nr_workers--; + raw_spin_unlock_irq(&wqe->lock); io_worker_ref_put(wq); return; } @@ -650,9 +662,8 @@ fail: worker->flags |= IO_WORKER_F_FREE; if (index == IO_WQ_ACCT_BOUND) worker->flags |= IO_WORKER_F_BOUND; - if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) + if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND)) worker->flags |= IO_WORKER_F_FIXED; - acct->nr_workers++; raw_spin_unlock_irq(&wqe->lock); wake_up_new_task(tsk); } From 21698274da5b6fc724b005bc7ec3e6b9fbcfaa06 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Thu, 5 Aug 2021 18:05:38 +0800 Subject: [PATCH 788/794] io-wq: fix lack of acct->nr_workers < acct->max_workers judgement There should be this judgement before we create an io-worker Fixes: 685fe7feedb9 ("io-wq: 
eliminate the need for a manager thread") Signed-off-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io-wq.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 64d904ce4f2d..12fc19353bb0 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -280,9 +280,17 @@ static void create_worker_cb(struct callback_head *cb) { struct create_worker_data *cwd; struct io_wq *wq; + struct io_wqe *wqe; + struct io_wqe_acct *acct; cwd = container_of(cb, struct create_worker_data, work); - wq = cwd->wqe->wq; + wqe = cwd->wqe; + wq = wqe->wq; + acct = &wqe->acct[cwd->index]; + raw_spin_lock_irq(&wqe->lock); + if (acct->nr_workers < acct->max_workers) + acct->nr_workers++; + raw_spin_unlock_irq(&wqe->lock); create_io_worker(wq, cwd->wqe, cwd->index); kfree(cwd); } From 7b40066c97ec66a44e388f82fcf694987451768f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 5 Aug 2021 15:29:54 -0400 Subject: [PATCH 789/794] tracepoint: Use rcu get state and cond sync for static call updates State transitions from 1->0->1 and N->2->1 callbacks require RCU synchronization. Rather than performing the RCU synchronization every time the state change occurs, which is quite slow when many tracepoints are registered in batch, instead keep a snapshot of the RCU state on the most recent transitions which belong to a chain, and conditionally wait for a grace period on the last transition of the chain if one g.p. has not elapsed since the last snapshot. This applies to both RCU and SRCU. This brings the performance regression caused by commit 231264d6927f ("Fix: tracepoint: static call function vs data state mismatch") back to what it was originally. 
Before this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m10.593s user 0m0.017s sys 0m0.259s After this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m0.878s user 0m0.000s sys 0m0.103s Link: https://lkml.kernel.org/r/20210805192954.30688-1-mathieu.desnoyers@efficios.com Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: "Paul E. McKenney" Cc: Stefan Metzmacher Fixes: 231264d6927f ("Fix: tracepoint: static call function vs data state mismatch") Signed-off-by: Mathieu Desnoyers Reviewed-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- kernel/tracepoint.c | 81 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 8d772bd6894d..efd14c79fab4 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -28,6 +28,44 @@ extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; DEFINE_SRCU(tracepoint_srcu); EXPORT_SYMBOL_GPL(tracepoint_srcu); +enum tp_transition_sync { + TP_TRANSITION_SYNC_1_0_1, + TP_TRANSITION_SYNC_N_2_1, + + _NR_TP_TRANSITION_SYNC, +}; + +struct tp_transition_snapshot { + unsigned long rcu; + unsigned long srcu; + bool ongoing; +}; + +/* Protected by tracepoints_mutex */ +static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC]; + +static void tp_rcu_get_state(enum tp_transition_sync sync) +{ + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; + + /* Keep the latest get_state snapshot. 
*/ + snapshot->rcu = get_state_synchronize_rcu(); + snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu); + snapshot->ongoing = true; +} + +static void tp_rcu_cond_sync(enum tp_transition_sync sync) +{ + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; + + if (!snapshot->ongoing) + return; + cond_synchronize_rcu(snapshot->rcu); + if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu)) + synchronize_srcu(&tracepoint_srcu); + snapshot->ongoing = false; +} + /* Set to 1 to enable tracepoint debug output */ static const int tracepoint_debug; @@ -311,6 +349,11 @@ static int tracepoint_add_func(struct tracepoint *tp, */ switch (nr_func_state(tp_funcs)) { case TP_FUNC_1: /* 0->1 */ + /* + * Make sure new static func never uses old data after a + * 1->0->1 transition sequence. + */ + tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); /* Both iterator and static call handle NULL tp->funcs */ @@ -325,10 +368,15 @@ static int tracepoint_add_func(struct tracepoint *tp, * Requires ordering between RCU assign/dereference and * static call update/call. */ - rcu_assign_pointer(tp->funcs, tp_funcs); - break; + fallthrough; case TP_FUNC_N: /* N->N+1 (N>1) */ rcu_assign_pointer(tp->funcs, tp_funcs); + /* + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>1) transition sequence. + */ + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); @@ -372,24 +420,23 @@ static int tracepoint_remove_func(struct tracepoint *tp, /* Both iterator and static call handle NULL tp->funcs */ rcu_assign_pointer(tp->funcs, NULL); /* - * Make sure new func never uses old data after a 1->0->1 - * transition sequence. 
- * Considering that transition 0->1 is the common case - * and don't have rcu-sync, issue rcu-sync after - * transition 1->0 to break that sequence by waiting for - * readers to be quiescent. + * Make sure new static func never uses old data after a + * 1->0->1 transition sequence. */ - tracepoint_synchronize_unregister(); + tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1); break; case TP_FUNC_1: /* 2->1 */ rcu_assign_pointer(tp->funcs, tp_funcs); /* - * On 2->1 transition, RCU sync is needed before setting - * static call to first callback, because the observer - * may have loaded any prior tp->funcs after the last one - * associated with an rcu-sync. + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>2) transition sequence. If the first + * element's data has changed, then force the synchronization + * to prevent current readers that have loaded the old data + * from calling the new function. */ - tracepoint_synchronize_unregister(); + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); + tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); break; @@ -397,6 +444,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, fallthrough; case TP_FUNC_N: rcu_assign_pointer(tp->funcs, tp_funcs); + /* + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>2) transition sequence. 
+ */ + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); From 877ba3f729fd3d8ef0e29bc2a55e57cfa54b2e43 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 4 Aug 2021 14:23:55 -0400 Subject: [PATCH 790/794] ext4: fix potential htree corruption when growing large_dir directories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b5776e7524af ("ext4: fix potential htree index checksum corruption") removed a required restart when multiple levels of index nodes need to be split. Fix this to avoid directory htree corruptions when using the large_dir feature. Cc: stable@kernel.org # v5.11 Cc: Благодаренко Артём Fixes: b5776e7524af ("ext4: fix potential htree index checksum corruption") Reported-by: Denis Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5fd56f616cf0..f3bbcd4efb56 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2517,7 +2517,7 @@ again: goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); - if (err) + if (restart || err) goto journal_error; } else { struct dx_root *dxroot; From fb7b9b0231ba8f77587c23f5257a4fdb6df1219e Mon Sep 17 00:00:00 2001 From: Vincent Fu Date: Wed, 4 Aug 2021 19:49:23 +0000 Subject: [PATCH 791/794] kyber: make trace_block_rq call consistent with documentation The kyber ioscheduler calls trace_block_rq_insert() *after* the request is added to the queue but the documentation for trace_block_rq_insert() says that the call should be made *before* the request is added to the queue. Move the tracepoint for the kyber ioscheduler so that it is consistent with the documentation.
Signed-off-by: Vincent Fu Link: https://lore.kernel.org/r/20210804194913.10497-1-vincent.fu@samsung.com Reviewed-by: Adam Manzanares Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 81e3279ecd57..15a8be57203d 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -596,13 +596,13 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *head = &kcq->rq_list[sched_domain]; spin_lock(&kcq->lock); + trace_block_rq_insert(rq); if (at_head) list_move(&rq->queuelist, head); else list_move_tail(&rq->queuelist, head); sbitmap_set_bit(&khd->kcq_map[sched_domain], rq->mq_ctx->index_hw[hctx->type]); - trace_block_rq_insert(rq); spin_unlock(&kcq->lock); } } From 6d7f91d914bc90a15ebc426440c26081337ceaa1 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 21 Jul 2021 09:59:35 +0200 Subject: [PATCH 792/794] riscv: Get rid of CONFIG_PHYS_RAM_BASE in kernel physical address conversion The usage of CONFIG_PHYS_RAM_BASE for all kernel types was a mistake: this value is implementation-specific and this breaks the genericity of the RISC-V kernel. Fix this by introducing a new variable phys_ram_base that holds this value at runtime and use it in the kernel physical address conversion macro. Since this value is used only for XIP kernels, evaluate it only if CONFIG_XIP_KERNEL is set which in addition optimizes this macro for standard kernels at compile-time.
Signed-off-by: Alexandre Ghiti Tested-by: Emil Renner Berthing Reviewed-by: Jisheng Zhang Fixes: 44c922572952 ("RISC-V: enable XIP") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 7 ++++--- arch/riscv/mm/init.c | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index cca8764aed83..b0ca5058e7ae 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -103,6 +103,7 @@ struct kernel_mapping { }; extern struct kernel_mapping kernel_map; +extern phys_addr_t phys_ram_base; #ifdef CONFIG_64BIT #define is_kernel_mapping(x) \ @@ -113,9 +114,9 @@ extern struct kernel_mapping kernel_map; #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) #define kernel_mapping_pa_to_va(y) ({ \ unsigned long _y = y; \ - (_y >= CONFIG_PHYS_RAM_BASE) ? \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET) : \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? 
\ + (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) : \ + (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ }) #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a14bf3910eec..88134cc288d9 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -36,6 +36,9 @@ EXPORT_SYMBOL(kernel_map); #define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map)) #endif +phys_addr_t phys_ram_base __ro_after_init; +EXPORT_SYMBOL(phys_ram_base); + #ifdef CONFIG_XIP_KERNEL extern char _xiprom[], _exiprom[]; #endif @@ -160,7 +163,7 @@ static void __init setup_bootmem(void) phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); phys_addr_t __maybe_unused max_mapped_addr; - phys_addr_t dram_end; + phys_addr_t phys_ram_end; #ifdef CONFIG_XIP_KERNEL vmlinux_start = __pa_symbol(&_sdata); @@ -181,9 +184,12 @@ static void __init setup_bootmem(void) #endif memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - dram_end = memblock_end_of_DRAM(); + phys_ram_end = memblock_end_of_DRAM(); #ifndef CONFIG_64BIT +#ifndef CONFIG_XIP_KERNEL + phys_ram_base = memblock_start_of_DRAM(); +#endif /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE @@ -194,12 +200,12 @@ static void __init setup_bootmem(void) * be done in create_kernel_page_table. 
*/ max_mapped_addr = __pa(~(ulong)0); - if (max_mapped_addr == (dram_end - 1)) + if (max_mapped_addr == (phys_ram_end - 1)) memblock_set_current_limit(max_mapped_addr - 4096); #endif - min_low_pfn = PFN_UP(memblock_start_of_DRAM()); - max_low_pfn = max_pfn = PFN_DOWN(dram_end); + min_low_pfn = PFN_UP(phys_ram_base); + max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); @@ -558,6 +564,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); + phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); From 867432bec1c6e7df21a361d7f12022a8c5f54022 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 21 Jul 2021 09:59:36 +0200 Subject: [PATCH 793/794] Revert "riscv: Remove CONFIG_PHYS_RAM_BASE_FIXED" This reverts commit 9b79878ced8f7ab85c57623f8b1f6882e484a316. The removal of this config exposes CONFIG_PHYS_RAM_BASE for all kernel types: this value being implementation-specific, this breaks the genericity of the RISC-V kernel so revert it. 
Signed-off-by: Alexandre Ghiti Tested-by: Emil Renner Berthing Reviewed-by: Jisheng Zhang Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 31f9e92f1402..4f7b70ae7c31 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -495,8 +495,13 @@ config STACKPROTECTOR_PER_TASK depends on !GCC_PLUGIN_RANDSTRUCT depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS +config PHYS_RAM_BASE_FIXED + bool "Explicitly specified physical RAM address" + default n + config PHYS_RAM_BASE hex "Platform Physical RAM address" + depends on PHYS_RAM_BASE_FIXED default "0x80000000" help This is the physical address of RAM in the system. It has to be @@ -509,6 +514,7 @@ config XIP_KERNEL # This prevents XIP from being enabled by all{yes,mod}config, which # fail to build since XIP doesn't support large kernels. depends on !COMPILE_TEST + select PHYS_RAM_BASE_FIXED help Execute-In-Place allows the kernel to run from non-volatile storage directly addressable by the CPU, such as NOR flash. This saves RAM From 36a21d51725af2ce0700c6ebcb6b9594aac658a6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 Aug 2021 13:49:31 -0700 Subject: [PATCH 794/794] Linux 5.14-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9ca176ff1e40..eae1314a5b86 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Opossums on Parade # *DOCUMENTATION*