From 01341fbd0d8d4e717fc1231cdffe00343088ce0b Mon Sep 17 00:00:00 2001
From: Yunfeng Ye <yeyunfeng@huawei.com>
Date: Thu, 19 Nov 2020 14:21:25 +0800
Subject: [PATCH 001/547] workqueue: Kick a worker based on the actual
 activation of delayed works

In realtime scenario, We do not want to have interference on the
isolated cpu cores. but when invoking alloc_workqueue() for percpu wq
on the housekeeping cpu, it kick a kworker on the isolated cpu.

  alloc_workqueue
    pwq_adjust_max_active
      wake_up_worker

The comment in pwq_adjust_max_active() said:
  "Need to kick a worker after thawed or an unbound wq's
   max_active is bumped"

So it is unnecessary to kick a kworker for percpu's wq when invoking
alloc_workqueue(). this patch only kick a worker based on the actual
activation of delayed works.

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 437935e7a199..0695c7895c89 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3728,17 +3728,24 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 	 * is updated and visible.
 	 */
 	if (!freezable || !workqueue_freezing) {
+		bool kick = false;
+
 		pwq->max_active = wq->saved_max_active;
 
 		while (!list_empty(&pwq->delayed_works) &&
-		       pwq->nr_active < pwq->max_active)
+		       pwq->nr_active < pwq->max_active) {
 			pwq_activate_first_delayed(pwq);
+			kick = true;
+		}
 
 		/*
 		 * Need to kick a worker after thawed or an unbound wq's
-		 * max_active is bumped.  It's a slow path.  Do it always.
+		 * max_active is bumped. In realtime scenarios, always kicking a
+		 * worker will cause interference on the isolated cpu cores, so
+		 * let's kick iff work items were activated.
 		 */
-		wake_up_worker(pwq->pool);
+		if (kick)
+			wake_up_worker(pwq->pool);
 	} else {
 		pwq->max_active = 0;
 	}

From 58315c96651152b9f438e5e56c910994234e2c7a Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 9 Nov 2020 16:01:11 +0530
Subject: [PATCH 002/547] kernel: cgroup: Mundane spelling fixes throughout the
 file

Few spelling fixes throughout the file.

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cgroup.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e41c21819ba0..690f477d537c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -244,7 +244,7 @@ bool cgroup_ssid_enabled(int ssid)
  *
  * The default hierarchy is the v2 interface of cgroup and this function
  * can be used to test whether a cgroup is on the default hierarchy for
- * cases where a subsystem should behave differnetly depending on the
+ * cases where a subsystem should behave differently depending on the
  * interface version.
  *
  * List of changed behaviors:
@@ -262,7 +262,7 @@ bool cgroup_ssid_enabled(int ssid)
  *   "cgroup.procs" instead.
  *
  * - "cgroup.procs" is not sorted.  pids will be unique unless they got
- *   recycled inbetween reads.
+ *   recycled in-between reads.
  *
  * - "release_agent" and "notify_on_release" are removed.  Replacement
  *   notification mechanism will be implemented.
@@ -345,7 +345,7 @@ static bool cgroup_is_mixable(struct cgroup *cgrp)
 	return !cgroup_parent(cgrp);
 }
 
-/* can @cgrp become a thread root? should always be true for a thread root */
+/* can @cgrp become a thread root? Should always be true for a thread root */
 static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
 {
 	/* mixables don't care */
@@ -530,7 +530,7 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
  * the root css is returned, so this function always returns a valid css.
  *
  * The returned css is not guaranteed to be online, and therefore it is the
- * callers responsiblity to tryget a reference for it.
+ * callers responsibility to try get a reference for it.
  */
 struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 					 struct cgroup_subsys *ss)
@@ -702,7 +702,7 @@ EXPORT_SYMBOL_GPL(of_css);
 			;						\
 		else
 
-/* walk live descendants in preorder */
+/* walk live descendants in pre order */
 #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
 	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
 		if (({ lockdep_assert_held(&cgroup_mutex);		\
@@ -936,7 +936,7 @@ void put_css_set_locked(struct css_set *cset)
 
 	WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
 
-	/* This css_set is dead. unlink it and release cgroup and css refs */
+	/* This css_set is dead. Unlink it and release cgroup and css refs */
 	for_each_subsys(ss, ssid) {
 		list_del(&cset->e_cset_node[ssid]);
 		css_put(cset->subsys[ssid]);
@@ -1061,7 +1061,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
 
 	/*
 	 * Build the set of subsystem state objects that we want to see in the
-	 * new css_set. while subsystems can change globally, the entries here
+	 * new css_set. While subsystems can change globally, the entries here
 	 * won't change, so no need for locking.
 	 */
 	for_each_subsys(ss, i) {
@@ -1151,7 +1151,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
 
 	/*
 	 * Always add links to the tail of the lists so that the lists are
-	 * in choronological order.
+	 * in chronological order.
 	 */
 	list_move_tail(&link->cset_link, &cgrp->cset_links);
 	list_add_tail(&link->cgrp_link, &cset->cgrp_links);
@@ -4137,7 +4137,7 @@ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
 	 * implies that if we observe !CSS_RELEASED on @pos in this RCU
 	 * critical section, the one pointed to by its next pointer is
 	 * guaranteed to not have finished its RCU grace period even if we
-	 * have dropped rcu_read_lock() inbetween iterations.
+	 * have dropped rcu_read_lock() in-between iterations.
 	 *
 	 * If @pos has CSS_RELEASED set, its next pointer can't be
 	 * dereferenced; however, as each css is given a monotonically
@@ -4385,7 +4385,7 @@ static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
 }
 
 /**
- * css_task_iter_advance_css_set - advance a task itererator to the next css_set
+ * css_task_iter_advance_css_set - advance a task iterator to the next css_set
  * @it: the iterator to advance
  *
  * Advance @it to the next css_set to walk.
@@ -6320,7 +6320,7 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
  *
  * Find the cgroup at @path on the default hierarchy, increment its
  * reference count and return it.  Returns pointer to the found cgroup on
- * success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR)
+ * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
  * if @path points to a non-directory.
  */
 struct cgroup *cgroup_get_from_path(const char *path)

From 5a7b5f32c5aa628841502d19a813c633ff6ecbe4 Mon Sep 17 00:00:00 2001
From: Hui Su <sh_def@163.com>
Date: Fri, 6 Nov 2020 22:47:40 +0800
Subject: [PATCH 003/547] cgroup/cgroup.c: replace 'of->kn->priv' with of_cft()

we have supplied the inline function: of_cft() in cgroup.h.

So replace the direct use 'of->kn->priv' with inline func
of_cft(), which is more readable.

Signed-off-by: Hui Su <sh_def@163.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cgroup.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 690f477d537c..385f80cf7d0c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3657,7 +3657,7 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
 
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
-	struct cftype *cft = of->kn->priv;
+	struct cftype *cft = of_cft(of);
 
 	if (cft->open)
 		return cft->open(of);
@@ -3666,7 +3666,7 @@ static int cgroup_file_open(struct kernfs_open_file *of)
 
 static void cgroup_file_release(struct kernfs_open_file *of)
 {
-	struct cftype *cft = of->kn->priv;
+	struct cftype *cft = of_cft(of);
 
 	if (cft->release)
 		cft->release(of);
@@ -3677,7 +3677,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 {
 	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
 	struct cgroup *cgrp = of->kn->parent->priv;
-	struct cftype *cft = of->kn->priv;
+	struct cftype *cft = of_cft(of);
 	struct cgroup_subsys_state *css;
 	int ret;
 
@@ -3727,7 +3727,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 
 static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
 {
-	struct cftype *cft = of->kn->priv;
+	struct cftype *cft = of_cft(of);
 
 	if (cft->poll)
 		return cft->poll(of, pt);

From 665f1388bc9713c81989dda6eed5cde52d57c255 Mon Sep 17 00:00:00 2001
From: Andrey Zhizhikin <andrey.zhizhikin@leica-geosystems.com>
Date: Mon, 30 Nov 2020 12:42:33 +0000
Subject: [PATCH 004/547] ARM: omap2plus_defconfig: drop unused POWER_AVS
 option

Commit 785b5bb41b0a ("PM: AVS: Drop the avs directory and the
corresponding Kconfig") moved AVS code to SOC-specific folders, and
removed corresponding Kconfig from drivers/power, leaving original
POWER_AVS config option enabled in omap2plus_defconfig file.

Remove the option, which has no references in the tree anymore.

Fixes: 785b5bb41b0a ("PM: AVS: Drop the avs directory and the corresponding Kconfig")
Signed-off-by: Andrey Zhizhikin <andrey.zhizhikin@leica-geosystems.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Nishanth Menon <nm@ti.com>
Cc: Nishanth Menon <nm@ti.com>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/configs/omap2plus_defconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 77716f500813..23b53526c4eb 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -297,7 +297,6 @@ CONFIG_GPIO_TWL4030=y
 CONFIG_W1=m
 CONFIG_HDQ_MASTER_OMAP=m
 CONFIG_W1_SLAVE_DS250X=m
-CONFIG_POWER_AVS=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_GPIO=y
 CONFIG_BATTERY_BQ27XXX=m

From f1dc15cd7fc146107cad2a926d9c1d005f69002a Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 29 Nov 2020 16:47:10 +0200
Subject: [PATCH 005/547] ARM: dts: OMAP3: disable AES on N950/N9

AES needs to be disabled on Nokia N950/N9 as well (HS devices), otherwise
kernel fails to boot.

Fixes: c312f066314e ("ARM: dts: omap3: Migrate AES from hwmods to sysc-omap2")
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/omap3-n950-n9.dtsi | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index 11d41e86f814..7dde9fbb06d3 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -494,3 +494,11 @@
 		clock-names = "sysclk";
 	};
 };
+
+&aes1_target {
+	status = "disabled";
+};
+
+&aes2_target {
+	status = "disabled";
+};

From 9836720911cfec25d3fbdead1c438bf87e0f2841 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 22 Nov 2020 04:36:52 +0900
Subject: [PATCH 006/547] ARC: build: remove non-existing bootpImage from
 KBUILD_IMAGE

The deb-pkg builds for ARCH=arc fail.

  $ export CROSS_COMPILE=<your-arc-compiler-prefix>
  $ make -s ARCH=arc defconfig
  $ make ARCH=arc bindeb-pkg
  SORTTAB vmlinux
  SYSMAP  System.map
  MODPOST Module.symvers
  make KERNELRELEASE=5.10.0-rc4 ARCH=arc KBUILD_BUILD_VERSION=2 -f ./Makefile intdeb-pkg
  sh ./scripts/package/builddeb
  cp: cannot stat 'arch/arc/boot/bootpImage': No such file or directory
  make[4]: *** [scripts/Makefile.package:87: intdeb-pkg] Error 1
  make[3]: *** [Makefile:1527: intdeb-pkg] Error 2
  make[2]: *** [debian/rules:13: binary-arch] Error 2
  dpkg-buildpackage: error: debian/rules binary subprocess returned exit status 2
  make[1]: *** [scripts/Makefile.package:83: bindeb-pkg] Error 2
  make: *** [Makefile:1527: bindeb-pkg] Error 2

The reason is obvious; arch/arc/Makefile sets $(boot)/bootpImage as
the default image, but there is no rule to build it.

Remove the meaningless KBUILD_IMAGE assignment so it will fallback
to the default vmlinux. With this change, you can build the deb package.

I removed the 'bootpImage' target as well. At best, it provides
'make bootpImage' as an alias of 'make vmlinux', but I do not see
much sense in doing so.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Makefile | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 0c6bf0d1df7a..acf99420e161 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -102,12 +102,6 @@ libs-y		+= arch/arc/lib/ $(LIBGCC)
 
 boot		:= arch/arc/boot
 
-#default target for make without any arguments.
-KBUILD_IMAGE	:= $(boot)/bootpImage
-
-all:	bootpImage
-bootpImage: vmlinux
-
 boot_targets += uImage uImage.bin uImage.gz
 
 $(boot_targets): vmlinux

From f2712ec76a5433e5ec9def2bd52a95df1f96d050 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 22 Nov 2020 04:36:53 +0900
Subject: [PATCH 007/547] ARC: build: add uImage.lzma to the top-level target

arch/arc/boot/Makefile supports uImage.lzma, but you cannot do
'make uImage.lzma' because the corresponding target is missing
in arch/arc/Makefile. Add it.

I also changed the assignment operator '+=' to ':=' since this is the
only place where we expect this variable to be set.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index acf99420e161..61a41123ad4c 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -102,7 +102,7 @@ libs-y		+= arch/arc/lib/ $(LIBGCC)
 
 boot		:= arch/arc/boot
 
-boot_targets += uImage uImage.bin uImage.gz
+boot_targets := uImage uImage.bin uImage.gz uImage.lzma
 
 $(boot_targets): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@

From 0cfccb3c04934cdef42ae26042139f16e805b5f7 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 22 Nov 2020 04:36:54 +0900
Subject: [PATCH 008/547] ARC: build: add boot_targets to PHONY

The top-level boot_targets (uImage and uImage.*) should be phony
targets. They just let Kbuild descend into arch/arc/boot/ and create
files there.

If a file exists in the top directory with the same name, the boot
image will not be created.

You can confirm it by the following steps:

  $ export CROSS_COMPILE=<your-arc-compiler-prefix>
  $ make -s ARCH=arc defconfig all   # vmlinux will be built
  $ touch uImage.gz
  $ make ARCH=arc uImage.gz
  CALL    scripts/atomic/check-atomics.sh
  CALL    scripts/checksyscalls.sh
  CHK     include/generated/compile.h
  # arch/arc/boot/uImage.gz is not created

Specify the targets as PHONY to fix this.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 61a41123ad4c..cf9da9aea12a 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -104,6 +104,7 @@ boot		:= arch/arc/boot
 
 boot_targets := uImage uImage.bin uImage.gz uImage.lzma
 
+PHONY += $(boot_targets)
 $(boot_targets): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 

From c5e6ae563c802c4d828d42e134af64004db2e58c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 22 Nov 2020 04:36:55 +0900
Subject: [PATCH 009/547] ARC: build: move symlink creation to
 arch/arc/Makefile to avoid race

If you run 'make uImage uImage.gz' with the parallel option, uImage.gz
will be created by two threads simultaneously.

This is because arch/arc/Makefile does not specify the dependency
between uImage and uImage.gz. Hence, GNU Make assumes they can be
built in parallel. One thread descends into arch/arc/boot/ to create
uImage, and another to create uImage.gz.

Please notice the same log is displayed twice in the following steps:

  $ export CROSS_COMPILE=<your-arc-compiler-prefix>
  $ make -s ARCH=arc defconfig
  $ make -j$(nproc) ARCH=arc uImage uImage.gz
  [ snip ]
    LD      vmlinux
    SORTTAB vmlinux
    SYSMAP  System.map
    OBJCOPY arch/arc/boot/vmlinux.bin
    OBJCOPY arch/arc/boot/vmlinux.bin
    GZIP    arch/arc/boot/vmlinux.bin.gz
    GZIP    arch/arc/boot/vmlinux.bin.gz
    UIMAGE  arch/arc/boot/uImage.gz
    UIMAGE  arch/arc/boot/uImage.gz
  Image Name:   Linux-5.10.0-rc4-00003-g62f23044
  Created:      Sun Nov 22 02:52:26 2020
  Image Type:   ARC Linux Kernel Image (gzip compressed)
  Data Size:    2109376 Bytes = 2059.94 KiB = 2.01 MiB
  Load Address: 80000000
  Entry Point:  80004000
    Image arch/arc/boot/uImage is ready
  Image Name:   Linux-5.10.0-rc4-00003-g62f23044
  Created:      Sun Nov 22 02:52:26 2020
  Image Type:   ARC Linux Kernel Image (gzip compressed)
  Data Size:    2815455 Bytes = 2749.47 KiB = 2.69 MiB
  Load Address: 80000000
  Entry Point:  80004000

This is a race between the two threads trying to write to the same file
arch/arc/boot/uImage.gz. This is a potential problem that can generate
a broken file.

I fixed a similar problem for ARM by commit 3939f3345050 ("ARM: 8418/1:
add boot image dependencies to not generate invalid images").

I highly recommend to avoid such build rules that cause a race condition.

Move the uImage rule to arch/arc/Makefile.

Another strangeness is that arch/arc/boot/Makefile compares the
timestamps between $(obj)/uImage and $(obj)/uImage.*:

  $(obj)/uImage: $(obj)/uImage.$(suffix-y)
          @ln -sf $(notdir $<) $@
          @echo '  Image $@ is ready'

This does not work as expected since $(obj)/uImage is a symlink.
The symlink should be created in a phony target rule.

I used $(kecho) instead of echo to suppress the message
'Image arch/arc/boot/uImage is ready' when the -s option is given.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Makefile      | 13 ++++++++++++-
 arch/arc/boot/Makefile | 11 +----------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index cf9da9aea12a..578bdbbb0fa7 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -102,11 +102,22 @@ libs-y		+= arch/arc/lib/ $(LIBGCC)
 
 boot		:= arch/arc/boot
 
-boot_targets := uImage uImage.bin uImage.gz uImage.lzma
+boot_targets := uImage.bin uImage.gz uImage.lzma
 
 PHONY += $(boot_targets)
 $(boot_targets): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
+uimage-default-y			:= uImage.bin
+uimage-default-$(CONFIG_KERNEL_GZIP)	:= uImage.gz
+uimage-default-$(CONFIG_KERNEL_LZMA)	:= uImage.lzma
+
+PHONY += uImage
+uImage: $(uimage-default-y)
+	@ln -sf $< $(boot)/uImage
+	@$(kecho) '  Image $(boot)/uImage is ready'
+
+CLEAN_FILES += $(boot)/uImage
+
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile
index 538b92f4dd25..3b1f8a69a89e 100644
--- a/arch/arc/boot/Makefile
+++ b/arch/arc/boot/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-targets := vmlinux.bin vmlinux.bin.gz uImage
+targets := vmlinux.bin vmlinux.bin.gz
 
 # uImage build relies on mkimage being availble on your host for ARC target
 # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage
@@ -13,11 +13,6 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \
 UIMAGE_LOADADDR    = $(CONFIG_LINUX_LINK_BASE)
 UIMAGE_ENTRYADDR   = $(LINUX_START_TEXT)
 
-suffix-y := bin
-suffix-$(CONFIG_KERNEL_GZIP)	:= gz
-suffix-$(CONFIG_KERNEL_LZMA)	:= lzma
-
-targets += uImage
 targets += uImage.bin
 targets += uImage.gz
 targets += uImage.lzma
@@ -42,7 +37,3 @@ $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
 
 $(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE
 	$(call if_changed,uimage,lzma)
-
-$(obj)/uImage: $(obj)/uImage.$(suffix-y)
-	@ln -sf $(notdir $<) $@
-	@echo '  Image $@ is ready'

From a4e070cfeb9d4961a169a2f1a614665cf51de963 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 22 Nov 2020 04:36:56 +0900
Subject: [PATCH 010/547] ARC: build: remove unneeded extra-y

Adding vmlinux.* to extra-y has no point because we expect they are
built on demand while building uImage.*

Add them to 'targets' is enough to include the corresponding .cmd file.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/boot/Makefile | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile
index 3b1f8a69a89e..b3870cc100bf 100644
--- a/arch/arc/boot/Makefile
+++ b/arch/arc/boot/Makefile
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-targets := vmlinux.bin vmlinux.bin.gz
 
 # uImage build relies on mkimage being availble on your host for ARC target
 # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage
@@ -13,12 +12,12 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \
 UIMAGE_LOADADDR    = $(CONFIG_LINUX_LINK_BASE)
 UIMAGE_ENTRYADDR   = $(LINUX_START_TEXT)
 
+targets += vmlinux.bin
+targets += vmlinux.bin.gz
+targets += vmlinux.bin.lzma
 targets += uImage.bin
 targets += uImage.gz
 targets += uImage.lzma
-extra-y += vmlinux.bin
-extra-y += vmlinux.bin.gz
-extra-y += vmlinux.bin.lzma
 
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)

From 3a71e423133a4b1166ffafcb4a7cfa87ddecb910 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 22 Nov 2020 04:36:57 +0900
Subject: [PATCH 011/547] ARC: build: use $(READELF) instead of hard-coded
 readelf

The top Makefile defines READELF as the readelf in the cross-toolchains.
Use it rather than the host readelf.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/boot/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile
index b3870cc100bf..5648748c285f 100644
--- a/arch/arc/boot/Makefile
+++ b/arch/arc/boot/Makefile
@@ -6,7 +6,7 @@
 
 OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S
 
-LINUX_START_TEXT = $$(readelf -h vmlinux | \
+LINUX_START_TEXT = $$($(READELF) -h vmlinux | \
 			grep "Entry point address" | grep -o 0x.*)
 
 UIMAGE_LOADADDR    = $(CONFIG_LINUX_LINK_BASE)

From 1967939462641d8b36bcb3fcf06d48e66cd67a4f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 29 Nov 2020 04:33:35 +0900
Subject: [PATCH 012/547] Compiler Attributes: remove CONFIG_ENABLE_MUST_CHECK

Revert commit cebc04ba9aeb ("add CONFIG_ENABLE_MUST_CHECK").

A lot of warn_unused_result warnings existed in 2006, but until now
they have been fixed thanks to people doing allmodconfig tests.

Our goal is to always enable __must_check where appropriate, so this
CONFIG option is no longer needed.

I see a lot of defconfig (arch/*/configs/*_defconfig) files having:

    # CONFIG_ENABLE_MUST_CHECK is not set

I did not touch them for now since it would be a big churn. If arch
maintainers want to clean them up, please go ahead.

While I was here, I also moved __must_check to compiler_attributes.h
from compiler_types.h

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
[Moved addition in compiler_attributes.h to keep it sorted]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 include/linux/compiler_attributes.h                 | 6 ++++++
 include/linux/compiler_types.h                      | 6 ------
 lib/Kconfig.debug                                   | 8 --------
 tools/testing/selftests/wireguard/qemu/debug.config | 1 -
 4 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index b2a3f4f641a7..ea5e04e75845 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -272,6 +272,12 @@
  */
 #define __used                          __attribute__((__used__))
 
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warn_005funused_005fresult-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#nodiscard-warn-unused-result
+ */
+#define __must_check                    __attribute__((__warn_unused_result__))
+
 /*
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index ac3fa37a84f9..7ef20d1a6c28 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -110,12 +110,6 @@ struct ftrace_likely_data {
 	unsigned long			constant;
 };
 
-#ifdef CONFIG_ENABLE_MUST_CHECK
-#define __must_check		__attribute__((__warn_unused_result__))
-#else
-#define __must_check
-#endif
-
 #if defined(CC_USING_HOTPATCH)
 #define notrace			__attribute__((hotpatch(0, 0)))
 #elif defined(CC_USING_PATCHABLE_FUNCTION_ENTRY)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c789b39ed527..cb8ef4fd0d02 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -286,14 +286,6 @@ config GDB_SCRIPTS
 
 endif # DEBUG_INFO
 
-config ENABLE_MUST_CHECK
-	bool "Enable __must_check logic"
-	default y
-	help
-	  Enable the __must_check logic in the kernel build.  Disable this to
-	  suppress the "warning: ignoring return value of 'foo', declared with
-	  attribute warn_unused_result" messages.
-
 config FRAME_WARN
 	int "Warn for stack frames larger than"
 	range 0 8192
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index b50c2085c1ac..fe07d97df9fa 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -1,5 +1,4 @@
 CONFIG_LOCALVERSION="-debug"
-CONFIG_ENABLE_MUST_CHECK=y
 CONFIG_FRAME_POINTER=y
 CONFIG_STACK_VALIDATION=y
 CONFIG_DEBUG_KERNEL=y

From ec76c2eea903947202098090bbe07a739b5246e9 Mon Sep 17 00:00:00 2001
From: Andreas Kemnade <andreas@kemnade.info>
Date: Fri, 4 Dec 2020 10:55:39 +0100
Subject: [PATCH 013/547] ARM: OMAP2+: omap_device: fix idling of devices
 during probe

On the GTA04A5 od->_driver_status was not set to BUS_NOTIFY_BIND_DRIVER
during probe of the second mmc used for wifi. Therefore
omap_device_late_idle idled the device during probing causing oopses when
accessing the registers.

It was not set because od->_state was set to OMAP_DEVICE_STATE_IDLE
in the notifier callback. Therefore set od->_driver_status also in that
case.

This came apparent after commit 21b2cec61c04 ("mmc: Set
PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4") causing this
oops:

omap_hsmmc 480b4000.mmc: omap_device_late_idle: enabled but no driver.  Idling
8<--- cut here ---
Unhandled fault: external abort on non-linefetch (0x1028) at 0xfa0b402c
...
(omap_hsmmc_set_bus_width) from [<c07996bc>] (omap_hsmmc_set_ios+0x11c/0x258)
(omap_hsmmc_set_ios) from [<c077b2b0>] (mmc_power_up.part.8+0x3c/0xd0)
(mmc_power_up.part.8) from [<c077c14c>] (mmc_start_host+0x88/0x9c)
(mmc_start_host) from [<c077d284>] (mmc_add_host+0x58/0x84)
(mmc_add_host) from [<c0799190>] (omap_hsmmc_probe+0x5fc/0x8c0)
(omap_hsmmc_probe) from [<c0666728>] (platform_drv_probe+0x48/0x98)
(platform_drv_probe) from [<c066457c>] (really_probe+0x1dc/0x3b4)

Fixes: 04abaf07f6d5 ("ARM: OMAP2+: omap_device: Sync omap_device and pm_runtime after probe defer")
Fixes: 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4")
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
[tony@atomide.com: left out extra parens, trimmed description stack trace]
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap_device.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index fc7bb2ca1672..64b23b0cd23c 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -230,10 +230,12 @@ static int _omap_device_notifier_call(struct notifier_block *nb,
 		break;
 	case BUS_NOTIFY_BIND_DRIVER:
 		od = to_omap_device(pdev);
-		if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED) &&
-		    pm_runtime_status_suspended(dev)) {
+		if (od) {
 			od->_driver_status = BUS_NOTIFY_BIND_DRIVER;
-			pm_runtime_set_active(dev);
+			if (od->_state == OMAP_DEVICE_STATE_ENABLED &&
+			    pm_runtime_status_suspended(dev)) {
+				pm_runtime_set_active(dev);
+			}
 		}
 		break;
 	case BUS_NOTIFY_ADD_DEVICE:

From 2f6fc9e08bf79f11516edef855283c6212bbe78f Mon Sep 17 00:00:00 2001
From: Andreas Kemnade <andreas@kemnade.info>
Date: Tue, 1 Dec 2020 20:12:37 +0100
Subject: [PATCH 014/547] ARM: omap2plus_defconfig: enable SPI GPIO

GTA04 uses that for controlling the td028ttec1 panel. So
for easier testing/bisecting it is useful to have it
enabled in the defconfig.

Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/configs/omap2plus_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 23b53526c4eb..28add0b64d42 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -280,6 +280,7 @@ CONFIG_SERIAL_OMAP_CONSOLE=y
 CONFIG_SERIAL_DEV_BUS=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_SPI=y
+CONFIG_SPI_GPIO=m
 CONFIG_SPI_OMAP24XX=y
 CONFIG_SPI_TI_QSPI=m
 CONFIG_HSI=m

From c0bc969c176b10598b31d5d1a5edf9a5261f0a9f Mon Sep 17 00:00:00 2001
From: Carl Philipp Klemm <philipp@uvos.xyz>
Date: Mon, 7 Dec 2020 20:58:01 +0100
Subject: [PATCH 015/547] ARM: omap2: pmic-cpcap: fix maximum voltage to be
 consistent with defaults on xt875

xt875 comes up with a iva voltage of 1375000 and android runs at this too. fix
maximum voltage to be consistent with this.

Signed-off-by: Carl Philipp Klemm <philipp@uvos.xyz>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/pmic-cpcap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/pmic-cpcap.c b/arch/arm/mach-omap2/pmic-cpcap.c
index eab281a5fc9f..09076ad0576d 100644
--- a/arch/arm/mach-omap2/pmic-cpcap.c
+++ b/arch/arm/mach-omap2/pmic-cpcap.c
@@ -71,7 +71,7 @@ static struct omap_voltdm_pmic omap_cpcap_iva = {
 	.vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN,
 	.vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX,
 	.vddmin = 900000,
-	.vddmax = 1350000,
+	.vddmax = 1375000,
 	.vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US,
 	.i2c_slave_addr = 0x44,
 	.volt_reg_addr = 0x0,

From 44fd9fb599d3d2be4c6838f4b11eaa459bb33989 Mon Sep 17 00:00:00 2001
From: Bean Huo <beanhuo@micron.com>
Date: Mon, 7 Dec 2020 20:01:36 +0100
Subject: [PATCH 016/547] scsi: ufs: Remove unused macro definition
 POWER_DESC_MAX_SIZE

POWER_DESC_MAX_SIZE is unused, remove it.

Link: https://lore.kernel.org/r/20201207190137.6858-2-huobean@gmail.com
Acked-by: Avri Altman <avri.altman@wdc.com>
Acked-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Bean Huo <beanhuo@micron.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufs.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index d593edb48767..142c2ebd3d7d 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -330,7 +330,6 @@ enum {
 	UFS_DEV_WRITE_BOOSTER_SUP	= BIT(8),
 };
 
-#define POWER_DESC_MAX_SIZE			0x62
 #define POWER_DESC_MAX_ACTV_ICC_LVLS		16
 
 /* Attribute  bActiveICCLevel parameter bit masks definitions */

From 1fa0570002e3f66db9b58c32c60de4183b857a19 Mon Sep 17 00:00:00 2001
From: Bean Huo <beanhuo@micron.com>
Date: Mon, 7 Dec 2020 20:01:37 +0100
Subject: [PATCH 017/547] scsi: ufs: Fix wrong print message in dev_err()

Change dev_err() print message from "dme-reset" to "dme_enable" in function
ufshcd_dme_enable().

Link: https://lore.kernel.org/r/20201207190137.6858-3-huobean@gmail.com
Acked-by: Alim Akhtar <alim.akhtar@samsung.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Signed-off-by: Bean Huo <beanhuo@micron.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index c1c401b2b69d..6bcedebae4a8 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3641,7 +3641,7 @@ static int ufshcd_dme_enable(struct ufs_hba *hba)
 	ret = ufshcd_send_uic_cmd(hba, &uic_cmd);
 	if (ret)
 		dev_err(hba->dev,
-			"dme-reset: error code %d\n", ret);
+			"dme-enable: error code %d\n", ret);
 
 	return ret;
 }

From 1918651f2d7e8d58c9b7c49755c61e41ed655009 Mon Sep 17 00:00:00 2001
From: Randall Huang <huangrandall@google.com>
Date: Mon, 30 Nov 2020 20:14:02 -0800
Subject: [PATCH 018/547] scsi: ufs: Clear UAC for RPMB after ufshcd resets

If RPMB is not provisioned, we may see RPMB failure after UFS
suspend/resume.  Inject request_sense to clear uac in ufshcd reset flow.

Link: https://lore.kernel.org/r/20201201041402.3860525-1-jaegeuk@kernel.org
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Randall Huang <huangrandall@google.com>
Signed-off-by: Leo Liou <leoliou@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 6bcedebae4a8..a84a8759437a 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -225,6 +225,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba);
 static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd);
 static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
 static void ufshcd_hba_exit(struct ufs_hba *hba);
+static int ufshcd_clear_ua_wluns(struct ufs_hba *hba);
 static int ufshcd_probe_hba(struct ufs_hba *hba, bool async);
 static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on);
 static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba);
@@ -6895,7 +6896,8 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
 
 	/* Establish the link again and restore the device */
 	err = ufshcd_probe_hba(hba, false);
-
+	if (!err)
+		ufshcd_clear_ua_wluns(hba);
 out:
 	if (err)
 		dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
@@ -8379,13 +8381,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 	 * handling context.
 	 */
 	hba->host->eh_noresume = 1;
-	if (hba->wlun_dev_clr_ua) {
-		ret = ufshcd_send_request_sense(hba, sdp);
-		if (ret)
-			goto out;
-		/* Unit attention condition is cleared now */
-		hba->wlun_dev_clr_ua = false;
-	}
+	ufshcd_clear_ua_wluns(hba);
 
 	cmd[4] = pwr_mode << 4;
 
@@ -8406,7 +8402,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 
 	if (!ret)
 		hba->curr_dev_pwr_mode = pwr_mode;
-out:
+
 	scsi_device_put(sdp);
 	hba->host->eh_noresume = 0;
 	return ret;

From f8162ac70ecf5a3ed638f96dc10e0e19b523ec7f Mon Sep 17 00:00:00 2001
From: Stanley Chu <stanley.chu@mediatek.com>
Date: Mon, 7 Dec 2020 13:49:54 +0800
Subject: [PATCH 019/547] scsi: ufs: Allow regulators being always-on

Introduce a flag "always_on" in struct ufs_vreg to allow vendors to keep
the regulator always-on.

Link: https://lore.kernel.org/r/20201207054955.24366-2-stanley.chu@mediatek.com
Reviewed-by: Andy Teng <andy.teng@mediatek.com>
Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufs.h    | 1 +
 drivers/scsi/ufs/ufshcd.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 142c2ebd3d7d..14dfda735adf 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -512,6 +512,7 @@ struct ufs_query_res {
 struct ufs_vreg {
 	struct regulator *reg;
 	const char *name;
+	bool always_on;
 	bool enabled;
 	int min_uV;
 	int max_uV;
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index a84a8759437a..4f3ac2566322 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -8012,7 +8012,7 @@ static int ufshcd_disable_vreg(struct device *dev, struct ufs_vreg *vreg)
 {
 	int ret = 0;
 
-	if (!vreg || !vreg->enabled)
+	if (!vreg || !vreg->enabled || vreg->always_on)
 		goto out;
 
 	ret = regulator_disable(vreg->reg);

From b3f3d31a528f78d9903253a23a5e5c6bf5280f40 Mon Sep 17 00:00:00 2001
From: Stanley Chu <stanley.chu@mediatek.com>
Date: Mon, 7 Dec 2020 13:49:55 +0800
Subject: [PATCH 020/547] scsi: ufs-mediatek: Keep VCC always-on for specific
 devices

For some devices which need extra delay after VCC power down, VCC shall be
kept always-on in some MediaTek UFS platforms to ensure the stability of
such devices because the extra delay may not be enough in those platforms.

Link: https://lore.kernel.org/r/20201207054955.24366-3-stanley.chu@mediatek.com
Reviewed-by: Andy Teng <andy.teng@mediatek.com>
Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufs-mediatek.c | 21 +++++++++++++++++++++
 drivers/scsi/ufs/ufs-mediatek.h |  1 +
 2 files changed, 22 insertions(+)

diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c
index 3522458db3bb..80618af7c872 100644
--- a/drivers/scsi/ufs/ufs-mediatek.c
+++ b/drivers/scsi/ufs/ufs-mediatek.c
@@ -70,6 +70,13 @@ static bool ufs_mtk_is_va09_supported(struct ufs_hba *hba)
 	return !!(host->caps & UFS_MTK_CAP_VA09_PWR_CTRL);
 }
 
+static bool ufs_mtk_is_broken_vcc(struct ufs_hba *hba)
+{
+	struct ufs_mtk_host *host = ufshcd_get_variant(hba);
+
+	return !!(host->caps & UFS_MTK_CAP_BROKEN_VCC);
+}
+
 static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable)
 {
 	u32 tmp;
@@ -514,6 +521,9 @@ static void ufs_mtk_init_host_caps(struct ufs_hba *hba)
 	if (of_property_read_bool(np, "mediatek,ufs-disable-ah8"))
 		host->caps |= UFS_MTK_CAP_DISABLE_AH8;
 
+	if (of_property_read_bool(np, "mediatek,ufs-broken-vcc"))
+		host->caps |= UFS_MTK_CAP_BROKEN_VCC;
+
 	dev_info(hba->dev, "caps: 0x%x", host->caps);
 }
 
@@ -1003,6 +1013,17 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba)
 static void ufs_mtk_fixup_dev_quirks(struct ufs_hba *hba)
 {
 	ufshcd_fixup_dev_quirks(hba, ufs_mtk_dev_fixups);
+
+	if (ufs_mtk_is_broken_vcc(hba) && hba->vreg_info.vcc &&
+	    (hba->dev_quirks & UFS_DEVICE_QUIRK_DELAY_AFTER_LPM)) {
+		hba->vreg_info.vcc->always_on = true;
+		/*
+		 * VCC will be kept always-on thus we don't
+		 * need any delay during regulator operations
+		 */
+		hba->dev_quirks &= ~(UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM |
+			UFS_DEVICE_QUIRK_DELAY_AFTER_LPM);
+	}
 }
 
 static void ufs_mtk_event_notify(struct ufs_hba *hba,
diff --git a/drivers/scsi/ufs/ufs-mediatek.h b/drivers/scsi/ufs/ufs-mediatek.h
index 93d35097dfb0..3f0d3bb769e8 100644
--- a/drivers/scsi/ufs/ufs-mediatek.h
+++ b/drivers/scsi/ufs/ufs-mediatek.h
@@ -81,6 +81,7 @@ enum ufs_mtk_host_caps {
 	UFS_MTK_CAP_BOOST_CRYPT_ENGINE         = 1 << 0,
 	UFS_MTK_CAP_VA09_PWR_CTRL              = 1 << 1,
 	UFS_MTK_CAP_DISABLE_AH8                = 1 << 2,
+	UFS_MTK_CAP_BROKEN_VCC                 = 1 << 3,
 };
 
 struct ufs_mtk_crypt_cfg {

From c763729a10e538d997744317cf4a1c4f25266066 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 7 Dec 2020 10:31:17 +0200
Subject: [PATCH 021/547] scsi: ufs-pci: Fix restore from S4 for Intel
 controllers

Currently, ufshcd-pci is the only UFS driver with support for
suspend-to-disk PM callbacks (i.e. freeze/thaw/restore/poweroff). These
callbacks are set by the macro SET_SYSTEM_SLEEP_PM_OPS to the same
functions as system suspend/resume. That will work with spm_lvl 5 because
spm_lvl 5 will result in a full restore for the ->restore() callback.  In
the absence of a full restore, the host controller registers will have
values set up by the restore kernel (the kernel that boots and loads the
restore image) which are not necessarily the same. However it turns out,
the only registers that sometimes need restore are the base address
registers. This has gone un-noticed because, depending on IOMMU settings,
the kernel can end up allocating the same addresses every time.

For Intel controllers, an spm_lvl other than 5 can be used, so to support
S4 (suspend-to-disk) with spm_lvl other than 5, restore the base address
registers.

Link: https://lore.kernel.org/r/20201207083120.26732-2-adrian.hunter@intel.com
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd-pci.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
index df3a564c3e33..360c25f1f061 100644
--- a/drivers/scsi/ufs/ufshcd-pci.c
+++ b/drivers/scsi/ufs/ufshcd-pci.c
@@ -163,6 +163,24 @@ static void ufs_intel_common_exit(struct ufs_hba *hba)
 	intel_ltr_hide(hba->dev);
 }
 
+static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op)
+{
+	/*
+	 * To support S4 (suspend-to-disk) with spm_lvl other than 5, the base
+	 * address registers must be restored because the restore kernel can
+	 * have used different addresses.
+	 */
+	ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr),
+		      REG_UTP_TRANSFER_REQ_LIST_BASE_L);
+	ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr),
+		      REG_UTP_TRANSFER_REQ_LIST_BASE_H);
+	ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr),
+		      REG_UTP_TASK_REQ_LIST_BASE_L);
+	ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr),
+		      REG_UTP_TASK_REQ_LIST_BASE_H);
+	return 0;
+}
+
 static int ufs_intel_ehl_init(struct ufs_hba *hba)
 {
 	hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8;
@@ -174,6 +192,7 @@ static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = {
 	.init			= ufs_intel_common_init,
 	.exit			= ufs_intel_common_exit,
 	.link_startup_notify	= ufs_intel_link_startup_notify,
+	.resume			= ufs_intel_resume,
 };
 
 static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = {
@@ -181,6 +200,7 @@ static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = {
 	.init			= ufs_intel_ehl_init,
 	.exit			= ufs_intel_common_exit,
 	.link_startup_notify	= ufs_intel_link_startup_notify,
+	.resume			= ufs_intel_resume,
 };
 
 #ifdef CONFIG_PM_SLEEP

From af423534d2de86cd0db729a5ac41f056ca8717de Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 7 Dec 2020 10:31:18 +0200
Subject: [PATCH 022/547] scsi: ufs-pci: Ensure UFS device is in PowerDown mode
 for suspend-to-disk ->poweroff()

The expectation for suspend-to-disk is that devices will be powered-off, so
the UFS device should be put in PowerDown mode. If spm_lvl is not 5, then
that will not happen. Change the pm callbacks to force spm_lvl 5 for
suspend-to-disk poweroff.

Link: https://lore.kernel.org/r/20201207083120.26732-3-adrian.hunter@intel.com
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd-pci.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
index 360c25f1f061..5d33c39fa82f 100644
--- a/drivers/scsi/ufs/ufshcd-pci.c
+++ b/drivers/scsi/ufs/ufshcd-pci.c
@@ -227,6 +227,30 @@ static int ufshcd_pci_resume(struct device *dev)
 {
 	return ufshcd_system_resume(dev_get_drvdata(dev));
 }
+
+/**
+ * ufshcd_pci_poweroff - suspend-to-disk poweroff function
+ * @dev: pointer to PCI device handle
+ *
+ * Returns 0 if successful
+ * Returns non-zero otherwise
+ */
+static int ufshcd_pci_poweroff(struct device *dev)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	int spm_lvl = hba->spm_lvl;
+	int ret;
+
+	/*
+	 * For poweroff we need to set the UFS device to PowerDown mode.
+	 * Force spm_lvl to ensure that.
+	 */
+	hba->spm_lvl = 5;
+	ret = ufshcd_system_suspend(hba);
+	hba->spm_lvl = spm_lvl;
+	return ret;
+}
+
 #endif /* !CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PM
@@ -322,8 +346,14 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 }
 
 static const struct dev_pm_ops ufshcd_pci_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(ufshcd_pci_suspend,
-				ufshcd_pci_resume)
+#ifdef CONFIG_PM_SLEEP
+	.suspend	= ufshcd_pci_suspend,
+	.resume		= ufshcd_pci_resume,
+	.freeze		= ufshcd_pci_suspend,
+	.thaw		= ufshcd_pci_resume,
+	.poweroff	= ufshcd_pci_poweroff,
+	.restore	= ufshcd_pci_resume,
+#endif
 	SET_RUNTIME_PM_OPS(ufshcd_pci_runtime_suspend,
 			   ufshcd_pci_runtime_resume,
 			   ufshcd_pci_runtime_idle)

From 044d5bda7117891d6d0d56f2f807b7b11e120abd Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 7 Dec 2020 10:31:19 +0200
Subject: [PATCH 023/547] scsi: ufs-pci: Fix recovery from hibernate exit
 errors for Intel controllers

Intel controllers can end up in an unrecoverable state after a hibernate
exit error unless a full reset and restore is done before anything else.
Force that to happen.

Link: https://lore.kernel.org/r/20201207083120.26732-4-adrian.hunter@intel.com
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd-pci.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
index 5d33c39fa82f..888d8c9ca3a5 100644
--- a/drivers/scsi/ufs/ufshcd-pci.c
+++ b/drivers/scsi/ufs/ufshcd-pci.c
@@ -178,6 +178,23 @@ static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op)
 		      REG_UTP_TASK_REQ_LIST_BASE_L);
 	ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr),
 		      REG_UTP_TASK_REQ_LIST_BASE_H);
+
+	if (ufshcd_is_link_hibern8(hba)) {
+		int ret = ufshcd_uic_hibern8_exit(hba);
+
+		if (!ret) {
+			ufshcd_set_link_active(hba);
+		} else {
+			dev_err(hba->dev, "%s: hibern8 exit failed %d\n",
+				__func__, ret);
+			/*
+			 * Force reset and restore. Any other actions can lead
+			 * to an unrecoverable state.
+			 */
+			ufshcd_set_link_off(hba);
+		}
+	}
+
 	return 0;
 }
 

From dd78bdb6f810bdcb173b42379af558c676c8e0aa Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 7 Dec 2020 10:31:20 +0200
Subject: [PATCH 024/547] scsi: ufs-pci: Enable UFSHCD_CAP_RPM_AUTOSUSPEND for
 Intel controllers

Enable runtime PM auto-suspend by default for Intel host controllers.

Link: https://lore.kernel.org/r/20201207083120.26732-5-adrian.hunter@intel.com
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd-pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
index 888d8c9ca3a5..fadd566025b8 100644
--- a/drivers/scsi/ufs/ufshcd-pci.c
+++ b/drivers/scsi/ufs/ufshcd-pci.c
@@ -148,6 +148,8 @@ static int ufs_intel_common_init(struct ufs_hba *hba)
 {
 	struct intel_host *host;
 
+	hba->caps |= UFSHCD_CAP_RPM_AUTOSUSPEND;
+
 	host = devm_kzalloc(hba->dev, sizeof(*host), GFP_KERNEL);
 	if (!host)
 		return -ENOMEM;

From fa4d0f1992a96f6d7c988ef423e3127e613f6ac9 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 8 Dec 2020 21:29:44 -0800
Subject: [PATCH 025/547] scsi: block: Fix a race in the runtime power
 management code

With the current implementation the following race can happen:

 * blk_pre_runtime_suspend() calls blk_freeze_queue_start() and
   blk_mq_unfreeze_queue().

 * blk_queue_enter() calls blk_queue_pm_only() and that function returns
   true.

 * blk_queue_enter() calls blk_pm_request_resume() and that function does
   not call pm_request_resume() because the queue runtime status is
   RPM_ACTIVE.

 * blk_pre_runtime_suspend() changes the queue status into RPM_SUSPENDING.

Fix this race by changing the queue runtime status into RPM_SUSPENDING
before switching q_usage_counter to atomic mode.

Link: https://lore.kernel.org/r/20201209052951.16136-2-bvanassche@acm.org
Fixes: 986d413b7c15 ("blk-mq: Enable support for runtime power management")
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Stanley Chu <stanley.chu@mediatek.com>
Co-developed-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/blk-pm.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/block/blk-pm.c b/block/blk-pm.c
index b85234d758f7..17bd020268d4 100644
--- a/block/blk-pm.c
+++ b/block/blk-pm.c
@@ -67,6 +67,10 @@ int blk_pre_runtime_suspend(struct request_queue *q)
 
 	WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE);
 
+	spin_lock_irq(&q->queue_lock);
+	q->rpm_status = RPM_SUSPENDING;
+	spin_unlock_irq(&q->queue_lock);
+
 	/*
 	 * Increase the pm_only counter before checking whether any
 	 * non-PM blk_queue_enter() calls are in progress to avoid that any
@@ -89,15 +93,14 @@ int blk_pre_runtime_suspend(struct request_queue *q)
 	/* Switch q_usage_counter back to per-cpu mode. */
 	blk_mq_unfreeze_queue(q);
 
-	spin_lock_irq(&q->queue_lock);
-	if (ret < 0)
+	if (ret < 0) {
+		spin_lock_irq(&q->queue_lock);
+		q->rpm_status = RPM_ACTIVE;
 		pm_runtime_mark_last_busy(q->dev);
-	else
-		q->rpm_status = RPM_SUSPENDING;
-	spin_unlock_irq(&q->queue_lock);
+		spin_unlock_irq(&q->queue_lock);
 
-	if (ret)
 		blk_clear_pm_only(q);
+	}
 
 	return ret;
 }

From 0854bcdcdec26aecdc92c303816f349ee1fba2bc Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 8 Dec 2020 21:29:45 -0800
Subject: [PATCH 026/547] scsi: block: Introduce BLK_MQ_REQ_PM

Introduce the BLK_MQ_REQ_PM flag. This flag makes the request allocation
functions set RQF_PM. This is the first step towards removing
BLK_MQ_REQ_PREEMPT.

Link: https://lore.kernel.org/r/20201209052951.16136-3-bvanassche@acm.org
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Can Guo <cang@codeaurora.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/blk-core.c       | 7 ++++---
 block/blk-mq.c         | 2 ++
 include/linux/blk-mq.h | 2 ++
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 2db8bda43b6e..10696f9fb6ac 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -424,11 +424,11 @@ EXPORT_SYMBOL(blk_cleanup_queue);
 /**
  * blk_queue_enter() - try to increase q->q_usage_counter
  * @q: request queue pointer
- * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
+ * @flags: BLK_MQ_REQ_NOWAIT, BLK_MQ_REQ_PM and/or BLK_MQ_REQ_PREEMPT
  */
 int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 {
-	const bool pm = flags & BLK_MQ_REQ_PREEMPT;
+	const bool pm = flags & (BLK_MQ_REQ_PM | BLK_MQ_REQ_PREEMPT);
 
 	while (true) {
 		bool success = false;
@@ -630,7 +630,8 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op,
 	struct request *req;
 
 	WARN_ON_ONCE(op & REQ_NOWAIT);
-	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));
+	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM |
+			       BLK_MQ_REQ_PREEMPT));
 
 	req = blk_mq_alloc_request(q, op, flags);
 	if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1b25ec2fe9be..b5880a1fb38d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -292,6 +292,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	rq->mq_hctx = data->hctx;
 	rq->rq_flags = 0;
 	rq->cmd_flags = data->cmd_flags;
+	if (data->flags & BLK_MQ_REQ_PM)
+		rq->rq_flags |= RQF_PM;
 	if (data->flags & BLK_MQ_REQ_PREEMPT)
 		rq->rq_flags |= RQF_PREEMPT;
 	if (blk_queue_io_stat(data->q))
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b23eeca4d677..c00e856c6fb1 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -444,6 +444,8 @@ enum {
 	BLK_MQ_REQ_NOWAIT	= (__force blk_mq_req_flags_t)(1 << 0),
 	/* allocate from reserved pool */
 	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
+	/* set RQF_PM */
+	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
 	/* set RQF_PREEMPT */
 	BLK_MQ_REQ_PREEMPT	= (__force blk_mq_req_flags_t)(1 << 3),
 };

From 96d86e6a80a3ab9aff81d12f9f1f2a0da2917d38 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 8 Dec 2020 21:29:46 -0800
Subject: [PATCH 027/547] scsi: ide: Do not set the RQF_PREEMPT flag for sense
 requests

RQF_PREEMPT is used for two different purposes in the legacy IDE code:

 1. To mark power management requests.

 2. To mark requests that should preempt another request. An (old)
    explanation of that feature is as follows: "The IDE driver in the Linux
    kernel normally uses a series of busywait delays during its
    initialization. When the driver executes these busywaits, the kernel
    does nothing for the duration of the wait. The time spent in these
    waits could be used for other initialization activities, if they could
    be run concurrently with these waits.

    More specifically, busywait-style delays such as udelay() in module
    init functions inhibit kernel preemption because the Big Kernel Lock is
    held, while yielding APIs such as schedule_timeout() allow
    preemption. This is true because the kernel handles the BKL specially
    and releases and reacquires it across reschedules allowed by the
    current thread.

    This IDE-preempt specification requires that the driver eliminate these
    busywaits and replace them with a mechanism that allows other work to
    proceed while the IDE driver is initializing."

Since I haven't found an implementation of (2), do not set the PREEMPT flag
for sense requests. This patch causes sense requests to be postponed while
a drive is suspended instead of being submitted to ide_queue_rq().

If it would ever be necessary to restore the IDE PREEMPT functionality,
that can be done by introducing a new flag in struct ide_request.

Link: https://lore.kernel.org/r/20201209052951.16136-4-bvanassche@acm.org
Cc: David S. Miller <davem@davemloft.net>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Can Guo <cang@codeaurora.org>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ide/ide-atapi.c | 1 -
 drivers/ide/ide-io.c    | 5 -----
 2 files changed, 6 deletions(-)

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 2162bc80f09e..013ad33fbbc8 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -223,7 +223,6 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	sense_rq->rq_disk = rq->rq_disk;
 	sense_rq->cmd_flags = REQ_OP_DRV_IN;
 	ide_req(sense_rq)->type = ATA_PRIV_SENSE;
-	sense_rq->rq_flags |= RQF_PREEMPT;
 
 	req->cmd[0] = GPCMD_REQUEST_SENSE;
 	req->cmd[4] = cmd_len;
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 1a53c7a75224..c210ea3bd02f 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -515,11 +515,6 @@ repeat:
 		 * above to return us whatever is in the queue. Since we call
 		 * ide_do_request() ourselves, we end up taking requests while
 		 * the queue is blocked...
-		 * 
-		 * We let requests forced at head of queue with ide-preempt
-		 * though. I hope that doesn't happen too much, hopefully not
-		 * unless the subdriver triggers such a thing in its own PM
-		 * state machine.
 		 */
 		if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
 		    ata_pm_request(rq) == 0 &&

From 5ae65383fc7633e0247c31b0c8bf0e6ea63b95a3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 8 Dec 2020 21:29:47 -0800
Subject: [PATCH 028/547] scsi: ide: Mark power management requests with RQF_PM
 instead of RQF_PREEMPT

This is another step that prepares for the removal of RQF_PREEMPT.

Link: https://lore.kernel.org/r/20201209052951.16136-5-bvanassche@acm.org
Cc: David S. Miller <davem@davemloft.net>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Can Guo <cang@codeaurora.org>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ide/ide-io.c | 2 +-
 drivers/ide/ide-pm.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index c210ea3bd02f..4867b67b60d6 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -518,7 +518,7 @@ repeat:
 		 */
 		if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
 		    ata_pm_request(rq) == 0 &&
-		    (rq->rq_flags & RQF_PREEMPT) == 0) {
+		    (rq->rq_flags & RQF_PM) == 0) {
 			/* there should be no pending command at this point */
 			ide_unlock_port(hwif);
 			goto plug_device;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 192e6c65d34e..82ab308f1aaf 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -77,7 +77,7 @@ int generic_ide_resume(struct device *dev)
 	}
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PM);
 	ide_req(rq)->type = ATA_PRIV_PM_RESUME;
 	ide_req(rq)->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_RESUME;

From cfefd9f8240a7b9fdd96fcd54cb029870b6d8d88 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 8 Dec 2020 21:29:48 -0800
Subject: [PATCH 029/547] scsi: scsi_transport_spi: Set RQF_PM for domain
 validation commands

Disable runtime power management during domain validation. Since a later
patch removes RQF_PREEMPT, set RQF_PM for domain validation commands such
that these are executed in the quiesced SCSI device state.

Link: https://lore.kernel.org/r/20201209052951.16136-6-bvanassche@acm.org
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Woody Suwalski <terraluna977@gmail.com>
Cc: Can Guo <cang@codeaurora.org>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Stan Johnson <userm57@yahoo.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_transport_spi.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index f3d5b1bbd5aa..c37dd15d16d2 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -117,12 +117,16 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd,
 		sshdr = &sshdr_tmp;
 
 	for(i = 0; i < DV_RETRIES; i++) {
+		/*
+		 * The purpose of the RQF_PM flag below is to bypass the
+		 * SDEV_QUIESCE state.
+		 */
 		result = scsi_execute(sdev, cmd, dir, buffer, bufflen, sense,
 				      sshdr, DV_TIMEOUT, /* retries */ 1,
 				      REQ_FAILFAST_DEV |
 				      REQ_FAILFAST_TRANSPORT |
 				      REQ_FAILFAST_DRIVER,
-				      0, NULL);
+				      RQF_PM, NULL);
 		if (driver_byte(result) != DRIVER_SENSE ||
 		    sshdr->sense_key != UNIT_ATTENTION)
 			break;
@@ -1005,23 +1009,26 @@ spi_dv_device(struct scsi_device *sdev)
 	 */
 	lock_system_sleep();
 
+	if (scsi_autopm_get_device(sdev))
+		goto unlock_system_sleep;
+
 	if (unlikely(spi_dv_in_progress(starget)))
-		goto unlock;
+		goto put_autopm;
 
 	if (unlikely(scsi_device_get(sdev)))
-		goto unlock;
+		goto put_autopm;
 
 	spi_dv_in_progress(starget) = 1;
 
 	buffer = kzalloc(len, GFP_KERNEL);
 
 	if (unlikely(!buffer))
-		goto out_put;
+		goto put_sdev;
 
 	/* We need to verify that the actual device will quiesce; the
 	 * later target quiesce is just a nice to have */
 	if (unlikely(scsi_device_quiesce(sdev)))
-		goto out_free;
+		goto free_buffer;
 
 	scsi_target_quiesce(starget);
 
@@ -1041,12 +1048,16 @@ spi_dv_device(struct scsi_device *sdev)
 
 	spi_initial_dv(starget) = 1;
 
- out_free:
+free_buffer:
 	kfree(buffer);
- out_put:
+
+put_sdev:
 	spi_dv_in_progress(starget) = 0;
 	scsi_device_put(sdev);
-unlock:
+put_autopm:
+	scsi_autopm_put_device(sdev);
+
+unlock_system_sleep:
 	unlock_system_sleep();
 }
 EXPORT_SYMBOL(spi_dv_device);

From e6044f714b256259df9611ff49af433e5411c5c8 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 8 Dec 2020 21:29:49 -0800
Subject: [PATCH 030/547] scsi: core: Only process PM requests if rpm_status !=
 RPM_ACTIVE

Instead of submitting all SCSI commands submitted with scsi_execute() to a
SCSI device if rpm_status != RPM_ACTIVE, only submit RQF_PM (power
management requests) if rpm_status != RPM_ACTIVE. This patch makes the SCSI
core handle the runtime power management status (rpm_status) as it should
be handled.

Link: https://lore.kernel.org/r/20201209052951.16136-7-bvanassche@acm.org
Cc: Can Guo <cang@codeaurora.org>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Martin Kepplinger <martin.kepplinger@puri.sm>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b7ac14571415..91bc39a4c3c3 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -249,7 +249,8 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 
 	req = blk_get_request(sdev->request_queue,
 			data_direction == DMA_TO_DEVICE ?
-			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT);
+			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
+			rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0);
 	if (IS_ERR(req))
 		return ret;
 	rq = scsi_req(req);
@@ -1206,6 +1207,8 @@ static blk_status_t
 scsi_device_state_check(struct scsi_device *sdev, struct request *req)
 {
 	switch (sdev->sdev_state) {
+	case SDEV_CREATED:
+		return BLK_STS_OK;
 	case SDEV_OFFLINE:
 	case SDEV_TRANSPORT_OFFLINE:
 		/*
@@ -1232,18 +1235,18 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req)
 		return BLK_STS_RESOURCE;
 	case SDEV_QUIESCE:
 		/*
-		 * If the devices is blocked we defer normal commands.
+		 * If the device is blocked we only accept power management
+		 * commands.
 		 */
-		if (req && !(req->rq_flags & RQF_PREEMPT))
+		if (req && WARN_ON_ONCE(!(req->rq_flags & RQF_PM)))
 			return BLK_STS_RESOURCE;
 		return BLK_STS_OK;
 	default:
 		/*
 		 * For any other not fully online state we only allow
-		 * special commands.  In particular any user initiated
-		 * command is not allowed.
+		 * power management commands.
 		 */
-		if (req && !(req->rq_flags & RQF_PREEMPT))
+		if (req && !(req->rq_flags & RQF_PM))
 			return BLK_STS_IOERR;
 		return BLK_STS_OK;
 	}
@@ -2517,15 +2520,13 @@ void sdev_evt_send_simple(struct scsi_device *sdev,
 EXPORT_SYMBOL_GPL(sdev_evt_send_simple);
 
 /**
- *	scsi_device_quiesce - Block user issued commands.
+ *	scsi_device_quiesce - Block all commands except power management.
  *	@sdev:	scsi device to quiesce.
  *
  *	This works by trying to transition to the SDEV_QUIESCE state
  *	(which must be a legal transition).  When the device is in this
- *	state, only special requests will be accepted, all others will
- *	be deferred.  Since special requests may also be requeued requests,
- *	a successful return doesn't guarantee the device will be
- *	totally quiescent.
+ *	state, only power management requests will be accepted, all others will
+ *	be deferred.
  *
  *	Must be called with user context, may sleep.
  *
@@ -2587,12 +2588,12 @@ void scsi_device_resume(struct scsi_device *sdev)
 	 * device deleted during suspend)
 	 */
 	mutex_lock(&sdev->state_mutex);
+	if (sdev->sdev_state == SDEV_QUIESCE)
+		scsi_device_set_state(sdev, SDEV_RUNNING);
 	if (sdev->quiesced_by) {
 		sdev->quiesced_by = NULL;
 		blk_clear_pm_only(sdev->request_queue);
 	}
-	if (sdev->sdev_state == SDEV_QUIESCE)
-		scsi_device_set_state(sdev, SDEV_RUNNING);
 	mutex_unlock(&sdev->state_mutex);
 }
 EXPORT_SYMBOL(scsi_device_resume);

From a4d34da715e3cb7e0741fe603dcd511bed067e00 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 8 Dec 2020 21:29:50 -0800
Subject: [PATCH 031/547] scsi: block: Remove RQF_PREEMPT and
 BLK_MQ_REQ_PREEMPT

Remove flag RQF_PREEMPT and BLK_MQ_REQ_PREEMPT since these are no longer
used by any kernel code.

Link: https://lore.kernel.org/r/20201209052951.16136-8-bvanassche@acm.org
Cc: Can Guo <cang@codeaurora.org>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Martin Kepplinger <martin.kepplinger@puri.sm>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/blk-core.c       | 7 +++----
 block/blk-mq-debugfs.c | 1 -
 block/blk-mq.c         | 2 --
 include/linux/blk-mq.h | 2 --
 include/linux/blkdev.h | 6 +-----
 5 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 10696f9fb6ac..a00bce9f46d8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -424,11 +424,11 @@ EXPORT_SYMBOL(blk_cleanup_queue);
 /**
  * blk_queue_enter() - try to increase q->q_usage_counter
  * @q: request queue pointer
- * @flags: BLK_MQ_REQ_NOWAIT, BLK_MQ_REQ_PM and/or BLK_MQ_REQ_PREEMPT
+ * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM
  */
 int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 {
-	const bool pm = flags & (BLK_MQ_REQ_PM | BLK_MQ_REQ_PREEMPT);
+	const bool pm = flags & BLK_MQ_REQ_PM;
 
 	while (true) {
 		bool success = false;
@@ -630,8 +630,7 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op,
 	struct request *req;
 
 	WARN_ON_ONCE(op & REQ_NOWAIT);
-	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM |
-			       BLK_MQ_REQ_PREEMPT));
+	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM));
 
 	req = blk_mq_alloc_request(q, op, flags);
 	if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3094542e12ae..9336a6f8d6ef 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -297,7 +297,6 @@ static const char *const rqf_name[] = {
 	RQF_NAME(MIXED_MERGE),
 	RQF_NAME(MQ_INFLIGHT),
 	RQF_NAME(DONTPREP),
-	RQF_NAME(PREEMPT),
 	RQF_NAME(FAILED),
 	RQF_NAME(QUIET),
 	RQF_NAME(ELVPRIV),
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b5880a1fb38d..d50504888b68 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -294,8 +294,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	rq->cmd_flags = data->cmd_flags;
 	if (data->flags & BLK_MQ_REQ_PM)
 		rq->rq_flags |= RQF_PM;
-	if (data->flags & BLK_MQ_REQ_PREEMPT)
-		rq->rq_flags |= RQF_PREEMPT;
 	if (blk_queue_io_stat(data->q))
 		rq->rq_flags |= RQF_IO_STAT;
 	INIT_LIST_HEAD(&rq->queuelist);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c00e856c6fb1..88af1df94308 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -446,8 +446,6 @@ enum {
 	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
 	/* set RQF_PM */
 	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
-	/* set RQF_PREEMPT */
-	BLK_MQ_REQ_PREEMPT	= (__force blk_mq_req_flags_t)(1 << 3),
 };
 
 struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 639cae2c158b..7d4b746f7e6a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -79,9 +79,6 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_MQ_INFLIGHT		((__force req_flags_t)(1 << 6))
 /* don't call prep for this one */
 #define RQF_DONTPREP		((__force req_flags_t)(1 << 7))
-/* set for "ide_preempt" requests and also for requests for which the SCSI
-   "quiesce" state must be ignored. */
-#define RQF_PREEMPT		((__force req_flags_t)(1 << 8))
 /* vaguely specified driver internal error.  Ignored by the block layer */
 #define RQF_FAILED		((__force req_flags_t)(1 << 10))
 /* don't warn about errors */
@@ -430,8 +427,7 @@ struct request_queue {
 	unsigned long		queue_flags;
 	/*
 	 * Number of contexts that have called blk_set_pm_only(). If this
-	 * counter is above zero then only RQF_PM and RQF_PREEMPT requests are
-	 * processed.
+	 * counter is above zero then only RQF_PM requests are processed.
 	 */
 	atomic_t		pm_only;
 

From 52abca64fd9410ea6c9a3a74eab25663b403d7da Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 8 Dec 2020 21:29:51 -0800
Subject: [PATCH 032/547] scsi: block: Do not accept any requests while
 suspended

blk_queue_enter() accepts BLK_MQ_REQ_PM requests independent of the runtime
power management state. Now that SCSI domain validation no longer depends
on this behavior, modify the behavior of blk_queue_enter() as follows:

   - Do not accept any requests while suspended.

   - Only process power management requests while suspending or resuming.

Submitting BLK_MQ_REQ_PM requests to a device that is runtime suspended
causes runtime-suspended devices not to resume as they should. The request
which should cause a runtime resume instead gets issued directly, without
resuming the device first. Of course the device can't handle it properly,
the I/O fails, and the device remains suspended.

The problem is fixed by checking that the queue's runtime-PM status isn't
RPM_SUSPENDED before allowing a request to be issued, and queuing a
runtime-resume request if it is.  In particular, the inline
blk_pm_request_resume() routine is renamed blk_pm_resume_queue() and the
code is unified by merging the surrounding checks into the routine.  If the
queue isn't set up for runtime PM, or there currently is no restriction on
allowed requests, the request is allowed.  Likewise if the BLK_MQ_REQ_PM
flag is set and the status isn't RPM_SUSPENDED.  Otherwise a runtime resume
is queued and the request is blocked until conditions are more suitable.

[ bvanassche: modified commit message and removed Cc: stable because
  without the previous patches from this series this patch would break
  parallel SCSI domain validation + introduced queue_rpm_status() ]

Link: https://lore.kernel.org/r/20201209052951.16136-9-bvanassche@acm.org
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Can Guo <cang@codeaurora.org>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reported-and-tested-by: Martin Kepplinger <martin.kepplinger@puri.sm>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/blk-core.c       |  7 ++++---
 block/blk-pm.h         | 14 +++++++++-----
 include/linux/blkdev.h | 12 ++++++++++++
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index a00bce9f46d8..2d53e2ff48ff 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -18,6 +18,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
+#include <linux/blk-pm.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
@@ -440,7 +441,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 			 * responsible for ensuring that that counter is
 			 * globally visible before the queue is unfrozen.
 			 */
-			if (pm || !blk_queue_pm_only(q)) {
+			if ((pm && queue_rpm_status(q) != RPM_SUSPENDED) ||
+			    !blk_queue_pm_only(q)) {
 				success = true;
 			} else {
 				percpu_ref_put(&q->q_usage_counter);
@@ -465,8 +467,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 
 		wait_event(q->mq_freeze_wq,
 			   (!q->mq_freeze_depth &&
-			    (pm || (blk_pm_request_resume(q),
-				    !blk_queue_pm_only(q)))) ||
+			    blk_pm_resume_queue(pm, q)) ||
 			   blk_queue_dying(q));
 		if (blk_queue_dying(q))
 			return -ENODEV;
diff --git a/block/blk-pm.h b/block/blk-pm.h
index ea5507d23e75..a2283cc9f716 100644
--- a/block/blk-pm.h
+++ b/block/blk-pm.h
@@ -6,11 +6,14 @@
 #include <linux/pm_runtime.h>
 
 #ifdef CONFIG_PM
-static inline void blk_pm_request_resume(struct request_queue *q)
+static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
 {
-	if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
-		       q->rpm_status == RPM_SUSPENDING))
-		pm_request_resume(q->dev);
+	if (!q->dev || !blk_queue_pm_only(q))
+		return 1;	/* Nothing to do */
+	if (pm && q->rpm_status != RPM_SUSPENDED)
+		return 1;	/* Request allowed */
+	pm_request_resume(q->dev);
+	return 0;
 }
 
 static inline void blk_pm_mark_last_busy(struct request *rq)
@@ -44,8 +47,9 @@ static inline void blk_pm_put_request(struct request *rq)
 		--rq->q->nr_pending;
 }
 #else
-static inline void blk_pm_request_resume(struct request_queue *q)
+static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
 {
+	return 1;
 }
 
 static inline void blk_pm_mark_last_busy(struct request *rq)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7d4b746f7e6a..2b6fc3fb3a99 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -692,6 +692,18 @@ static inline bool queue_is_mq(struct request_queue *q)
 	return q->mq_ops;
 }
 
+#ifdef CONFIG_PM
+static inline enum rpm_status queue_rpm_status(struct request_queue *q)
+{
+	return q->rpm_status;
+}
+#else
+static inline enum rpm_status queue_rpm_status(struct request_queue *q)
+{
+	return RPM_ACTIVE;
+}
+#endif
+
 static inline enum blk_zoned_model
 blk_queue_zoned_model(struct request_queue *q)
 {

From 8b3c8035297e71abb9e6d0f50ceab50d33c0d64b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 9 Dec 2020 17:03:11 +0300
Subject: [PATCH 033/547] scsi: mpt3sas: Signedness bug in
 _base_get_diag_triggers()

The "trigger_flags" variable needs to be signed for the error checking to
work.

Link: https://lore.kernel.org/r/X9DZH37bYPHwSQRP@mwanda
Fixes: aec93e8e2385 ("scsi: mpt3sas: Add persistent trigger pages support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index b129f3734ed0..26537d503a8b 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -5027,7 +5027,7 @@ _base_check_for_trigger_pages_support(struct MPT3SAS_ADAPTER *ioc)
 static void
 _base_get_diag_triggers(struct MPT3SAS_ADAPTER *ioc)
 {
-	u16 trigger_flags;
+	int trigger_flags;
 
 	/*
 	 * Default setting of master trigger.

From 5213dc7940e0aa4c094413e015790c4a310ef36c Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Dec 2020 14:31:44 +0800
Subject: [PATCH 034/547] scsi: ufs-mediatek: Use correct path to fix compile
 error

When the kernel is compiled with allmodconfig, the following error is
reported:

In file included from drivers/scsi/ufs/ufs-mediatek-trace.h:36:0,
                 from drivers/scsi/ufs/ufs-mediatek.c:28:
./include/trace/define_trace.h:95:42: fatal error: ./ufs-mediatek-trace.h: No such file or directory
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)

The comment in include/trace/define_trace.h specifies that:
TRACE_INCLUDE_PATH: Note, the path is relative to define_trace.h, not the
file including it. Full path names for out of tree modules must be used.

So without "CFLAGS_ufs-mediatek.o := -I$(src)", the current directory "."
is "include/trace/", the relative path of ufs-mediatek-trace.h is
"../../drivers/scsi/ufs/".

Link: https://lore.kernel.org/r/20201209063144.1840-2-thunder.leizhen@huawei.com
Fixes: ca1bb061d644 ("scsi: ufs-mediatek: Introduce event_notify implementation")
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufs-mediatek-trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufs-mediatek-trace.h b/drivers/scsi/ufs/ufs-mediatek-trace.h
index fd6f84c1b4e2..895e82ea6ece 100644
--- a/drivers/scsi/ufs/ufs-mediatek-trace.h
+++ b/drivers/scsi/ufs/ufs-mediatek-trace.h
@@ -31,6 +31,6 @@ TRACE_EVENT(ufs_mtk_event,
 
 #undef TRACE_INCLUDE_PATH
 #undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_PATH ../../drivers/scsi/ufs/
 #define TRACE_INCLUDE_FILE ufs-mediatek-trace
 #include <trace/define_trace.h>

From bd14bf0e4a084514aa62d24d2109e0f09a93822f Mon Sep 17 00:00:00 2001
From: Stanley Chu <stanley.chu@mediatek.com>
Date: Tue, 8 Dec 2020 21:56:34 +0800
Subject: [PATCH 035/547] scsi: ufs: Re-enable WriteBooster after device reset

UFS 3.1 specification mentions that the WriteBooster flags listed below
will be set to their default values, i.e. disabled, after power cycle or
any type of reset event. Thus we need to reset the flag variables kept in
struct hba to align with the device status and ensure that
WriteBooster-related functions are configured properly after device reset.

Without this fix, WriteBooster will not be enabled successfully after by
ufshcd_wb_ctrl() after device reset because hba->wb_enabled remains true.

Flags required to be reset to default values:

 - fWriteBoosterEn: hba->wb_enabled

 - fWriteBoosterBufferFlushEn: hba->wb_buf_flush_enabled

 - fWriteBoosterBufferFlushDuringHibernate: No variable mapped

Link: https://lore.kernel.org/r/20201208135635.15326-2-stanley.chu@mediatek.com
Fixes: 3d17b9b5ab11 ("scsi: ufs: Add write booster feature support")
Reviewed-by: Bean Huo <beanhuo@micron.com>
Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 08c8a591e6b0..36d367eb8139 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -1221,8 +1221,13 @@ static inline void ufshcd_vops_device_reset(struct ufs_hba *hba)
 	if (hba->vops && hba->vops->device_reset) {
 		int err = hba->vops->device_reset(hba);
 
-		if (!err)
+		if (!err) {
 			ufshcd_set_ufs_dev_active(hba);
+			if (ufshcd_is_wb_allowed(hba)) {
+				hba->wb_enabled = false;
+				hba->wb_buf_flush_enabled = false;
+			}
+		}
 		if (err != -EOPNOTSUPP)
 			ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err);
 	}

From 31a5d9cafff163473abf9496318b6a53022d48f7 Mon Sep 17 00:00:00 2001
From: Stanley Chu <stanley.chu@mediatek.com>
Date: Tue, 8 Dec 2020 21:56:35 +0800
Subject: [PATCH 036/547] scsi: ufs: Un-inline ufshcd_vops_device_reset
 function

More and more statements are being added to ufshcd_vops_device_reset() and
this function is being called from multiple locations in the driver.
Un-inline the function to allow the compiler to make better decisions.

Link: https://lore.kernel.org/r/20201208135635.15326-3-stanley.chu@mediatek.com
Reviewed-by: Bean Huo <beanhuo@micron.com>
Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd.c | 27 ++++++++++++++++++++++-----
 drivers/scsi/ufs/ufshcd.h | 17 ++++-------------
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 4f3ac2566322..e221add25a7e 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -581,6 +581,23 @@ static void ufshcd_print_pwr_info(struct ufs_hba *hba)
 		 hba->pwr_info.hs_rate);
 }
 
+static void ufshcd_device_reset(struct ufs_hba *hba)
+{
+	int err;
+
+	err = ufshcd_vops_device_reset(hba);
+
+	if (!err) {
+		ufshcd_set_ufs_dev_active(hba);
+		if (ufshcd_is_wb_allowed(hba)) {
+			hba->wb_enabled = false;
+			hba->wb_buf_flush_enabled = false;
+		}
+	}
+	if (err != -EOPNOTSUPP)
+		ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err);
+}
+
 void ufshcd_delay_us(unsigned long us, unsigned long tolerance)
 {
 	if (!us)
@@ -3933,7 +3950,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba)
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	/* Reset the attached device */
-	ufshcd_vops_device_reset(hba);
+	ufshcd_device_reset(hba);
 
 	ret = ufshcd_host_reset_and_restore(hba);
 
@@ -6935,7 +6952,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba)
 
 	do {
 		/* Reset the attached device */
-		ufshcd_vops_device_reset(hba);
+		ufshcd_device_reset(hba);
 
 		err = ufshcd_host_reset_and_restore(hba);
 	} while (err && --retries);
@@ -8708,7 +8725,7 @@ set_link_active:
 	 * further below.
 	 */
 	if (ufshcd_is_ufs_dev_deepsleep(hba)) {
-		ufshcd_vops_device_reset(hba);
+		ufshcd_device_reset(hba);
 		WARN_ON(!ufshcd_is_link_off(hba));
 	}
 	if (ufshcd_is_link_hibern8(hba) && !ufshcd_uic_hibern8_exit(hba))
@@ -8718,7 +8735,7 @@ set_link_active:
 set_dev_active:
 	/* Can also get here needing to exit DeepSleep */
 	if (ufshcd_is_ufs_dev_deepsleep(hba)) {
-		ufshcd_vops_device_reset(hba);
+		ufshcd_device_reset(hba);
 		ufshcd_host_reset_and_restore(hba);
 	}
 	if (!ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE))
@@ -9317,7 +9334,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	}
 
 	/* Reset the attached device */
-	ufshcd_vops_device_reset(hba);
+	ufshcd_device_reset(hba);
 
 	ufshcd_init_crypto(hba);
 
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 36d367eb8139..9bb5f0ed4124 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -1216,21 +1216,12 @@ static inline void ufshcd_vops_dbg_register_dump(struct ufs_hba *hba)
 		hba->vops->dbg_register_dump(hba);
 }
 
-static inline void ufshcd_vops_device_reset(struct ufs_hba *hba)
+static inline int ufshcd_vops_device_reset(struct ufs_hba *hba)
 {
-	if (hba->vops && hba->vops->device_reset) {
-		int err = hba->vops->device_reset(hba);
+	if (hba->vops && hba->vops->device_reset)
+		return hba->vops->device_reset(hba);
 
-		if (!err) {
-			ufshcd_set_ufs_dev_active(hba);
-			if (ufshcd_is_wb_allowed(hba)) {
-				hba->wb_enabled = false;
-				hba->wb_buf_flush_enabled = false;
-			}
-		}
-		if (err != -EOPNOTSUPP)
-			ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err);
-	}
+	return -EOPNOTSUPP;
 }
 
 static inline void ufshcd_vops_config_scaling_param(struct ufs_hba *hba,

From cb5253198f10a4cd79b7523c581e6173c7d49ddb Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 8 Dec 2020 14:05:05 -0800
Subject: [PATCH 037/547] scsi: cxgb4i: Fix TLS dependency

SCSI_CXGB4_ISCSI selects CHELSIO_T4. The latter depends on TLS || TLS=n, so
since 'select' does not check dependencies of the selected symbol,
SCSI_CXGB4_ISCSI should also depend on TLS || TLS=n.

This prevents the following kconfig warning and restricts SCSI_CXGB4_ISCSI
to 'm' whenever TLS=m.

WARNING: unmet direct dependencies detected for CHELSIO_T4
  Depends on [m]: NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_CHELSIO [=y] && PCI [=y] && (IPV6 [=y] || IPV6 [=y]=n) && (TLS [=m] || TLS [=m]=n)
  Selected by [y]:
  - SCSI_CXGB4_ISCSI [=y] && SCSI_LOWLEVEL [=y] && SCSI [=y] && PCI [=y] && INET [=y] && (IPV6 [=y] || IPV6 [=y]=n) && ETHERNET [=y]

Link: https://lore.kernel.org/r/20201208220505.24488-1-rdunlap@infradead.org
Fixes: 7b36b6e03b0d ("[SCSI] cxgb4i v5: iscsi driver")
Cc: Karen Xie <kxie@chelsio.com>
Cc: linux-scsi@vger.kernel.org
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/cxgbi/cxgb4i/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig
index b206e266b4e7..8b0deece9758 100644
--- a/drivers/scsi/cxgbi/cxgb4i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig
@@ -4,6 +4,7 @@ config SCSI_CXGB4_ISCSI
 	depends on PCI && INET && (IPV6 || IPV6=n)
 	depends on THERMAL || !THERMAL
 	depends on ETHERNET
+	depends on TLS || TLS=n
 	select NET_VENDOR_CHELSIO
 	select CHELSIO_T4
 	select CHELSIO_LIB

From 43ffe817bfe3871ffbaa1e98952a2a01b140e71e Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Thu, 10 Dec 2020 15:10:56 +0530
Subject: [PATCH 038/547] arm64: dts: bitmain: Use generic "ngpios" rather than
 "snps,nr-gpios"

This is to remove similar errors as below:

OF: /.../gpio-port@0: could not find phandle

Commit 7569486d79ae ("gpio: dwapb: Add ngpios DT-property support")
explained the reason of above errors well and added the generic
"ngpios" property, let's use it.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20201210094056.54553-1-manivannan.sadhasivam@linaro.org'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/bitmain/bm1880.dtsi | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/boot/dts/bitmain/bm1880.dtsi b/arch/arm64/boot/dts/bitmain/bm1880.dtsi
index fa6e6905f588..53a9b76057aa 100644
--- a/arch/arm64/boot/dts/bitmain/bm1880.dtsi
+++ b/arch/arm64/boot/dts/bitmain/bm1880.dtsi
@@ -127,7 +127,7 @@
 				compatible = "snps,dw-apb-gpio-port";
 				gpio-controller;
 				#gpio-cells = <2>;
-				snps,nr-gpios = <32>;
+				ngpios = <32>;
 				reg = <0>;
 				interrupt-controller;
 				#interrupt-cells = <2>;
@@ -145,7 +145,7 @@
 				compatible = "snps,dw-apb-gpio-port";
 				gpio-controller;
 				#gpio-cells = <2>;
-				snps,nr-gpios = <32>;
+				ngpios = <32>;
 				reg = <0>;
 				interrupt-controller;
 				#interrupt-cells = <2>;
@@ -163,7 +163,7 @@
 				compatible = "snps,dw-apb-gpio-port";
 				gpio-controller;
 				#gpio-cells = <2>;
-				snps,nr-gpios = <8>;
+				ngpios = <8>;
 				reg = <0>;
 				interrupt-controller;
 				#interrupt-cells = <2>;

From 1b04fa9900263b4e217ca2509fd778b32c2b4eb2 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Wed, 9 Dec 2020 21:27:31 +0100
Subject: [PATCH 039/547] rcu-tasks: Move RCU-tasks initialization to before
 early_initcall()

PowerPC testing encountered boot failures due to RCU Tasks not being
fully initialized until core_initcall() time.  This commit therefore
initializes RCU Tasks (along with Rude RCU and RCU Tasks Trace) just
before early_initcall() time, thus allowing waiting on RCU Tasks grace
periods from early_initcall() handlers.

Link: https://lore.kernel.org/rcu/87eekfh80a.fsf@dja-thinkpad.axtens.net/
Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall")
Tested-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate.h |  6 ++++++
 init/main.c              |  1 +
 kernel/rcu/tasks.h       | 25 +++++++++++++++++++++----
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6cdd0152c253..5c119d6cecf1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -86,6 +86,12 @@ void rcu_sched_clock_irq(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
 
+#ifdef CONFIG_TASKS_RCU_GENERIC
+void rcu_init_tasks_generic(void);
+#else
+static inline void rcu_init_tasks_generic(void) { }
+#endif
+
 #ifdef CONFIG_RCU_STALL_COMMON
 void rcu_sysrq_start(void);
 void rcu_sysrq_end(void);
diff --git a/init/main.c b/init/main.c
index 32b2a8affafd..9d964511fe0c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1512,6 +1512,7 @@ static noinline void __init kernel_init_freeable(void)
 
 	init_mm_internals();
 
+	rcu_init_tasks_generic();
 	do_pre_smp_initcalls();
 	lockup_detector_init();
 
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index d5d9f2d03e8a..73bbe792fe1e 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -241,7 +241,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 	}
 }
 
-/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */
+/* Spawn RCU-tasks grace-period kthread. */
 static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
 {
 	struct task_struct *t;
@@ -569,7 +569,6 @@ static int __init rcu_spawn_tasks_kthread(void)
 	rcu_spawn_tasks_kthread_generic(&rcu_tasks);
 	return 0;
 }
-core_initcall(rcu_spawn_tasks_kthread);
 
 #ifndef CONFIG_TINY_RCU
 static void show_rcu_tasks_classic_gp_kthread(void)
@@ -697,7 +696,6 @@ static int __init rcu_spawn_tasks_rude_kthread(void)
 	rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
 	return 0;
 }
-core_initcall(rcu_spawn_tasks_rude_kthread);
 
 #ifndef CONFIG_TINY_RCU
 static void show_rcu_tasks_rude_gp_kthread(void)
@@ -975,6 +973,11 @@ static void rcu_tasks_trace_pregp_step(void)
 static void rcu_tasks_trace_pertask(struct task_struct *t,
 				    struct list_head *hop)
 {
+	// During early boot when there is only the one boot CPU, there
+	// is no idle task for the other CPUs. Just return.
+	if (unlikely(t == NULL))
+		return;
+
 	WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
 	WRITE_ONCE(t->trc_reader_checked, false);
 	t->trc_ipi_to_cpu = -1;
@@ -1200,7 +1203,6 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
 	rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace);
 	return 0;
 }
-core_initcall(rcu_spawn_tasks_trace_kthread);
 
 #ifndef CONFIG_TINY_RCU
 static void show_rcu_tasks_trace_gp_kthread(void)
@@ -1229,6 +1231,21 @@ void show_rcu_tasks_gp_kthreads(void)
 }
 #endif /* #ifndef CONFIG_TINY_RCU */
 
+void __init rcu_init_tasks_generic(void)
+{
+#ifdef CONFIG_TASKS_RCU
+	rcu_spawn_tasks_kthread();
+#endif
+
+#ifdef CONFIG_TASKS_RUDE_RCU
+	rcu_spawn_tasks_rude_kthread();
+#endif
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+	rcu_spawn_tasks_trace_kthread();
+#endif
+}
+
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 static inline void rcu_tasks_bootup_oddness(void) {}
 void show_rcu_tasks_gp_kthreads(void) {}

From 57f04815fd95bb8c46f6ec5c9d25430bb52d419f Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Thu, 10 Dec 2020 09:40:28 -0800
Subject: [PATCH 040/547] drm/msm: Fix WARN_ON() splat in _free_object()

[  192.062000] ------------[ cut here ]------------
[  192.062498] WARNING: CPU: 3 PID: 2039 at drivers/gpu/drm/msm/msm_gem.c:381 put_iova_vmas+0x94/0xa0 [msm]
[  192.062870] Modules linked in: snd_hrtimer snd_seq snd_seq_device rfcomm algif_hash algif_skcipher af_alg bnep xt_CHECKSUM nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter xt_tcpudp nft_compat cpufreq_powersave cpufreq_conservative q6asm_dai q6routing q6afe_dai q6adm bridge q6afe q6asm q6dsp_common q6core stp llc nf_tables libcrc32c nfnetlink snd_soc_wsa881x regmap_sdw soundwire_qcom gpio_wcd934x snd_soc_wcd934x wcd934x regmap_slimbus venus_enc venus_dec apr videobuf2_dma_sg qrtr_smd uvcvideo videobuf2_vmalloc videobuf2_memops ath10k_snoc ath10k_core hci_uart btqca btbcm mac80211 bluetooth snd_soc_sdm845 ath snd_soc_rt5663 snd_soc_qcom_common snd_soc_rl6231 soundwire_bus ecdh_generic ecc qcom_spmi_adc5 venus_core qcom_pon qcom_spmi_temp_alarm qcom_vadc_common v4l2_mem2mem videobuf2_v4l2 cfg80211 videobuf2_common hid_multitouch reset_qcom_pdc qcrypto qcom_rng rfkill qcom_q6v5_mss libarc4 libdes qrtr ns qcom_wdt socinfo slim_qcom_ngd_ctrl
[  192.065739]  pdr_interface qcom_q6v5_pas slimbus qcom_pil_info qcom_q6v5 qcom_sysmon qcom_common qcom_glink_smem qmi_helpers rmtfs_mem tcp_bbr sch_fq fuse ip_tables x_tables ipv6 crc_ccitt ti_sn65dsi86 i2c_hid msm mdt_loader llcc_qcom rtc_pm8xxx ocmem drm_kms_helper crct10dif_ce phy_qcom_qusb2 i2c_qcom_geni panel_simple drm pwm_bl
[  192.066066] CPU: 3 PID: 2039 Comm: gnome-shell Tainted: G        W         5.10.0-rc7-next-20201208 #1
[  192.066068] Hardware name: LENOVO 81JL/LNVNB161216, BIOS 9UCN33WW(V2.06) 06/ 4/2019
[  192.066072] pstate: 40400005 (nZcv daif +PAN -UAO -TCO BTYPE=--)
[  192.066099] pc : put_iova_vmas+0x94/0xa0 [msm]
[  192.066262] lr : put_iova_vmas+0x1c/0xa0 [msm]
[  192.066403] sp : ffff800019efbbb0
[  192.066405] x29: ffff800019efbbb0 x28: ffff800019efbd88
[  192.066411] x27: 0000000000000000 x26: ffff109582efa400
[  192.066417] x25: 0000000000000009 x24: 000000000000012b
[  192.066422] x23: ffff109582efa438 x22: ffff109582efa450
[  192.066427] x21: ffff109582efa528 x20: ffff1095cbd4f200
[  192.066432] x19: ffff1095cbd4f200 x18: 0000000000000000
[  192.066438] x17: 0000000000000000 x16: ffffc26c200ca750
[  192.066727] x15: 0000000000000000 x14: 0000000000000000
[  192.066741] x13: ffff1096fb8c9100 x12: 0000000000000002
[  192.066754] x11: ffffffffffffffff x10: 0000000000000002
[  192.067046] x9 : 0000000000000001 x8 : 0000000000000a36
[  192.067060] x7 : ffff4e2ad9f11000 x6 : ffffc26c216d4000
[  192.067212] x5 : ffffc26c2022661c x4 : ffff1095c2b98000
[  192.067367] x3 : ffff1095cbd4f300 x2 : 0000000000000000
[  192.067380] x1 : ffff1095c2b98000 x0 : 0000000000000000
[  192.067667] Call trace:
[  192.067734]  put_iova_vmas+0x94/0xa0 [msm]
[  192.068078]  msm_gem_free_object+0xb4/0x110 [msm]
[  192.068399]  drm_gem_object_free+0x1c/0x30 [drm]
[  192.068717]  drm_gem_object_handle_put_unlocked+0xf0/0xf8 [drm]
[  192.069032]  drm_gem_object_release_handle+0x6c/0x88 [drm]
[  192.069349]  drm_gem_handle_delete+0x68/0xc0 [drm]
[  192.069666]  drm_gem_close_ioctl+0x30/0x48 [drm]
[  192.069984]  drm_ioctl_kernel+0xc0/0x110 [drm]
[  192.070303]  drm_ioctl+0x210/0x440 [drm]
[  192.070588]  __arm64_sys_ioctl+0xa8/0xf0
[  192.070599]  el0_svc_common.constprop.0+0x74/0x190
[  192.070608]  do_el0_svc+0x24/0x90
[  192.070618]  el0_svc+0x14/0x20
[  192.070903]  el0_sync_handler+0xb0/0xb8
[  192.070911]  el0_sync+0x174/0x180
[  192.070918] ---[ end trace bee6b12a899001a3 ]---
[  192.072140] ------------[ cut here ]------------

Fixes: 9b73bde39cf2 ("drm/msm: Fix use-after-free in msm_gem with carveout")
Signed-off-by: Rob Clark <robdclark@chromium.org>
Acked-by: Iskren Chernev <iskren.chernev@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 68a6c7eacc0a..a21be5b910ff 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -990,6 +990,8 @@ void msm_gem_free_object(struct drm_gem_object *obj)
 		if (msm_obj->pages)
 			kvfree(msm_obj->pages);
 
+		put_iova_vmas(obj);
+
 		/* dma_buf_detach() grabs resv lock, so we need to unlock
 		 * prior to drm_prime_gem_destroy
 		 */
@@ -999,11 +1001,10 @@ void msm_gem_free_object(struct drm_gem_object *obj)
 	} else {
 		msm_gem_vunmap(obj);
 		put_pages(obj);
+		put_iova_vmas(obj);
 		msm_gem_unlock(obj);
 	}
 
-	put_iova_vmas(obj);
-
 	drm_gem_object_release(obj);
 
 	kfree(msm_obj);

From 161b838e25c6f83495e27e3f546b893622d442bf Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 14 Dec 2020 23:40:15 +0000
Subject: [PATCH 041/547] netfilter: nftables: fix incorrect increment of loop
 counter

The intention of the err_expr cleanup path is to iterate over the
allocated expr_array objects and free them, starting from i - 1 and
working down to the start of the array. Currently the loop counter
is being incremented instead of decremented and also the index i is
being used instead of k, repeatedly destroying the same expr_array
element.  Fix this by decrementing k and using k as the index into
expr_array.

Addresses-Coverity: ("Infinite loop")
Fixes: 8cfd9b0f8515 ("netfilter: nftables: generalize set expressions support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8d5aa0ac45f4..4186b1e52d58 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5254,8 +5254,8 @@ static int nft_set_elem_expr_clone(const struct nft_ctx *ctx,
 	return 0;
 
 err_expr:
-	for (k = i - 1; k >= 0; k++)
-		nft_expr_destroy(ctx, expr_array[i]);
+	for (k = i - 1; k >= 0; k--)
+		nft_expr_destroy(ctx, expr_array[k]);
 
 	return -ENOMEM;
 }

From df9716ec9ade3d2e190a2aac199557d30a3a8416 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 15 Dec 2020 13:20:24 +0000
Subject: [PATCH 042/547] regulator: pf8x00: Use specific compatible strings
 for devices

The pf8x00 driver supports three devices, the DT compatible strings
and I2C IDs should enumerate these specifically rather than using a
wildcard so that we don't collide with anything incompatible in the
same ID range in the future and so that we can handle any software
visible differences between the variants we find.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20201215132024.13356-1-broonie@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/nxp,pf8x00-regulator.yaml          | 6 ++++--
 drivers/regulator/pf8x00-regulator.c                      | 8 ++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml b/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
index a6c259ce9785..956156fe52a3 100644
--- a/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
@@ -19,7 +19,9 @@ description: |
 properties:
   compatible:
     enum:
-      - nxp,pf8x00
+      - nxp,pf8100
+      - nxp,pf8121a
+      - nxp,pf8200
 
   reg:
     maxItems: 1
@@ -118,7 +120,7 @@ examples:
         #size-cells = <0>;
 
         pmic@8 {
-            compatible = "nxp,pf8x00";
+            compatible = "nxp,pf8100";
             reg = <0x08>;
 
             regulators {
diff --git a/drivers/regulator/pf8x00-regulator.c b/drivers/regulator/pf8x00-regulator.c
index 308c27fa6ea8..af9918cd27aa 100644
--- a/drivers/regulator/pf8x00-regulator.c
+++ b/drivers/regulator/pf8x00-regulator.c
@@ -469,13 +469,17 @@ static int pf8x00_i2c_probe(struct i2c_client *client)
 }
 
 static const struct of_device_id pf8x00_dt_ids[] = {
-	{ .compatible = "nxp,pf8x00",},
+	{ .compatible = "nxp,pf8100",},
+	{ .compatible = "nxp,pf8121a",},
+	{ .compatible = "nxp,pf8200",},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, pf8x00_dt_ids);
 
 static const struct i2c_device_id pf8x00_i2c_id[] = {
-	{ "pf8x00", 0 },
+	{ "pf8100", 0 },
+	{ "pf8121a", 0 },
+	{ "pf8200", 0 },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, pf8x00_i2c_id);

From 2d18e54dd8662442ef5898c6bdadeaf90b3cebbc Mon Sep 17 00:00:00 2001
From: Qinglang Miao <miaoqinglang@huawei.com>
Date: Thu, 10 Dec 2020 09:29:43 +0800
Subject: [PATCH 043/547] cgroup: Fix memory leak when parsing multiple source
 parameters

A memory leak is found in cgroup1_parse_param() when multiple source
parameters overwrite fc->source in the fs_context struct without free.

unreferenced object 0xffff888100d930e0 (size 16):
  comm "mount", pid 520, jiffies 4303326831 (age 152.783s)
  hex dump (first 16 bytes):
    74 65 73 74 6c 65 61 6b 00 00 00 00 00 00 00 00  testleak........
  backtrace:
    [<000000003e5023ec>] kmemdup_nul+0x2d/0xa0
    [<00000000377dbdaa>] vfs_parse_fs_string+0xc0/0x150
    [<00000000cb2b4882>] generic_parse_monolithic+0x15a/0x1d0
    [<000000000f750198>] path_mount+0xee1/0x1820
    [<0000000004756de2>] do_mount+0xea/0x100
    [<0000000094cafb0a>] __x64_sys_mount+0x14b/0x1f0

Fix this bug by permitting a single source parameter and rejecting with
an error all subsequent ones.

Fixes: 8d2451f4994f ("cgroup1: switch to option-by-option parsing")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Qinglang Miao <miaoqinglang@huawei.com>
Reviewed-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cgroup-v1.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 191c329e482a..32596fdbcd5b 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -908,6 +908,8 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	opt = fs_parse(fc, cgroup1_fs_parameters, param, &result);
 	if (opt == -ENOPARAM) {
 		if (strcmp(param->key, "source") == 0) {
+			if (fc->source)
+				return invalf(fc, "Multiple sources not supported");
 			fc->source = param->string;
 			param->string = NULL;
 			return 0;

From 1a3449c19407a28f7019a887cdf0d6ba2444751a Mon Sep 17 00:00:00 2001
From: Kamal Mostafa <kamal@canonical.com>
Date: Tue, 15 Dec 2020 10:20:10 -0800
Subject: [PATCH 044/547] selftests/bpf: Clarify build error if no vmlinux

If Makefile cannot find any of the vmlinux's in its VMLINUX_BTF_PATHS list,
it tries to run btftool incorrectly, with VMLINUX_BTF unset:

    bpftool btf dump file $(VMLINUX_BTF) format c

Such that the keyword 'format' is misinterpreted as the path to vmlinux.
The resulting build error message is fairly cryptic:

      GEN      vmlinux.h
    Error: failed to load BTF from format: No such file or directory

This patch makes the failure reason clearer by yielding this instead:

    Makefile:...: *** Cannot find a vmlinux for VMLINUX_BTF at any of
    "{paths}".  Stop.

Fixes: acbd06206bbb ("selftests/bpf: Add vmlinux.h selftest exercising tracing of syscalls")
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201215182011.15755-1-kamal@canonical.com
---
 tools/testing/selftests/bpf/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8c33e999319a..c51df6b91bef 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -121,6 +121,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     /sys/kernel/btf/vmlinux				\
 		     /boot/vmlinux-$(shell uname -r)
 VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+ifeq ($(VMLINUX_BTF),)
+$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
+endif
 
 # Define simple and short `make test_progs`, `make test_sysctl`, etc targets
 # to build individual tests.

From 81e7eb5bf08f36d34495a5898f6ef3fec05d9776 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 16 Dec 2020 22:43:44 -0500
Subject: [PATCH 045/547] Revert "Revert "scsi: megaraid_sas: Added support for
 shared host tagset for cpuhotplug""

This reverts commit 1a0e1943d8798cb3241fb5edb9a836af1611b60a.

Commit b3c6a5997541 ("block: Fix a lockdep complaint triggered by
request queue flushing") has been reverted and commit fb01a2932e81 has
been introduced in its place. Consequently, it is now safe to
reinstate the megaraid_sas tagset changes that led to boot problems in
5.10.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/megaraid/megaraid_sas_base.c   | 39 +++++++++++++++++++++
 drivers/scsi/megaraid/megaraid_sas_fusion.c | 29 ++++++++-------
 2 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index e158d3d62056..41cd66fc7d81 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -37,6 +37,7 @@
 #include <linux/poll.h>
 #include <linux/vmalloc.h>
 #include <linux/irq_poll.h>
+#include <linux/blk-mq-pci.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -113,6 +114,10 @@ unsigned int enable_sdev_max_qd;
 module_param(enable_sdev_max_qd, int, 0444);
 MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0");
 
+int host_tagset_enable = 1;
+module_param(host_tagset_enable, int, 0444);
+MODULE_PARM_DESC(host_tagset_enable, "Shared host tagset enable/disable Default: enable(1)");
+
 MODULE_LICENSE("GPL");
 MODULE_VERSION(MEGASAS_VERSION);
 MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com");
@@ -3119,6 +3124,19 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev,
 	return 0;
 }
 
+static int megasas_map_queues(struct Scsi_Host *shost)
+{
+	struct megasas_instance *instance;
+
+	instance = (struct megasas_instance *)shost->hostdata;
+
+	if (shost->nr_hw_queues == 1)
+		return 0;
+
+	return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+			instance->pdev, instance->low_latency_index_start);
+}
+
 static void megasas_aen_polling(struct work_struct *work);
 
 /**
@@ -3427,6 +3445,7 @@ static struct scsi_host_template megasas_template = {
 	.eh_timed_out = megasas_reset_timer,
 	.shost_attrs = megaraid_host_attrs,
 	.bios_param = megasas_bios_param,
+	.map_queues = megasas_map_queues,
 	.change_queue_depth = scsi_change_queue_depth,
 	.max_segment_size = 0xffffffff,
 };
@@ -6808,6 +6827,26 @@ static int megasas_io_attach(struct megasas_instance *instance)
 	host->max_lun = MEGASAS_MAX_LUN;
 	host->max_cmd_len = 16;
 
+	/* Use shared host tagset only for fusion adaptors
+	 * if there are managed interrupts (smp affinity enabled case).
+	 * Single msix_vectors in kdump, so shared host tag is also disabled.
+	 */
+
+	host->host_tagset = 0;
+	host->nr_hw_queues = 1;
+
+	if ((instance->adapter_type != MFI_SERIES) &&
+		(instance->msix_vectors > instance->low_latency_index_start) &&
+		host_tagset_enable &&
+		instance->smp_affinity_enable) {
+		host->host_tagset = 1;
+		host->nr_hw_queues = instance->msix_vectors -
+			instance->low_latency_index_start;
+	}
+
+	dev_info(&instance->pdev->dev,
+		"Max firmware commands: %d shared with nr_hw_queues = %d\n",
+		instance->max_fw_cmds, host->nr_hw_queues);
 	/*
 	 * Notify the mid-layer about the new controller
 	 */
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index b0c01cf0428f..fd607287608e 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -359,24 +359,29 @@ megasas_get_msix_index(struct megasas_instance *instance,
 {
 	int sdev_busy;
 
-	/* nr_hw_queue = 1 for MegaRAID */
-	struct blk_mq_hw_ctx *hctx =
-		scmd->device->request_queue->queue_hw_ctx[0];
-
-	sdev_busy = atomic_read(&hctx->nr_active);
+	/* TBD - if sml remove device_busy in future, driver
+	 * should track counter in internal structure.
+	 */
+	sdev_busy = atomic_read(&scmd->device->device_busy);
 
 	if (instance->perf_mode == MR_BALANCED_PERF_MODE &&
-	    sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH))
+	    sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) {
 		cmd->request_desc->SCSIIO.MSIxIndex =
 			mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) /
 					MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start);
-	else if (instance->msix_load_balance)
+	} else if (instance->msix_load_balance) {
 		cmd->request_desc->SCSIIO.MSIxIndex =
 			(mega_mod64(atomic64_add_return(1, &instance->total_io_count),
 				instance->msix_vectors));
-	else
+	} else if (instance->host->nr_hw_queues > 1) {
+		u32 tag = blk_mq_unique_tag(scmd->request);
+
+		cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) +
+			instance->low_latency_index_start;
+	} else {
 		cmd->request_desc->SCSIIO.MSIxIndex =
 			instance->reply_map[raw_smp_processor_id()];
+	}
 }
 
 /**
@@ -956,9 +961,6 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance)
 	if (megasas_alloc_cmdlist_fusion(instance))
 		goto fail_exit;
 
-	dev_info(&instance->pdev->dev, "Configured max firmware commands: %d\n",
-		 instance->max_fw_cmds);
-
 	/* The first 256 bytes (SMID 0) is not used. Don't add to the cmd list */
 	io_req_base = fusion->io_request_frames + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE;
 	io_req_base_phys = fusion->io_request_frames_phys + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE;
@@ -1102,8 +1104,9 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
 		MR_HIGH_IOPS_QUEUE_COUNT) && cur_intr_coalescing)
 		instance->perf_mode = MR_BALANCED_PERF_MODE;
 
-	dev_info(&instance->pdev->dev, "Performance mode :%s\n",
-		MEGASAS_PERF_MODE_2STR(instance->perf_mode));
+	dev_info(&instance->pdev->dev, "Performance mode :%s (latency index = %d)\n",
+		MEGASAS_PERF_MODE_2STR(instance->perf_mode),
+		instance->low_latency_index_start);
 
 	instance->fw_sync_cache_support = (scratch_pad_1 &
 		MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0;

From 292bff9480c8d52fc58028979c4162abd83f1aec Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 14 Dec 2020 23:24:17 +0000
Subject: [PATCH 046/547] ath11k: add missing null check on allocated skb

Currently the null check on a newly allocated skb is missing and
this can lead to a null pointer dereference is the allocation fails.
Fix this by adding a null check and returning -ENOMEM.

Addresses-Coverity: ("Dereference null return")
Fixes: 43ed15e1ee01 ("ath11k: put hw to DBS using WMI_PDEV_SET_HW_MODE_CMDID")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20201214232417.84556-1-colin.king@canonical.com
---
 drivers/net/wireless/ath/ath11k/wmi.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index da4b546b62cb..73869d445c5b 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -3460,6 +3460,9 @@ int ath11k_wmi_set_hw_mode(struct ath11k_base *ab,
 	len = sizeof(*cmd);
 
 	skb = ath11k_wmi_alloc_skb(wmi_ab, len);
+	if (!skb)
+		return -ENOMEM;
+
 	cmd = (struct wmi_pdev_set_hw_mode_cmd_param *)skb->data;
 
 	cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_PDEV_SET_HW_MODE_CMD) |

From 3597010630d0aa96f5778901e691c6068bb86318 Mon Sep 17 00:00:00 2001
From: Carl Huang <cjhuang@codeaurora.org>
Date: Fri, 11 Dec 2020 00:56:13 -0500
Subject: [PATCH 047/547] ath11k: fix crash caused by NULL rx_channel

During connect and disconnect stress test, crashed happened
because ar->rx_channel is NULL. Fix it by checking whether
ar->rx_channel is NULL.

Crash stack is as below:
RIP: 0010:ath11k_dp_rx_h_ppdu+0x110/0x230 [ath11k]
[ 5028.808963]  ath11k_dp_rx_wbm_err+0x14a/0x360 [ath11k]
[ 5028.808970]  ath11k_dp_rx_process_wbm_err+0x41c/0x520 [ath11k]
[ 5028.808978]  ath11k_dp_service_srng+0x25e/0x2d0 [ath11k]
[ 5028.808982]  ath11k_pci_ext_grp_napi_poll+0x23/0x80 [ath11k_pci]
[ 5028.808986]  net_rx_action+0x27e/0x400
[ 5028.808990]  __do_softirq+0xfd/0x2bb
[ 5028.808993]  irq_exit+0xa6/0xb0
[ 5028.808995]  do_IRQ+0x56/0xe0
[ 5028.808997]  common_interrupt+0xf/0xf

Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20201211055613.9310-1-cjhuang@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 205c0f1a40e9..920e5026a635 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -2294,6 +2294,7 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc,
 {
 	u8 channel_num;
 	u32 center_freq;
+	struct ieee80211_channel *channel;
 
 	rx_status->freq = 0;
 	rx_status->rate_idx = 0;
@@ -2314,9 +2315,12 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc,
 		rx_status->band = NL80211_BAND_5GHZ;
 	} else {
 		spin_lock_bh(&ar->data_lock);
-		rx_status->band = ar->rx_channel->band;
-		channel_num =
-			ieee80211_frequency_to_channel(ar->rx_channel->center_freq);
+		channel = ar->rx_channel;
+		if (channel) {
+			rx_status->band = channel->band;
+			channel_num =
+				ieee80211_frequency_to_channel(channel->center_freq);
+		}
 		spin_unlock_bh(&ar->data_lock);
 		ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "rx_desc: ",
 				rx_desc, sizeof(struct hal_rx_desc));

From aa44b2f3ecd41f90b7e477158036648a49d21a32 Mon Sep 17 00:00:00 2001
From: Carl Huang <cjhuang@codeaurora.org>
Date: Fri, 11 Dec 2020 00:13:58 -0500
Subject: [PATCH 048/547] ath11k: start vdev if a bss peer is already created

For QCA6390, bss peer must be created before vdev is to start. This
change is to start vdev if a bss peer is created. Otherwise, ath11k
delays to start vdev.

This fixes an issue in a case where HT/VHT/HE settings change between
authentication and association, e.g., due to the user space request
to disable HT.

Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20201211051358.9191-1-cjhuang@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c  |  8 ++++++--
 drivers/net/wireless/ath/ath11k/peer.c | 17 +++++++++++++++++
 drivers/net/wireless/ath/ath11k/peer.h |  2 ++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 5c175e3e09b2..c1608f64ea95 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -3021,6 +3021,7 @@ static int ath11k_mac_station_add(struct ath11k *ar,
 	}
 
 	if (ab->hw_params.vdev_start_delay &&
+	    !arvif->is_started &&
 	    arvif->vdev_type != WMI_VDEV_TYPE_AP) {
 		ret = ath11k_start_vdev_delay(ar->hw, vif);
 		if (ret) {
@@ -5284,7 +5285,8 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
 	/* for QCA6390 bss peer must be created before vdev_start */
 	if (ab->hw_params.vdev_start_delay &&
 	    arvif->vdev_type != WMI_VDEV_TYPE_AP &&
-	    arvif->vdev_type != WMI_VDEV_TYPE_MONITOR) {
+	    arvif->vdev_type != WMI_VDEV_TYPE_MONITOR &&
+	    !ath11k_peer_find_by_vdev_id(ab, arvif->vdev_id)) {
 		memcpy(&arvif->chanctx, ctx, sizeof(*ctx));
 		ret = 0;
 		goto out;
@@ -5295,7 +5297,9 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
 		goto out;
 	}
 
-	if (ab->hw_params.vdev_start_delay) {
+	if (ab->hw_params.vdev_start_delay &&
+	    (arvif->vdev_type == WMI_VDEV_TYPE_AP ||
+	    arvif->vdev_type == WMI_VDEV_TYPE_MONITOR)) {
 		param.vdev_id = arvif->vdev_id;
 		param.peer_type = WMI_PEER_TYPE_DEFAULT;
 		param.peer_addr = ar->mac_addr;
diff --git a/drivers/net/wireless/ath/ath11k/peer.c b/drivers/net/wireless/ath/ath11k/peer.c
index 1866d82678fa..b69e7ebfa930 100644
--- a/drivers/net/wireless/ath/ath11k/peer.c
+++ b/drivers/net/wireless/ath/ath11k/peer.c
@@ -76,6 +76,23 @@ struct ath11k_peer *ath11k_peer_find_by_id(struct ath11k_base *ab,
 	return NULL;
 }
 
+struct ath11k_peer *ath11k_peer_find_by_vdev_id(struct ath11k_base *ab,
+						int vdev_id)
+{
+	struct ath11k_peer *peer;
+
+	spin_lock_bh(&ab->base_lock);
+
+	list_for_each_entry(peer, &ab->peers, list) {
+		if (vdev_id == peer->vdev_id) {
+			spin_unlock_bh(&ab->base_lock);
+			return peer;
+		}
+	}
+	spin_unlock_bh(&ab->base_lock);
+	return NULL;
+}
+
 void ath11k_peer_unmap_event(struct ath11k_base *ab, u16 peer_id)
 {
 	struct ath11k_peer *peer;
diff --git a/drivers/net/wireless/ath/ath11k/peer.h b/drivers/net/wireless/ath/ath11k/peer.h
index bba2e00b6944..8553ed061aea 100644
--- a/drivers/net/wireless/ath/ath11k/peer.h
+++ b/drivers/net/wireless/ath/ath11k/peer.h
@@ -43,5 +43,7 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif,
 		       struct ieee80211_sta *sta, struct peer_create_params *param);
 int ath11k_wait_for_peer_delete_done(struct ath11k *ar, u32 vdev_id,
 				     const u8 *addr);
+struct ath11k_peer *ath11k_peer_find_by_vdev_id(struct ath11k_base *ab,
+						int vdev_id);
 
 #endif /* _PEER_H_ */

From 9b09456258ea2f35fc8a99c4ac4829dcba0ca4be Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 16 Dec 2020 11:31:19 +0300
Subject: [PATCH 049/547] ath11k: Fix error code in ath11k_core_suspend()

The "if (!ret)" condition is inverted and it should be "if (ret)".  It means
that we return success when we had intended to return an error code. This also
caused a spurious warning even when the suspend was successful:

[  297.186612] ath11k_pci 0000:06:00.0: failed to suspend hif: 0

Fixes: d1b0c33850d2 ("ath11k: implement suspend for QCA6390 PCI devices")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/X9nF17L2/EKOSbn/@mwanda
---
 drivers/net/wireless/ath/ath11k/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index b97c38b9a270..350b7913622c 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -185,7 +185,7 @@ int ath11k_core_suspend(struct ath11k_base *ab)
 	ath11k_hif_ce_irq_disable(ab);
 
 	ret = ath11k_hif_suspend(ab);
-	if (!ret) {
+	if (ret) {
 		ath11k_warn(ab, "failed to suspend hif: %d\n", ret);
 		return ret;
 	}

From 30d085039314fcad2c2e33a2dfc8e79765ddf408 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 16 Dec 2020 11:32:12 +0300
Subject: [PATCH 050/547] ath11k: Fix ath11k_pci_fix_l1ss()

All these conditions are reversed so presumably most of the function is
dead code. This caused a spurious warning:

[   95.734922] ath11k_pci 0000:06:00.0: failed to set sysclk: 0

Fixes: 0699940755e9 ("ath11k: pci: fix L1ss clock unstable problem")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/X9nGDHiTh+Z+asDy@mwanda
---
 drivers/net/wireless/ath/ath11k/pci.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 857647aa57c8..9f9a824a4c2d 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -274,7 +274,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab)
 				      PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG,
 				      PCIE_QSERDES_COM_SYSCLK_EN_SEL_VAL,
 				      PCIE_QSERDES_COM_SYSCLK_EN_SEL_MSK);
-	if (!ret) {
+	if (ret) {
 		ath11k_warn(ab, "failed to set sysclk: %d\n", ret);
 		return ret;
 	}
@@ -283,7 +283,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab)
 				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_REG,
 				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_VAL,
 				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK);
-	if (!ret) {
+	if (ret) {
 		ath11k_warn(ab, "failed to set dtct config1 error: %d\n", ret);
 		return ret;
 	}
@@ -292,7 +292,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab)
 				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_REG,
 				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_VAL,
 				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK);
-	if (!ret) {
+	if (ret) {
 		ath11k_warn(ab, "failed to set dtct config2: %d\n", ret);
 		return ret;
 	}
@@ -301,7 +301,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab)
 				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_REG,
 				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_VAL,
 				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK);
-	if (!ret) {
+	if (ret) {
 		ath11k_warn(ab, "failed to set dtct config4: %d\n", ret);
 		return ret;
 	}

From e7f6f893ac39c8715d959ff8d677645ef5e0f8b4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 8 Dec 2020 10:20:15 +0100
Subject: [PATCH 051/547] mt76: mt76u: fix NULL pointer dereference in
 mt76u_status_worker

Fix the following NULL pointer dereference in mt76u_status_worker that
can occur if status thread runs before allocating tx queues

[   31.395373] BUG: kernel NULL pointer dereference, address: 000000000000002c
[   31.395769] #PF: supervisor read access in kernel mode
[   31.395985] #PF: error_code(0x0000) - not-present page
[   31.396178] PGD 0 P4D 0
[   31.396277] Oops: 0000 [#1] SMP
[   31.396430] CPU: 3 PID: 337 Comm: mt76-usb-status Not tainted 5.10.0-rc1-kvm+ #49
[   31.396703] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-3.fc33 04/01/2014
[   31.397048] RIP: 0010:mt76u_status_worker+0x2b/0x190
[   31.397931] RSP: 0018:ffffc9000076fe98 EFLAGS: 00010282
[   31.398118] RAX: 0000000000000001 RBX: ffff888111203fe8 RCX: 0000000000000000
[   31.398400] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff888111203fe8
[   31.398668] RBP: ffff888111201d00 R08: 000000000000038c R09: 000000000000009b
[   31.398952] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[   31.399235] R13: 0000000000000000 R14: 0000000000000000 R15: ffff88810c987300
[   31.399494] FS:  0000000000000000(0000) GS:ffff88817bd80000(0000) knlGS:0000000000000000
[   31.399767] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   31.399991] CR2: 000000000000002c CR3: 0000000103525000 CR4: 00000000000006a0
[   31.400236] Call Trace:
[   31.400348]  ? schedule+0x3e/0xa0
[   31.400514]  __mt76_worker_fn+0x71/0xa0
[   31.400634]  ? mt76_get_min_avg_rssi+0x110/0x110
[   31.400827]  kthread+0x118/0x130
[   31.400984]  ? __kthread_bind_mask+0x60/0x60
[   31.401212]  ret_from_fork+0x1f/0x30
[   31.401353] Modules linked in:
[   31.401480] CR2: 000000000000002c
[   31.401627] ---[ end trace 8bf174505cc34851 ]---
[   31.401798] RIP: 0010:mt76u_status_worker+0x2b/0x190
[   31.402636] RSP: 0018:ffffc9000076fe98 EFLAGS: 00010282
[   31.402829] RAX: 0000000000000001 RBX: ffff888111203fe8 RCX: 0000000000000000
[   31.403118] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff888111203fe8
[   31.403424] RBP: ffff888111201d00 R08: 000000000000038c R09: 000000000000009b
[   31.403689] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[   31.403933] R13: 0000000000000000 R14: 0000000000000000 R15: ffff88810c987300
[   31.404209] FS:  0000000000000000(0000) GS:ffff88817bd80000(0000) knlGS:0000000000000000
[   31.404482] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   31.404726] CR2: 000000000000002c CR3: 0000000103525000 CR4: 00000000000006a0
[   31.405294] mt76x0u: probe of 1-1:1.0 failed with error -110
[   31.406007] usb 1-1: USB disconnect, device number 2
[   31.840756] usb 1-1: new high-speed USB device number 3 using xhci_hcd
[   32.461295] usb 1-1: reset high-speed USB device number 3 using xhci_hcd
[   32.659932] mt76x0u 1-1:1.0: ASIC revision: 76100002 MAC revision: 76502000
[   33.197032] mt76x0u 1-1:1.0: EEPROM ver:02 fae:01

Fixes: 9daf27e62852 ("mt76: mt76u: use dedicated thread for status work")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/cd44dc407cf3e5f27688105d4a75fb1c68e62b06.1607419147.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/usb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index dc850109de22..8b08cad131c8 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -816,6 +816,8 @@ static void mt76u_status_worker(struct mt76_worker *w)
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		q = dev->phy.q_tx[i];
+		if (!q)
+			continue;
 
 		while (q->queued > 0) {
 			if (!q->entry[q->tail].done)

From 4dfde294b9792dcf8615b55c58f093d544f472f0 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 14 Dec 2020 13:31:06 +0800
Subject: [PATCH 052/547] rtlwifi: rise completion at the last step of firmware
 callback

request_firmware_nowait() which schedules another work is used to load
firmware when USB is probing. If USB is unplugged before running the
firmware work, it goes disconnect ops, and then causes use-after-free.
Though we wait for completion of firmware work before freeing the hw,
firmware callback rises completion too early. So I move it to the
last step.

usb 5-1: Direct firmware load for rtlwifi/rtl8192cufw.bin failed with error -2
rtlwifi: Loading alternative firmware rtlwifi/rtl8192cufw.bin
rtlwifi: Selected firmware is not available
==================================================================
BUG: KASAN: use-after-free in rtl_fw_do_work.cold+0x68/0x6a drivers/net/wireless/realtek/rtlwifi/core.c:93
Write of size 4 at addr ffff8881454cff50 by task kworker/0:6/7379

CPU: 0 PID: 7379 Comm: kworker/0:6 Not tainted 5.10.0-rc7-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: events request_firmware_work_func
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x107/0x163 lib/dump_stack.c:118
 print_address_description.constprop.0.cold+0xae/0x4c8 mm/kasan/report.c:385
 __kasan_report mm/kasan/report.c:545 [inline]
 kasan_report.cold+0x1f/0x37 mm/kasan/report.c:562
 rtl_fw_do_work.cold+0x68/0x6a drivers/net/wireless/realtek/rtlwifi/core.c:93
 request_firmware_work_func+0x12c/0x230 drivers/base/firmware_loader/main.c:1079
 process_one_work+0x933/0x1520 kernel/workqueue.c:2272
 worker_thread+0x64c/0x1120 kernel/workqueue.c:2418
 kthread+0x38c/0x460 kernel/kthread.c:292
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:296

The buggy address belongs to the page:
page:00000000f54435b3 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1454cf
flags: 0x200000000000000()
raw: 0200000000000000 0000000000000000 ffffea00051533c8 0000000000000000
raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff8881454cfe00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 ffff8881454cfe80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>ffff8881454cff00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
                                                 ^
 ffff8881454cff80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 ffff8881454d0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff

Reported-by: syzbot+65be4277f3c489293939@syzkaller.appspotmail.com
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20201214053106.7748-1-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtlwifi/core.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index a7259dbc953d..965bd9589045 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -78,7 +78,6 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context,
 
 	rtl_dbg(rtlpriv, COMP_ERR, DBG_LOUD,
 		"Firmware callback routine entered!\n");
-	complete(&rtlpriv->firmware_loading_complete);
 	if (!firmware) {
 		if (rtlpriv->cfg->alt_fw_name) {
 			err = request_firmware(&firmware,
@@ -91,13 +90,13 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context,
 		}
 		pr_err("Selected firmware is not available\n");
 		rtlpriv->max_fw_size = 0;
-		return;
+		goto exit;
 	}
 found_alt:
 	if (firmware->size > rtlpriv->max_fw_size) {
 		pr_err("Firmware is too big!\n");
 		release_firmware(firmware);
-		return;
+		goto exit;
 	}
 	if (!is_wow) {
 		memcpy(rtlpriv->rtlhal.pfirmware, firmware->data,
@@ -109,6 +108,9 @@ found_alt:
 		rtlpriv->rtlhal.wowlan_fwsize = firmware->size;
 	}
 	release_firmware(firmware);
+
+exit:
+	complete(&rtlpriv->firmware_loading_complete);
 }
 
 void rtl_fw_cb(const struct firmware *firmware, void *context)

From 443d6e86f821a165fae3fc3fc13086d27ac140b1 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 16 Dec 2020 21:38:02 -0700
Subject: [PATCH 053/547] netfilter: x_tables: Update remaining dereference to
 RCU

This fixes the dereference to fetch the RCU pointer when holding
the appropriate xtables lock.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: cc00bcaa5899 ("netfilter: x_tables: Switch synchronization to RCU")
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arp_tables.c | 2 +-
 net/ipv4/netfilter/ip_tables.c  | 2 +-
 net/ipv6/netfilter/ip6_tables.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 563b62b76a5f..c576a63d09db 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net,
 	xt_compat_lock(NFPROTO_ARP);
 	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
 	if (!IS_ERR(t)) {
-		const struct xt_table_info *private = t->private;
+		const struct xt_table_info *private = xt_table_get_private_protected(t);
 		struct xt_table_info info;
 
 		ret = compat_table_info(private, &info);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6e2851f8d3a3..e8f6f9d86237 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
 	xt_compat_lock(AF_INET);
 	t = xt_find_table_lock(net, AF_INET, get.name);
 	if (!IS_ERR(t)) {
-		const struct xt_table_info *private = t->private;
+		const struct xt_table_info *private = xt_table_get_private_protected(t);
 		struct xt_table_info info;
 		ret = compat_table_info(private, &info);
 		if (!ret && get.size == info.size)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c4f532f4d311..0d453fa9e327 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
 	xt_compat_lock(AF_INET6);
 	t = xt_find_table_lock(net, AF_INET6, get.name);
 	if (!IS_ERR(t)) {
-		const struct xt_table_info *private = t->private;
+		const struct xt_table_info *private = xt_table_get_private_protected(t);
 		struct xt_table_info info;
 		ret = compat_table_info(private, &info);
 		if (!ret && get.size == info.size)

From 2b33d6ffa9e38f344418976b06057e2fc2aa9e2a Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Thu, 17 Dec 2020 11:53:40 +0300
Subject: [PATCH 054/547] netfilter: ipset: fixes possible oops in mtype_resize

currently mtype_resize() can cause oops

        t = ip_set_alloc(htable_size(htable_bits));
        if (!t) {
                ret = -ENOMEM;
                goto out;
        }
        t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));

Increased htable_bits can force htable_size() to return 0.
In own turn ip_set_alloc(0) returns not 0 but ZERO_SIZE_PTR,
so follwoing access to t->hregion should trigger an OOPS.

Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Acked-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_hash_gen.h | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 5f1208ad049e..1e7dddf824ae 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -640,7 +640,7 @@ mtype_resize(struct ip_set *set, bool retried)
 	struct htype *h = set->data;
 	struct htable *t, *orig;
 	u8 htable_bits;
-	size_t dsize = set->dsize;
+	size_t hsize, dsize = set->dsize;
 #ifdef IP_SET_HASH_WITH_NETS
 	u8 flags;
 	struct mtype_elem *tmp;
@@ -664,14 +664,12 @@ mtype_resize(struct ip_set *set, bool retried)
 retry:
 	ret = 0;
 	htable_bits++;
-	if (!htable_bits) {
-		/* In case we have plenty of memory :-) */
-		pr_warn("Cannot increase the hashsize of set %s further\n",
-			set->name);
-		ret = -IPSET_ERR_HASH_FULL;
-		goto out;
-	}
-	t = ip_set_alloc(htable_size(htable_bits));
+	if (!htable_bits)
+		goto hbwarn;
+	hsize = htable_size(htable_bits);
+	if (!hsize)
+		goto hbwarn;
+	t = ip_set_alloc(hsize);
 	if (!t) {
 		ret = -ENOMEM;
 		goto out;
@@ -813,6 +811,12 @@ cleanup:
 	if (ret == -EAGAIN)
 		goto retry;
 	goto out;
+
+hbwarn:
+	/* In case we have plenty of memory :-) */
+	pr_warn("Cannot increase the hashsize of set %s further\n", set->name);
+	ret = -IPSET_ERR_HASH_FULL;
+	goto out;
 }
 
 /* Get the current number of elements and ext_size in the set  */

From 5c8193f568ae16f3242abad6518dc2ca6c8eef86 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Thu, 17 Dec 2020 17:53:18 +0300
Subject: [PATCH 055/547] netfilter: ipset: fix shift-out-of-bounds in
 htable_bits()

htable_bits() can call jhash_size(32) and trigger shift-out-of-bounds

UBSAN: shift-out-of-bounds in net/netfilter/ipset/ip_set_hash_gen.h:151:6
shift exponent 32 is too large for 32-bit type 'unsigned int'
CPU: 0 PID: 8498 Comm: syz-executor519
 Not tainted 5.10.0-rc7-next-20201208-syzkaller #0
Call Trace:
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0x107/0x163 lib/dump_stack.c:120
 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148
 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395
 htable_bits net/netfilter/ipset/ip_set_hash_gen.h:151 [inline]
 hash_mac_create.cold+0x58/0x9b net/netfilter/ipset/ip_set_hash_gen.h:1524
 ip_set_create+0x610/0x1380 net/netfilter/ipset/ip_set_core.c:1115
 nfnetlink_rcv_msg+0xecc/0x1180 net/netfilter/nfnetlink.c:252
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494
 nfnetlink_rcv+0x1ac/0x420 net/netfilter/nfnetlink.c:600
 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1330
 netlink_sendmsg+0x907/0xe40 net/netlink/af_netlink.c:1919
 sock_sendmsg_nosec net/socket.c:652 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:672
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2345
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2399
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2432
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

This patch replaces htable_bits() by simple fls(hashsize - 1) call:
it alone returns valid nbits both for round and non-round hashsizes.
It is normal to set any nbits here because it is validated inside
following htable_size() call which returns 0 for nbits>31.

Fixes: 1feab10d7e6d("netfilter: ipset: Unified hash type generation")
Reported-by: syzbot+d66bfadebca46cf61a2b@syzkaller.appspotmail.com
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Acked-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_hash_gen.h | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 1e7dddf824ae..6186358eac7c 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -141,20 +141,6 @@ htable_size(u8 hbits)
 	return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
 }
 
-/* Compute htable_bits from the user input parameter hashsize */
-static u8
-htable_bits(u32 hashsize)
-{
-	/* Assume that hashsize == 2^htable_bits */
-	u8 bits = fls(hashsize - 1);
-
-	if (jhash_size(bits) != hashsize)
-		/* Round up to the first 2^n value */
-		bits = fls(hashsize);
-
-	return bits;
-}
-
 #ifdef IP_SET_HASH_WITH_NETS
 #if IPSET_NET_COUNT > 1
 #define __CIDR(cidr, i)		(cidr[i])
@@ -1525,7 +1511,11 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 	if (!h)
 		return -ENOMEM;
 
-	hbits = htable_bits(hashsize);
+	/* Compute htable_bits from the user input parameter hashsize.
+	 * Assume that hashsize == 2^htable_bits,
+	 * otherwise round up to the first 2^n value.
+	 */
+	hbits = fls(hashsize - 1);
 	hsize = htable_size(hbits);
 	if (hsize == 0) {
 		kfree(h);

From 3ac874fa84d1baaf0c0175f2a1499f5d88d528b2 Mon Sep 17 00:00:00 2001
From: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
Date: Thu, 22 Oct 2020 12:39:36 +0200
Subject: [PATCH 056/547] i40e: Fix Error I40E_AQ_RC_EINVAL when removing VFs

When removing VFs for PF added to bridge there was
an error I40E_AQ_RC_EINVAL. It was caused by not properly
resetting and reinitializing PF when adding/removing VFs.
Changed how reset is performed when adding/removing VFs
to properly reinitialize PFs VSI.

Fixes: fc60861e9b00 ("i40e: start up in VEPA mode by default")
Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h             |  3 +++
 drivers/net/ethernet/intel/i40e/i40e_main.c        | 10 ++++++++++
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |  4 ++--
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index d231a2cdd98f..118473dfdcbd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -120,6 +120,7 @@ enum i40e_state_t {
 	__I40E_RESET_INTR_RECEIVED,
 	__I40E_REINIT_REQUESTED,
 	__I40E_PF_RESET_REQUESTED,
+	__I40E_PF_RESET_AND_REBUILD_REQUESTED,
 	__I40E_CORE_RESET_REQUESTED,
 	__I40E_GLOBAL_RESET_REQUESTED,
 	__I40E_EMP_RESET_INTR_RECEIVED,
@@ -146,6 +147,8 @@ enum i40e_state_t {
 };
 
 #define I40E_PF_RESET_FLAG	BIT_ULL(__I40E_PF_RESET_REQUESTED)
+#define I40E_PF_RESET_AND_REBUILD_FLAG	\
+	BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED)
 
 /* VSI state flags */
 enum i40e_vsi_state_t {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 1337686bd099..1db482d310c2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -36,6 +36,8 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf);
 static void i40e_determine_queue_usage(struct i40e_pf *pf);
 static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
 static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired);
+static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
+				   bool lock_acquired);
 static int i40e_reset(struct i40e_pf *pf);
 static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
 static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf);
@@ -8536,6 +8538,14 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
 			 "FW LLDP is disabled\n" :
 			 "FW LLDP is enabled\n");
 
+	} else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) {
+		/* Request a PF Reset
+		 *
+		 * Resets PF and reinitializes PFs VSI.
+		 */
+		i40e_prep_for_reset(pf, lock_acquired);
+		i40e_reset_and_rebuild(pf, true, lock_acquired);
+
 	} else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) {
 		int v;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 729c4f0d5ac5..21ee56420c3a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1772,7 +1772,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
 	if (num_vfs) {
 		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
 			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
-			i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
+			i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG);
 		}
 		ret = i40e_pci_sriov_enable(pdev, num_vfs);
 		goto sriov_configure_out;
@@ -1781,7 +1781,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
 	if (!pci_vfs_assigned(pf->pdev)) {
 		i40e_free_vfs(pf);
 		pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
-		i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
+		i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG);
 	} else {
 		dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
 		ret = -EINVAL;

From 8bee683384087a6275c9183a483435225f7bb209 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 14 Dec 2020 09:51:27 +0100
Subject: [PATCH 057/547] xsk: Fix memory leak for failed bind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a possible memory leak when a bind of an AF_XDP socket fails. When
the fill and completion rings are created, they are tied to the
socket. But when the buffer pool is later created at bind time, the
ownership of these two rings are transferred to the buffer pool as
they might be shared between sockets (and the buffer pool cannot be
created until we know what we are binding to). So, before the buffer
pool is created, these two rings are cleaned up with the socket, and
after they have been transferred they are cleaned up together with
the buffer pool.

The problem is that ownership was transferred before it was absolutely
certain that the buffer pool could be created and initialized
correctly and when one of these errors occurred, the fill and
completion rings did neither belong to the socket nor the pool and
where therefore leaked. Solve this by moving the ownership transfer
to the point where the buffer pool has been completely set up and
there is no way it can fail.

Fixes: 7361f9c3d719 ("xsk: Move fill and completion rings to buffer pool")
Reported-by: syzbot+cfa88ddd0655afa88763@syzkaller.appspotmail.com
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201214085127.3960-1-magnus.karlsson@gmail.com
---
 net/xdp/xsk.c           | 4 ++++
 net/xdp/xsk_buff_pool.c | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ac4a317038f1..c6532d77fde7 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -878,6 +878,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		}
 	}
 
+	/* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
+	xs->fq_tmp = NULL;
+	xs->cq_tmp = NULL;
+
 	xs->dev = dev;
 	xs->zc = xs->umem->zc;
 	xs->queue_id = qid;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 67a4494d63b6..818b75060922 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -75,8 +75,6 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 
 	pool->fq = xs->fq_tmp;
 	pool->cq = xs->cq_tmp;
-	xs->fq_tmp = NULL;
-	xs->cq_tmp = NULL;
 
 	for (i = 0; i < pool->free_heads_cnt; i++) {
 		xskb = &pool->heads[i];

From f1340265726e0edf8a8cef28e665b28ad6302ce9 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 2 Dec 2020 18:18:06 -0800
Subject: [PATCH 058/547] iavf: fix double-release of rtnl_lock

This code does not jump to exit on an error in iavf_lan_add_device(),
so the rtnl_unlock() from the normal path will follow.

Fixes: b66c7bc1cd4d ("iavf: Refactor init state machine")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 95543dfd4fe7..0a867d64d467 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1834,11 +1834,9 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter)
 	netif_tx_stop_all_queues(netdev);
 	if (CLIENT_ALLOWED(adapter)) {
 		err = iavf_lan_add_device(adapter);
-		if (err) {
-			rtnl_unlock();
+		if (err)
 			dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n",
 				 err);
-		}
 	}
 	dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr);
 	if (netdev->features & NETIF_F_GRO)

From f6f92968e1e5a7a9d211faaebefc26ebe408dad7 Mon Sep 17 00:00:00 2001
From: Carl Huang <cjhuang@codeaurora.org>
Date: Thu, 17 Dec 2020 09:04:57 +0200
Subject: [PATCH 059/547] ath11k: qmi: try to allocate a big block of DMA
 memory first

Not all firmware versions support allocating DMA memory in smaller blocks so
first try to allocate big block of DMA memory for QMI. If the allocation fails,
let firmware request multiple blocks of DMA memory with smaller size.

This also fixes an unnecessary error message seen during ath11k probe on
QCA6390:

ath11k_pci 0000:06:00.0: Respond mem req failed, result: 1, err: 0
ath11k_pci 0000:06:00.0: qmi failed to respond fw mem req:-22

Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1608127593-15192-1-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/qmi.c | 24 ++++++++++++++++++++++--
 drivers/net/wireless/ath/ath11k/qmi.h |  1 +
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c
index f0b5c50974f3..0db623ff4bb9 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.c
+++ b/drivers/net/wireless/ath/ath11k/qmi.c
@@ -1660,6 +1660,7 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab)
 	struct qmi_wlanfw_respond_mem_resp_msg_v01 resp;
 	struct qmi_txn txn = {};
 	int ret = 0, i;
+	bool delayed;
 
 	req = kzalloc(sizeof(*req), GFP_KERNEL);
 	if (!req)
@@ -1672,11 +1673,13 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab)
 	 * failure to FW and FW will then request mulitple blocks of small
 	 * chunk size memory.
 	 */
-	if (!ab->bus_params.fixed_mem_region && ab->qmi.mem_seg_count <= 2) {
+	if (!ab->bus_params.fixed_mem_region && ab->qmi.target_mem_delayed) {
+		delayed = true;
 		ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi delays mem_request %d\n",
 			   ab->qmi.mem_seg_count);
 		memset(req, 0, sizeof(*req));
 	} else {
+		delayed = false;
 		req->mem_seg_len = ab->qmi.mem_seg_count;
 
 		for (i = 0; i < req->mem_seg_len ; i++) {
@@ -1708,6 +1711,12 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab)
 	}
 
 	if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
+		/* the error response is expected when
+		 * target_mem_delayed is true.
+		 */
+		if (delayed && resp.resp.error == 0)
+			goto out;
+
 		ath11k_warn(ab, "Respond mem req failed, result: %d, err: %d\n",
 			    resp.resp.result, resp.resp.error);
 		ret = -EINVAL;
@@ -1742,6 +1751,8 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab)
 	int i;
 	struct target_mem_chunk *chunk;
 
+	ab->qmi.target_mem_delayed = false;
+
 	for (i = 0; i < ab->qmi.mem_seg_count; i++) {
 		chunk = &ab->qmi.target_mem[i];
 		chunk->vaddr = dma_alloc_coherent(ab->dev,
@@ -1749,6 +1760,15 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab)
 						  &chunk->paddr,
 						  GFP_KERNEL);
 		if (!chunk->vaddr) {
+			if (ab->qmi.mem_seg_count <= 2) {
+				ath11k_dbg(ab, ATH11K_DBG_QMI,
+					   "qmi dma allocation failed (%d B type %u), will try later with small size\n",
+					    chunk->size,
+					    chunk->type);
+				ath11k_qmi_free_target_mem_chunk(ab);
+				ab->qmi.target_mem_delayed = true;
+				return 0;
+			}
 			ath11k_err(ab, "failed to alloc memory, size: 0x%x, type: %u\n",
 				   chunk->size,
 				   chunk->type);
@@ -2517,7 +2537,7 @@ static void ath11k_qmi_msg_mem_request_cb(struct qmi_handle *qmi_hdl,
 				    ret);
 			return;
 		}
-	} else if (msg->mem_seg_len > 2) {
+	} else {
 		ret = ath11k_qmi_alloc_target_mem_chunk(ab);
 		if (ret) {
 			ath11k_warn(ab, "qmi failed to alloc target memory: %d\n",
diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h
index 92925c9eac67..7bad374cc23a 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.h
+++ b/drivers/net/wireless/ath/ath11k/qmi.h
@@ -125,6 +125,7 @@ struct ath11k_qmi {
 	struct target_mem_chunk target_mem[ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01];
 	u32 mem_seg_count;
 	u32 target_mem_mode;
+	bool target_mem_delayed;
 	u8 cal_done;
 	struct target_info target;
 	struct m3_mem_region m3_mem;

From e9603f4bdcc04417f1c7b3585e63654819dc11f6 Mon Sep 17 00:00:00 2001
From: Carl Huang <cjhuang@codeaurora.org>
Date: Thu, 17 Dec 2020 17:22:10 +0200
Subject: [PATCH 060/547] ath11k: pci: disable ASPM L0sLs before downloading
 firmware

Sometimes QCA6390 doesn't switch to amss state as device enters
L1ss state, so disable L0sL1s during firmware downloading.
Driver recovers the ASPM to default value in start callback
or powerdown callback.

Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1608218530-15426-1-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/pci.c | 36 +++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/pci.h |  2 ++
 2 files changed, 38 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 9f9a824a4c2d..20b415cd96c4 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -886,6 +886,32 @@ static void ath11k_pci_free_region(struct ath11k_pci *ab_pci)
 		pci_disable_device(pci_dev);
 }
 
+static void ath11k_pci_aspm_disable(struct ath11k_pci *ab_pci)
+{
+	struct ath11k_base *ab = ab_pci->ab;
+
+	pcie_capability_read_word(ab_pci->pdev, PCI_EXP_LNKCTL,
+				  &ab_pci->link_ctl);
+
+	ath11k_dbg(ab, ATH11K_DBG_PCI, "pci link_ctl 0x%04x L0s %d L1 %d\n",
+		   ab_pci->link_ctl,
+		   u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L0S),
+		   u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1));
+
+	/* disable L0s and L1 */
+	pcie_capability_write_word(ab_pci->pdev, PCI_EXP_LNKCTL,
+				   ab_pci->link_ctl & ~PCI_EXP_LNKCTL_ASPMC);
+
+	set_bit(ATH11K_PCI_ASPM_RESTORE, &ab_pci->flags);
+}
+
+static void ath11k_pci_aspm_restore(struct ath11k_pci *ab_pci)
+{
+	if (test_and_clear_bit(ATH11K_PCI_ASPM_RESTORE, &ab_pci->flags))
+		pcie_capability_write_word(ab_pci->pdev, PCI_EXP_LNKCTL,
+					   ab_pci->link_ctl);
+}
+
 static int ath11k_pci_power_up(struct ath11k_base *ab)
 {
 	struct ath11k_pci *ab_pci = ath11k_pci_priv(ab);
@@ -895,6 +921,11 @@ static int ath11k_pci_power_up(struct ath11k_base *ab)
 	clear_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags);
 	ath11k_pci_sw_reset(ab_pci->ab, true);
 
+	/* Disable ASPM during firmware download due to problems switching
+	 * to AMSS state.
+	 */
+	ath11k_pci_aspm_disable(ab_pci);
+
 	ret = ath11k_mhi_start(ab_pci);
 	if (ret) {
 		ath11k_err(ab, "failed to start mhi: %d\n", ret);
@@ -908,6 +939,9 @@ static void ath11k_pci_power_down(struct ath11k_base *ab)
 {
 	struct ath11k_pci *ab_pci = ath11k_pci_priv(ab);
 
+	/* restore aspm in case firmware bootup fails */
+	ath11k_pci_aspm_restore(ab_pci);
+
 	ath11k_pci_force_wake(ab_pci->ab);
 	ath11k_mhi_stop(ab_pci);
 	clear_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags);
@@ -965,6 +999,8 @@ static int ath11k_pci_start(struct ath11k_base *ab)
 
 	set_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags);
 
+	ath11k_pci_aspm_restore(ab_pci);
+
 	ath11k_pci_ce_irqs_enable(ab);
 	ath11k_ce_rx_post_buf(ab);
 
diff --git a/drivers/net/wireless/ath/ath11k/pci.h b/drivers/net/wireless/ath/ath11k/pci.h
index 0432a702416b..fe44d0dfce19 100644
--- a/drivers/net/wireless/ath/ath11k/pci.h
+++ b/drivers/net/wireless/ath/ath11k/pci.h
@@ -63,6 +63,7 @@ struct ath11k_msi_config {
 enum ath11k_pci_flags {
 	ATH11K_PCI_FLAG_INIT_DONE,
 	ATH11K_PCI_FLAG_IS_MSI_64,
+	ATH11K_PCI_ASPM_RESTORE,
 };
 
 struct ath11k_pci {
@@ -80,6 +81,7 @@ struct ath11k_pci {
 
 	/* enum ath11k_pci_flags */
 	unsigned long flags;
+	u16 link_ctl;
 };
 
 static inline struct ath11k_pci *ath11k_pci_priv(struct ath11k_base *ab)

From 3d45f221ce627d13e2e6ef3274f06750c84a6542 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 2 Dec 2020 11:55:58 +0000
Subject: [PATCH 061/547] btrfs: fix deadlock when cloning inline extent and
 low on free metadata space

When cloning an inline extent there are cases where we can not just copy
the inline extent from the source range to the target range (e.g. when the
target range starts at an offset greater than zero). In such cases we copy
the inline extent's data into a page of the destination inode and then
dirty that page. However, after that we will need to start a transaction
for each processed extent and, if we are ever low on available metadata
space, we may need to flush existing delalloc for all dirty inodes in an
attempt to release metadata space - if that happens we may deadlock:

* the async reclaim task queued a delalloc work to flush delalloc for
  the destination inode of the clone operation;

* the task executing that delalloc work gets blocked waiting for the
  range with the dirty page to be unlocked, which is currently locked
  by the task doing the clone operation;

* the async reclaim task blocks waiting for the delalloc work to complete;

* the cloning task is waiting on the waitqueue of its reservation ticket
  while holding the range with the dirty page locked in the inode's
  io_tree;

* if metadata space is not released by some other task (like delalloc for
  some other inode completing for example), the clone task waits forever
  and as a consequence the delalloc work and async reclaim tasks will hang
  forever as well. Releasing more space on the other hand may require
  starting a transaction, which will hang as well when trying to reserve
  metadata space, resulting in a deadlock between all these tasks.

When this happens, traces like the following show up in dmesg/syslog:

  [87452.323003] INFO: task kworker/u16:11:1810830 blocked for more than 120 seconds.
  [87452.323644]       Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  [87452.324248] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  [87452.324852] task:kworker/u16:11  state:D stack:    0 pid:1810830 ppid:     2 flags:0x00004000
  [87452.325520] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs]
  [87452.326136] Call Trace:
  [87452.326737]  __schedule+0x5d1/0xcf0
  [87452.327390]  schedule+0x45/0xe0
  [87452.328174]  lock_extent_bits+0x1e6/0x2d0 [btrfs]
  [87452.328894]  ? finish_wait+0x90/0x90
  [87452.329474]  btrfs_invalidatepage+0x32c/0x390 [btrfs]
  [87452.330133]  ? __mod_memcg_state+0x8e/0x160
  [87452.330738]  __extent_writepage+0x2d4/0x400 [btrfs]
  [87452.331405]  extent_write_cache_pages+0x2b2/0x500 [btrfs]
  [87452.332007]  ? lock_release+0x20e/0x4c0
  [87452.332557]  ? trace_hardirqs_on+0x1b/0xf0
  [87452.333127]  extent_writepages+0x43/0x90 [btrfs]
  [87452.333653]  ? lock_acquire+0x1a3/0x490
  [87452.334177]  do_writepages+0x43/0xe0
  [87452.334699]  ? __filemap_fdatawrite_range+0xa4/0x100
  [87452.335720]  __filemap_fdatawrite_range+0xc5/0x100
  [87452.336500]  btrfs_run_delalloc_work+0x17/0x40 [btrfs]
  [87452.337216]  btrfs_work_helper+0xf1/0x600 [btrfs]
  [87452.337838]  process_one_work+0x24e/0x5e0
  [87452.338437]  worker_thread+0x50/0x3b0
  [87452.339137]  ? process_one_work+0x5e0/0x5e0
  [87452.339884]  kthread+0x153/0x170
  [87452.340507]  ? kthread_mod_delayed_work+0xc0/0xc0
  [87452.341153]  ret_from_fork+0x22/0x30
  [87452.341806] INFO: task kworker/u16:1:2426217 blocked for more than 120 seconds.
  [87452.342487]       Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  [87452.343274] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  [87452.344049] task:kworker/u16:1   state:D stack:    0 pid:2426217 ppid:     2 flags:0x00004000
  [87452.344974] Workqueue: events_unbound btrfs_async_reclaim_metadata_space [btrfs]
  [87452.345655] Call Trace:
  [87452.346305]  __schedule+0x5d1/0xcf0
  [87452.346947]  ? kvm_clock_read+0x14/0x30
  [87452.347676]  ? wait_for_completion+0x81/0x110
  [87452.348389]  schedule+0x45/0xe0
  [87452.349077]  schedule_timeout+0x30c/0x580
  [87452.349718]  ? _raw_spin_unlock_irqrestore+0x3c/0x60
  [87452.350340]  ? lock_acquire+0x1a3/0x490
  [87452.351006]  ? try_to_wake_up+0x7a/0xa20
  [87452.351541]  ? lock_release+0x20e/0x4c0
  [87452.352040]  ? lock_acquired+0x199/0x490
  [87452.352517]  ? wait_for_completion+0x81/0x110
  [87452.353000]  wait_for_completion+0xab/0x110
  [87452.353490]  start_delalloc_inodes+0x2af/0x390 [btrfs]
  [87452.353973]  btrfs_start_delalloc_roots+0x12d/0x250 [btrfs]
  [87452.354455]  flush_space+0x24f/0x660 [btrfs]
  [87452.355063]  btrfs_async_reclaim_metadata_space+0x1bb/0x480 [btrfs]
  [87452.355565]  process_one_work+0x24e/0x5e0
  [87452.356024]  worker_thread+0x20f/0x3b0
  [87452.356487]  ? process_one_work+0x5e0/0x5e0
  [87452.356973]  kthread+0x153/0x170
  [87452.357434]  ? kthread_mod_delayed_work+0xc0/0xc0
  [87452.357880]  ret_from_fork+0x22/0x30
  (...)
  < stack traces of several tasks waiting for the locks of the inodes of the
    clone operation >
  (...)
  [92867.444138] RSP: 002b:00007ffc3371bbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000052
  [92867.444624] RAX: ffffffffffffffda RBX: 00007ffc3371bea0 RCX: 00007f61efe73f97
  [92867.445116] RDX: 0000000000000000 RSI: 0000560fbd5d7a40 RDI: 0000560fbd5d8960
  [92867.445595] RBP: 00007ffc3371beb0 R08: 0000000000000001 R09: 0000000000000003
  [92867.446070] R10: 00007ffc3371b996 R11: 0000000000000246 R12: 0000000000000000
  [92867.446820] R13: 000000000000001f R14: 00007ffc3371bea0 R15: 00007ffc3371beb0
  [92867.447361] task:fsstress        state:D stack:    0 pid:2508238 ppid:2508153 flags:0x00004000
  [92867.447920] Call Trace:
  [92867.448435]  __schedule+0x5d1/0xcf0
  [92867.448934]  ? _raw_spin_unlock_irqrestore+0x3c/0x60
  [92867.449423]  schedule+0x45/0xe0
  [92867.449916]  __reserve_bytes+0x4a4/0xb10 [btrfs]
  [92867.450576]  ? finish_wait+0x90/0x90
  [92867.451202]  btrfs_reserve_metadata_bytes+0x29/0x190 [btrfs]
  [92867.451815]  btrfs_block_rsv_add+0x1f/0x50 [btrfs]
  [92867.452412]  start_transaction+0x2d1/0x760 [btrfs]
  [92867.453216]  clone_copy_inline_extent+0x333/0x490 [btrfs]
  [92867.453848]  ? lock_release+0x20e/0x4c0
  [92867.454539]  ? btrfs_search_slot+0x9a7/0xc30 [btrfs]
  [92867.455218]  btrfs_clone+0x569/0x7e0 [btrfs]
  [92867.455952]  btrfs_clone_files+0xf6/0x150 [btrfs]
  [92867.456588]  btrfs_remap_file_range+0x324/0x3d0 [btrfs]
  [92867.457213]  do_clone_file_range+0xd4/0x1f0
  [92867.457828]  vfs_clone_file_range+0x4d/0x230
  [92867.458355]  ? lock_release+0x20e/0x4c0
  [92867.458890]  ioctl_file_clone+0x8f/0xc0
  [92867.459377]  do_vfs_ioctl+0x342/0x750
  [92867.459913]  __x64_sys_ioctl+0x62/0xb0
  [92867.460377]  do_syscall_64+0x33/0x80
  [92867.460842]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
  (...)
  < stack traces of more tasks blocked on metadata reservation like the clone
    task above, because the async reclaim task has deadlocked >
  (...)

Another thing to notice is that the worker task that is deadlocked when
trying to flush the destination inode of the clone operation is at
btrfs_invalidatepage(). This is simply because the clone operation has a
destination offset greater than the i_size and we only update the i_size
of the destination file after cloning an extent (just like we do in the
buffered write path).

Since the async reclaim path uses btrfs_start_delalloc_roots() to trigger
the flushing of delalloc for all inodes that have delalloc, add a runtime
flag to an inode to signal it should not be flushed, and for inodes with
that flag set, start_delalloc_inodes() will simply skip them. When the
cloning code needs to dirty a page to copy an inline extent, set that flag
on the inode and then clear it when the clone operation finishes.

This could be sporadically triggered with test case generic/269 from
fstests, which exercises many fsstress processes running in parallel with
several dd processes filling up the entire filesystem.

CC: stable@vger.kernel.org # 5.9+
Fixes: 05a5a7621ce6 ("Btrfs: implement full reflink support for inline extents")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/btrfs_inode.h |  9 +++++++++
 fs/btrfs/ctree.h       |  3 ++-
 fs/btrfs/dev-replace.c |  2 +-
 fs/btrfs/inode.c       | 15 +++++++++++----
 fs/btrfs/ioctl.c       |  2 +-
 fs/btrfs/reflink.c     | 15 +++++++++++++++
 fs/btrfs/space-info.c  |  2 +-
 7 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 555cbcef6585..d9bf53d9ff90 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -42,6 +42,15 @@ enum {
 	 * to an inode.
 	 */
 	BTRFS_INODE_NO_XATTRS,
+	/*
+	 * Set when we are in a context where we need to start a transaction and
+	 * have dirty pages with the respective file range locked. This is to
+	 * ensure that when reserving space for the transaction, if we are low
+	 * on available space and need to flush delalloc, we will not flush
+	 * delalloc for this inode, because that could result in a deadlock (on
+	 * the file range, inode's io_tree).
+	 */
+	BTRFS_INODE_NO_DELALLOC_FLUSH,
 };
 
 /* in memory btrfs inode */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9dde7707873a..2674f24cf2e0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3074,7 +3074,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       u32 min_type);
 
 int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr);
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+			       bool in_reclaim_context);
 int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 			      unsigned int extra_bits,
 			      struct extent_state **cached_state);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a98e33f232d5..324f646d6e5e 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -715,7 +715,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	 * flush all outstanding I/O and inode extent mappings before the
 	 * copy operation is declared as being finished
 	 */
-	ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
+	ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
 	if (ret) {
 		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 		return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8e23780acfae..070716650df8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
  * some fairly slow code that needs optimization. This walks the list
  * of all the inodes with pending delalloc and forces them to disk.
  */
-static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot)
+static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot,
+				 bool in_reclaim_context)
 {
 	struct btrfs_inode *binode;
 	struct inode *inode;
@@ -9411,6 +9412,11 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot
 
 		list_move_tail(&binode->delalloc_inodes,
 			       &root->delalloc_inodes);
+
+		if (in_reclaim_context &&
+		    test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags))
+			continue;
+
 		inode = igrab(&binode->vfs_inode);
 		if (!inode) {
 			cond_resched_lock(&root->delalloc_lock);
@@ -9464,10 +9470,11 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
 	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
 		return -EROFS;
 
-	return start_delalloc_inodes(root, &nr, true);
+	return start_delalloc_inodes(root, &nr, true, false);
 }
 
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+			       bool in_reclaim_context)
 {
 	struct btrfs_root *root;
 	struct list_head splice;
@@ -9490,7 +9497,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
 			       &fs_info->delalloc_roots);
 		spin_unlock(&fs_info->delalloc_root_lock);
 
-		ret = start_delalloc_inodes(root, &nr, false);
+		ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context);
 		btrfs_put_root(root);
 		if (ret < 0)
 			goto out;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 703212ff50a5..dde49a791f3e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4951,7 +4951,7 @@ long btrfs_ioctl(struct file *file, unsigned int
 	case BTRFS_IOC_SYNC: {
 		int ret;
 
-		ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
+		ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
 		if (ret)
 			return ret;
 		ret = btrfs_sync_fs(inode->i_sb, 1);
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index ab80896315be..b03e7891394e 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -89,6 +89,19 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
 	if (ret)
 		goto out_unlock;
 
+	/*
+	 * After dirtying the page our caller will need to start a transaction,
+	 * and if we are low on metadata free space, that can cause flushing of
+	 * delalloc for all inodes in order to get metadata space released.
+	 * However we are holding the range locked for the whole duration of
+	 * the clone/dedupe operation, so we may deadlock if that happens and no
+	 * other task releases enough space. So mark this inode as not being
+	 * possible to flush to avoid such deadlock. We will clear that flag
+	 * when we finish cloning all extents, since a transaction is started
+	 * after finding each extent to clone.
+	 */
+	set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
+
 	if (comp_type == BTRFS_COMPRESS_NONE) {
 		char *map;
 
@@ -549,6 +562,8 @@ process_slot:
 out:
 	btrfs_free_path(path);
 	kvfree(buf);
+	clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags);
+
 	return ret;
 }
 
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 64099565ab8f..67e55c5479b8 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -532,7 +532,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
 
 	loops = 0;
 	while ((delalloc_bytes || dio_bytes) && loops < 3) {
-		btrfs_start_delalloc_roots(fs_info, items);
+		btrfs_start_delalloc_roots(fs_info, items, true);
 
 		loops++;
 		if (wait_ordered && !trans) {

From 9a664971569daf68254928149f580b4f5856d274 Mon Sep 17 00:00:00 2001
From: ethanwu <ethanwu@synology.com>
Date: Tue, 1 Dec 2020 17:25:12 +0800
Subject: [PATCH 062/547] btrfs: correctly calculate item size used when item
 key collision happens

Item key collision is allowed for some item types, like dir item and
inode refs, but the overall item size is limited by the nodesize.

item size(ins_len) passed from btrfs_insert_empty_items to
btrfs_search_slot already contains size of btrfs_item.

When btrfs_search_slot reaches leaf, we'll see if we need to split leaf.
The check incorrectly reports that split leaf is required, because
it treats the space required by the newly inserted item as
btrfs_item + item data. But in item key collision case, only item data
is actually needed, the newly inserted item could merge into the existing
one. No new btrfs_item will be inserted.

And split_leaf return EOVERFLOW from following code:

  if (extend && data_size + btrfs_item_size_nr(l, slot) +
      sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
      return -EOVERFLOW;

In most cases, when callers receive EOVERFLOW, they either return
this error or handle in different ways. For example, in normal dir item
creation the userspace will get errno EOVERFLOW; in inode ref case
INODE_EXTREF is used instead.

However, this is not the case for rename. To avoid the unrecoverable
situation in rename, btrfs_check_dir_item_collision is called in
early phase of rename. In this function, when item key collision is
detected leaf space is checked:

  data_size = sizeof(*di) + name_len;
  if (data_size + btrfs_item_size_nr(leaf, slot) +
      sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root->fs_info))

the sizeof(struct btrfs_item) + btrfs_item_size_nr(leaf, slot) here
refers to existing item size, the condition here correctly calculates
the needed size for collision case rather than the wrong case above.

The consequence of inconsistent condition check between
btrfs_check_dir_item_collision and btrfs_search_slot when item key
collision happens is that we might pass check here but fail
later at btrfs_search_slot. Rename fails and volume is forced readonly

  [436149.586170] ------------[ cut here ]------------
  [436149.586173] BTRFS: Transaction aborted (error -75)
  [436149.586196] WARNING: CPU: 0 PID: 16733 at fs/btrfs/inode.c:9870 btrfs_rename2+0x1938/0x1b70 [btrfs]
  [436149.586227] CPU: 0 PID: 16733 Comm: python Tainted: G      D           4.18.0-rc5+ #1
  [436149.586228] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016
  [436149.586238] RIP: 0010:btrfs_rename2+0x1938/0x1b70 [btrfs]
  [436149.586254] RSP: 0018:ffffa327043a7ce0 EFLAGS: 00010286
  [436149.586255] RAX: 0000000000000000 RBX: ffff8d8a17d13340 RCX: 0000000000000006
  [436149.586256] RDX: 0000000000000007 RSI: 0000000000000096 RDI: ffff8d8a7fc164b0
  [436149.586257] RBP: ffffa327043a7da0 R08: 0000000000000560 R09: 7265282064657472
  [436149.586258] R10: 0000000000000000 R11: 6361736e61725420 R12: ffff8d8a0d4c8b08
  [436149.586258] R13: ffff8d8a17d13340 R14: ffff8d8a33e0a540 R15: 00000000000001fe
  [436149.586260] FS:  00007fa313933740(0000) GS:ffff8d8a7fc00000(0000) knlGS:0000000000000000
  [436149.586261] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [436149.586262] CR2: 000055d8d9c9a720 CR3: 000000007aae0003 CR4: 00000000003606f0
  [436149.586295] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  [436149.586296] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  [436149.586296] Call Trace:
  [436149.586311]  vfs_rename+0x383/0x920
  [436149.586313]  ? vfs_rename+0x383/0x920
  [436149.586315]  do_renameat2+0x4ca/0x590
  [436149.586317]  __x64_sys_rename+0x20/0x30
  [436149.586324]  do_syscall_64+0x5a/0x120
  [436149.586330]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
  [436149.586332] RIP: 0033:0x7fa3133b1d37
  [436149.586348] RSP: 002b:00007fffd3e43908 EFLAGS: 00000246 ORIG_RAX: 0000000000000052
  [436149.586349] RAX: ffffffffffffffda RBX: 00007fa3133b1d30 RCX: 00007fa3133b1d37
  [436149.586350] RDX: 000055d8da06b5e0 RSI: 000055d8da225d60 RDI: 000055d8da2c4da0
  [436149.586351] RBP: 000055d8da2252f0 R08: 00007fa313782000 R09: 00000000000177e0
  [436149.586351] R10: 000055d8da010680 R11: 0000000000000246 R12: 00007fa313840b00

Thanks to Hans van Kranenburg for information about crc32 hash collision
tools, I was able to reproduce the dir item collision with following
python script.
https://github.com/wutzuchieh/misc_tools/blob/master/crc32_forge.py Run
it under a btrfs volume will trigger the abort transaction.  It simply
creates files and rename them to forged names that leads to
hash collision.

There are two ways to fix this. One is to simply revert the patch
878f2d2cb355 ("Btrfs: fix max dir item size calculation") to make the
condition consistent although that patch is correct about the size.

The other way is to handle the leaf space check correctly when
collision happens. I prefer the second one since it correct leaf
space check in collision case. This fix will not account
sizeof(struct btrfs_item) when the item already exists.
There are two places where ins_len doesn't contain
sizeof(struct btrfs_item), however.

  1. extent-tree.c: lookup_inline_extent_backref
  2. file-item.c: btrfs_csum_file_blocks

to make the logic of btrfs_search_slot more clear, we add a flag
search_for_extension in btrfs_path.

This flag indicates that ins_len passed to btrfs_search_slot doesn't
contain sizeof(struct btrfs_item). When key exists, btrfs_search_slot
will use the actual size needed to calculate the required leaf space.

CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: ethanwu <ethanwu@synology.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c       | 24 ++++++++++++++++++++++--
 fs/btrfs/ctree.h       |  6 ++++++
 fs/btrfs/extent-tree.c |  2 ++
 fs/btrfs/file-item.c   |  2 ++
 4 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 07810891e204..cc89b63d65a4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2555,8 +2555,14 @@ out:
  * @p:		Holds all btree nodes along the search path
  * @root:	The root node of the tree
  * @key:	The key we are looking for
- * @ins_len:	Indicates purpose of search, for inserts it is 1, for
- *		deletions it's -1. 0 for plain searches
+ * @ins_len:	Indicates purpose of search:
+ *              >0  for inserts it's size of item inserted (*)
+ *              <0  for deletions
+ *               0  for plain searches, not modifying the tree
+ *
+ *              (*) If size of item inserted doesn't include
+ *              sizeof(struct btrfs_item), then p->search_for_extension must
+ *              be set.
  * @cow:	boolean should CoW operations be performed. Must always be 1
  *		when modifying the tree.
  *
@@ -2717,6 +2723,20 @@ cow_done:
 
 		if (level == 0) {
 			p->slots[level] = slot;
+			/*
+			 * Item key already exists. In this case, if we are
+			 * allowed to insert the item (for example, in dir_item
+			 * case, item key collision is allowed), it will be
+			 * merged with the original item. Only the item size
+			 * grows, no new btrfs item will be added. If
+			 * search_for_extension is not set, ins_len already
+			 * accounts the size btrfs_item, deduct it here so leaf
+			 * space check will be correct.
+			 */
+			if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
+				ASSERT(ins_len >= sizeof(struct btrfs_item));
+				ins_len -= sizeof(struct btrfs_item);
+			}
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(b) < ins_len) {
 				if (write_lock_level < 1) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2674f24cf2e0..3935d297d198 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -368,6 +368,12 @@ struct btrfs_path {
 	unsigned int search_commit_root:1;
 	unsigned int need_commit_sem:1;
 	unsigned int skip_release_on_error:1;
+	/*
+	 * Indicate that new item (btrfs_search_slot) is extending already
+	 * existing item and ins_len contains only the data size and not item
+	 * header (ie. sizeof(struct btrfs_item) is not included).
+	 */
+	unsigned int search_for_extension:1;
 };
 #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
 					sizeof(struct btrfs_item))
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 56ea380f5a17..d79b8369e6aa 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -844,6 +844,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 	want = extent_ref_type(parent, owner);
 	if (insert) {
 		extra_size = btrfs_extent_inline_ref_size(want);
+		path->search_for_extension = 1;
 		path->keep_locks = 1;
 	} else
 		extra_size = -1;
@@ -996,6 +997,7 @@ again:
 out:
 	if (insert) {
 		path->keep_locks = 0;
+		path->search_for_extension = 0;
 		btrfs_unlock_up_safe(path, 1);
 	}
 	return err;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 1545c22ef280..6ccfc019ad90 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -1016,8 +1016,10 @@ again:
 	}
 
 	btrfs_release_path(path);
+	path->search_for_extension = 1;
 	ret = btrfs_search_slot(trans, root, &file_key, path,
 				csum_size, 1);
+	path->search_for_extension = 0;
 	if (ret < 0)
 		goto out;
 

From ae5e070eaca9dbebde3459dd8f4c2756f8c097d0 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Fri, 4 Dec 2020 09:24:47 +0800
Subject: [PATCH 063/547] btrfs: qgroup: don't try to wait flushing if we're
 already holding a transaction

There is a chance of racing for qgroup flushing which may lead to
deadlock:

	Thread A		|	Thread B
   (not holding trans handle)	|  (holding a trans handle)
--------------------------------+--------------------------------
__btrfs_qgroup_reserve_meta()   | __btrfs_qgroup_reserve_meta()
|- try_flush_qgroup()		| |- try_flush_qgroup()
   |- QGROUP_FLUSHING bit set   |    |
   |				|    |- test_and_set_bit()
   |				|    |- wait_event()
   |- btrfs_join_transaction()	|
   |- btrfs_commit_transaction()|

			!!! DEAD LOCK !!!

Since thread A wants to commit transaction, but thread B is holding a
transaction handle, blocking the commit.
At the same time, thread B is waiting for thread A to finish its commit.

This is just a hot fix, and would lead to more EDQUOT when we're near
the qgroup limit.

The proper fix would be to make all metadata/data reservations happen
without holding a transaction handle.

CC: stable@vger.kernel.org # 5.9+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fe3046007f52..47f27658eac1 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3530,16 +3530,6 @@ static int try_flush_qgroup(struct btrfs_root *root)
 	int ret;
 	bool can_commit = true;
 
-	/*
-	 * We don't want to run flush again and again, so if there is a running
-	 * one, we won't try to start a new flush, but exit directly.
-	 */
-	if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
-		wait_event(root->qgroup_flush_wait,
-			!test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
-		return 0;
-	}
-
 	/*
 	 * If current process holds a transaction, we shouldn't flush, as we
 	 * assume all space reservation happens before a transaction handle is
@@ -3554,6 +3544,26 @@ static int try_flush_qgroup(struct btrfs_root *root)
 	    current->journal_info != BTRFS_SEND_TRANS_STUB)
 		can_commit = false;
 
+	/*
+	 * We don't want to run flush again and again, so if there is a running
+	 * one, we won't try to start a new flush, but exit directly.
+	 */
+	if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
+		/*
+		 * We are already holding a transaction, thus we can block other
+		 * threads from flushing.  So exit right now. This increases
+		 * the chance of EDQUOT for heavy load and near limit cases.
+		 * But we can argue that if we're already near limit, EDQUOT is
+		 * unavoidable anyway.
+		 */
+		if (!can_commit)
+			return 0;
+
+		wait_event(root->qgroup_flush_wait,
+			!test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
+		return 0;
+	}
+
 	ret = btrfs_start_delalloc_snapshot(root);
 	if (ret < 0)
 		goto out;

From 0b3f407e6728d990ae1630a02c7b952c21c288d3 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 10 Dec 2020 12:09:02 +0000
Subject: [PATCH 064/547] btrfs: send: fix wrong file path when there is an
 inode with a pending rmdir

When doing an incremental send, if we have a new inode that happens to
have the same number that an old directory inode had in the base snapshot
and that old directory has a pending rmdir operation, we end up computing
a wrong path for the new inode, causing the receiver to fail.

Example reproducer:

  $ cat test-send-rmdir.sh
  #!/bin/bash

  DEV=/dev/sdi
  MNT=/mnt/sdi

  mkfs.btrfs -f $DEV >/dev/null
  mount $DEV $MNT

  mkdir $MNT/dir
  touch $MNT/dir/file1
  touch $MNT/dir/file2
  touch $MNT/dir/file3

  # Filesystem looks like:
  #
  # .                                     (ino 256)
  # |----- dir/                           (ino 257)
  #         |----- file1                  (ino 258)
  #         |----- file2                  (ino 259)
  #         |----- file3                  (ino 260)
  #

  btrfs subvolume snapshot -r $MNT $MNT/snap1
  btrfs send -f /tmp/snap1.send $MNT/snap1

  # Now remove our directory and all its files.
  rm -fr $MNT/dir

  # Unmount the filesystem and mount it again. This is to ensure that
  # the next inode that is created ends up with the same inode number
  # that our directory "dir" had, 257, which is the first free "objectid"
  # available after mounting again the filesystem.
  umount $MNT
  mount $DEV $MNT

  # Now create a new file (it could be a directory as well).
  touch $MNT/newfile

  # Filesystem now looks like:
  #
  # .                                     (ino 256)
  # |----- newfile                        (ino 257)
  #

  btrfs subvolume snapshot -r $MNT $MNT/snap2
  btrfs send -f /tmp/snap2.send -p $MNT/snap1 $MNT/snap2

  # Now unmount the filesystem, create a new one, mount it and try to apply
  # both send streams to recreate both snapshots.
  umount $DEV

  mkfs.btrfs -f $DEV >/dev/null

  mount $DEV $MNT

  btrfs receive -f /tmp/snap1.send $MNT
  btrfs receive -f /tmp/snap2.send $MNT

  umount $MNT

When running the test, the receive operation for the incremental stream
fails:

  $ ./test-send-rmdir.sh
  Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap1'
  At subvol /mnt/sdi/snap1
  Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap2'
  At subvol /mnt/sdi/snap2
  At subvol snap1
  At snapshot snap2
  ERROR: chown o257-9-0 failed: No such file or directory

So fix this by tracking directories that have a pending rmdir by inode
number and generation number, instead of only inode number.

A test case for fstests follows soon.

Reported-by: Massimo B. <massimo.b@gmx.net>
Tested-by: Massimo B. <massimo.b@gmx.net>
Link: https://lore.kernel.org/linux-btrfs/6ae34776e85912960a253a8327068a892998e685.camel@gmx.net/
CC: stable@vger.kernel.org # 4.19+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/send.c | 49 +++++++++++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d719a2755a40..ae97f4dbaff3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -236,6 +236,7 @@ struct waiting_dir_move {
 	 * after this directory is moved, we can try to rmdir the ino rmdir_ino.
 	 */
 	u64 rmdir_ino;
+	u64 rmdir_gen;
 	bool orphanized;
 };
 
@@ -316,7 +317,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
 static struct waiting_dir_move *
 get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
 
-static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
+static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen);
 
 static int need_send_hole(struct send_ctx *sctx)
 {
@@ -2299,7 +2300,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
 
 		fs_path_reset(name);
 
-		if (is_waiting_for_rm(sctx, ino)) {
+		if (is_waiting_for_rm(sctx, ino, gen)) {
 			ret = gen_unique_name(sctx, ino, gen, name);
 			if (ret < 0)
 				goto out;
@@ -2858,8 +2859,8 @@ out:
 	return ret;
 }
 
-static struct orphan_dir_info *
-add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
+static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx,
+						   u64 dir_ino, u64 dir_gen)
 {
 	struct rb_node **p = &sctx->orphan_dirs.rb_node;
 	struct rb_node *parent = NULL;
@@ -2868,20 +2869,23 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
 	while (*p) {
 		parent = *p;
 		entry = rb_entry(parent, struct orphan_dir_info, node);
-		if (dir_ino < entry->ino) {
+		if (dir_ino < entry->ino)
 			p = &(*p)->rb_left;
-		} else if (dir_ino > entry->ino) {
+		else if (dir_ino > entry->ino)
 			p = &(*p)->rb_right;
-		} else {
+		else if (dir_gen < entry->gen)
+			p = &(*p)->rb_left;
+		else if (dir_gen > entry->gen)
+			p = &(*p)->rb_right;
+		else
 			return entry;
-		}
 	}
 
 	odi = kmalloc(sizeof(*odi), GFP_KERNEL);
 	if (!odi)
 		return ERR_PTR(-ENOMEM);
 	odi->ino = dir_ino;
-	odi->gen = 0;
+	odi->gen = dir_gen;
 	odi->last_dir_index_offset = 0;
 
 	rb_link_node(&odi->node, parent, p);
@@ -2889,8 +2893,8 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
 	return odi;
 }
 
-static struct orphan_dir_info *
-get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
+static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx,
+						   u64 dir_ino, u64 gen)
 {
 	struct rb_node *n = sctx->orphan_dirs.rb_node;
 	struct orphan_dir_info *entry;
@@ -2901,15 +2905,19 @@ get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
 			n = n->rb_left;
 		else if (dir_ino > entry->ino)
 			n = n->rb_right;
+		else if (gen < entry->gen)
+			n = n->rb_left;
+		else if (gen > entry->gen)
+			n = n->rb_right;
 		else
 			return entry;
 	}
 	return NULL;
 }
 
-static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
+static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen)
 {
-	struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
+	struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen);
 
 	return odi != NULL;
 }
@@ -2954,7 +2962,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 	key.type = BTRFS_DIR_INDEX_KEY;
 	key.offset = 0;
 
-	odi = get_orphan_dir_info(sctx, dir);
+	odi = get_orphan_dir_info(sctx, dir, dir_gen);
 	if (odi)
 		key.offset = odi->last_dir_index_offset;
 
@@ -2985,7 +2993,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 
 		dm = get_waiting_dir_move(sctx, loc.objectid);
 		if (dm) {
-			odi = add_orphan_dir_info(sctx, dir);
+			odi = add_orphan_dir_info(sctx, dir, dir_gen);
 			if (IS_ERR(odi)) {
 				ret = PTR_ERR(odi);
 				goto out;
@@ -2993,12 +3001,13 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 			odi->gen = dir_gen;
 			odi->last_dir_index_offset = found_key.offset;
 			dm->rmdir_ino = dir;
+			dm->rmdir_gen = dir_gen;
 			ret = 0;
 			goto out;
 		}
 
 		if (loc.objectid > send_progress) {
-			odi = add_orphan_dir_info(sctx, dir);
+			odi = add_orphan_dir_info(sctx, dir, dir_gen);
 			if (IS_ERR(odi)) {
 				ret = PTR_ERR(odi);
 				goto out;
@@ -3038,6 +3047,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
 		return -ENOMEM;
 	dm->ino = ino;
 	dm->rmdir_ino = 0;
+	dm->rmdir_gen = 0;
 	dm->orphanized = orphanized;
 
 	while (*p) {
@@ -3183,7 +3193,7 @@ static int path_loop(struct send_ctx *sctx, struct fs_path *name,
 	while (ino != BTRFS_FIRST_FREE_OBJECTID) {
 		fs_path_reset(name);
 
-		if (is_waiting_for_rm(sctx, ino))
+		if (is_waiting_for_rm(sctx, ino, gen))
 			break;
 		if (is_waiting_for_move(sctx, ino)) {
 			if (*ancestor_ino == 0)
@@ -3223,6 +3233,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 	u64 parent_ino, parent_gen;
 	struct waiting_dir_move *dm = NULL;
 	u64 rmdir_ino = 0;
+	u64 rmdir_gen;
 	u64 ancestor;
 	bool is_orphan;
 	int ret;
@@ -3237,6 +3248,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 	dm = get_waiting_dir_move(sctx, pm->ino);
 	ASSERT(dm);
 	rmdir_ino = dm->rmdir_ino;
+	rmdir_gen = dm->rmdir_gen;
 	is_orphan = dm->orphanized;
 	free_waiting_dir_move(sctx, dm);
 
@@ -3273,6 +3285,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 			dm = get_waiting_dir_move(sctx, pm->ino);
 			ASSERT(dm);
 			dm->rmdir_ino = rmdir_ino;
+			dm->rmdir_gen = rmdir_gen;
 		}
 		goto out;
 	}
@@ -3291,7 +3304,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 		struct orphan_dir_info *odi;
 		u64 gen;
 
-		odi = get_orphan_dir_info(sctx, rmdir_ino);
+		odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen);
 		if (!odi) {
 			/* already deleted */
 			goto finish;

From 675a4fc8f3149e93f35fb5739fd8d4764206ba0b Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Tue, 15 Dec 2020 12:00:26 -0500
Subject: [PATCH 065/547] btrfs: tests: initialize test inodes location

I noticed that sometimes the module failed to load because the self
tests failed like this:

  BTRFS: selftest: fs/btrfs/tests/inode-tests.c:963 miscount, wanted 1, got 0

This turned out to be because sometimes the btrfs ino would be the btree
inode number, and thus we'd skip calling the set extent delalloc bit
helper, and thus not adjust ->outstanding_extents.

Fix this by making sure we initialize test inodes with a valid inode
number so that we don't get random failures during self tests.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tests/btrfs-tests.c | 10 ++++++++--
 fs/btrfs/tests/inode-tests.c |  9 ---------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 8ca334d554af..6bd97bd4cb37 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -55,8 +55,14 @@ struct inode *btrfs_new_test_inode(void)
 	struct inode *inode;
 
 	inode = new_inode(test_mnt->mnt_sb);
-	if (inode)
-		inode_init_owner(inode, NULL, S_IFREG);
+	if (!inode)
+		return NULL;
+
+	inode->i_mode = S_IFREG;
+	BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+	BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
+	BTRFS_I(inode)->location.offset = 0;
+	inode_init_owner(inode, NULL, S_IFREG);
 
 	return inode;
 }
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 04022069761d..c9874b12d337 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -232,11 +232,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 		return ret;
 	}
 
-	inode->i_mode = S_IFREG;
-	BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-	BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
-	BTRFS_I(inode)->location.offset = 0;
-
 	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
 		test_std_err(TEST_ALLOC_FS_INFO);
@@ -835,10 +830,6 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
 		return ret;
 	}
 
-	BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-	BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
-	BTRFS_I(inode)->location.offset = 0;
-
 	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
 		test_std_err(TEST_ALLOC_FS_INFO);

From ea9ed87c73e87e044b2c58d658eb4ba5216bc488 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 6 Dec 2020 15:56:20 +0000
Subject: [PATCH 066/547] btrfs: fix async discard stall

Might happen that bg->discard_eligible_time was changed without
rescheduling, so btrfs_discard_workfn() wakes up earlier than that new
time, peek_discard_list() returns NULL, and all work halts and goes to
sleep without further rescheduling even there are block groups to
discard.

It happens pretty often, but not so visible from the userspace because
after some time it usually will be kicked off anyway by someone else
calling btrfs_discard_reschedule_work().

Fix it by continue rescheduling if block group discard lists are not
empty.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/discard.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index 1db966bf85b2..36431d7e1334 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -199,16 +199,15 @@ static struct btrfs_block_group *find_next_block_group(
 static struct btrfs_block_group *peek_discard_list(
 					struct btrfs_discard_ctl *discard_ctl,
 					enum btrfs_discard_state *discard_state,
-					int *discard_index)
+					int *discard_index, u64 now)
 {
 	struct btrfs_block_group *block_group;
-	const u64 now = ktime_get_ns();
 
 	spin_lock(&discard_ctl->lock);
 again:
 	block_group = find_next_block_group(discard_ctl, now);
 
-	if (block_group && now > block_group->discard_eligible_time) {
+	if (block_group && now >= block_group->discard_eligible_time) {
 		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
 		    block_group->used != 0) {
 			if (btrfs_is_block_group_data_only(block_group))
@@ -222,12 +221,11 @@ again:
 			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
 		}
 		discard_ctl->block_group = block_group;
+	}
+	if (block_group) {
 		*discard_state = block_group->discard_state;
 		*discard_index = block_group->discard_index;
-	} else {
-		block_group = NULL;
 	}
-
 	spin_unlock(&discard_ctl->lock);
 
 	return block_group;
@@ -438,13 +436,18 @@ static void btrfs_discard_workfn(struct work_struct *work)
 	int discard_index = 0;
 	u64 trimmed = 0;
 	u64 minlen = 0;
+	u64 now = ktime_get_ns();
 
 	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
 
 	block_group = peek_discard_list(discard_ctl, &discard_state,
-					&discard_index);
+					&discard_index, now);
 	if (!block_group || !btrfs_run_discard_work(discard_ctl))
 		return;
+	if (now < block_group->discard_eligible_time) {
+		btrfs_discard_schedule_work(discard_ctl, false);
+		return;
+	}
 
 	/* Perform discarding */
 	minlen = discard_minlen[discard_index];

From 1ea2872fc6f2aaee0a4b4f1578b83ffd9f55c6a7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 6 Dec 2020 15:56:21 +0000
Subject: [PATCH 067/547] btrfs: fix racy access to discard_ctl data

Because only one discard worker may be running at any given point, it
could have been safe to modify ->prev_discard, etc. without
synchronization, if not for @override flag in
btrfs_discard_schedule_work() and delayed_work_pending() returning false
while workfn is running.

That may lead to torn reads of u64 for some architectures, but that's
not a big problem as only slightly affects the discard rate.

Suggested-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/discard.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index 36431d7e1334..d641f451f840 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -477,13 +477,6 @@ static void btrfs_discard_workfn(struct work_struct *work)
 		discard_ctl->discard_extent_bytes += trimmed;
 	}
 
-	/*
-	 * Updated without locks as this is inside the workfn and nothing else
-	 * is reading the values
-	 */
-	discard_ctl->prev_discard = trimmed;
-	discard_ctl->prev_discard_time = ktime_get_ns();
-
 	/* Determine next steps for a block_group */
 	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
 		if (discard_state == BTRFS_DISCARD_BITMAPS) {
@@ -499,7 +492,10 @@ static void btrfs_discard_workfn(struct work_struct *work)
 		}
 	}
 
+	now = ktime_get_ns();
 	spin_lock(&discard_ctl->lock);
+	discard_ctl->prev_discard = trimmed;
+	discard_ctl->prev_discard_time = now;
 	discard_ctl->block_group = NULL;
 	spin_unlock(&discard_ctl->lock);
 

From 8fc058597a283e9a37720abb0e8d68e342b9387d Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 6 Dec 2020 15:56:22 +0000
Subject: [PATCH 068/547] btrfs: merge critical sections of discard lock in
 workfn

btrfs_discard_workfn() drops discard_ctl->lock just to take it again in
a moment in btrfs_discard_schedule_work(). Avoid that and also reuse
ktime.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/discard.c | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index d641f451f840..2b8383d41144 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -328,28 +328,15 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
 		btrfs_discard_schedule_work(discard_ctl, false);
 }
 
-/**
- * btrfs_discard_schedule_work - responsible for scheduling the discard work
- * @discard_ctl: discard control
- * @override: override the current timer
- *
- * Discards are issued by a delayed workqueue item.  @override is used to
- * update the current delay as the baseline delay interval is reevaluated on
- * transaction commit.  This is also maxed with any other rate limit.
- */
-void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
-				 bool override)
+static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
+					  u64 now, bool override)
 {
 	struct btrfs_block_group *block_group;
-	const u64 now = ktime_get_ns();
-
-	spin_lock(&discard_ctl->lock);
 
 	if (!btrfs_run_discard_work(discard_ctl))
-		goto out;
-
+		return;
 	if (!override && delayed_work_pending(&discard_ctl->work))
-		goto out;
+		return;
 
 	block_group = find_next_block_group(discard_ctl, now);
 	if (block_group) {
@@ -391,7 +378,24 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 		mod_delayed_work(discard_ctl->discard_workers,
 				 &discard_ctl->work, nsecs_to_jiffies(delay));
 	}
-out:
+}
+
+/*
+ * btrfs_discard_schedule_work - responsible for scheduling the discard work
+ * @discard_ctl:  discard control
+ * @override:     override the current timer
+ *
+ * Discards are issued by a delayed workqueue item.  @override is used to
+ * update the current delay as the baseline delay interval is reevaluated on
+ * transaction commit.  This is also maxed with any other rate limit.
+ */
+void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
+				 bool override)
+{
+	const u64 now = ktime_get_ns();
+
+	spin_lock(&discard_ctl->lock);
+	__btrfs_discard_schedule_work(discard_ctl, now, override);
 	spin_unlock(&discard_ctl->lock);
 }
 
@@ -497,9 +501,8 @@ static void btrfs_discard_workfn(struct work_struct *work)
 	discard_ctl->prev_discard = trimmed;
 	discard_ctl->prev_discard_time = now;
 	discard_ctl->block_group = NULL;
+	__btrfs_discard_schedule_work(discard_ctl, now, false);
 	spin_unlock(&discard_ctl->lock);
-
-	btrfs_discard_schedule_work(discard_ctl, false);
 }
 
 /**

From cb13eea3b49055bd78e6ddf39defd6340f7379fc Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 14 Dec 2020 10:10:45 +0000
Subject: [PATCH 069/547] btrfs: fix transaction leak and crash after RO
 remount caused by qgroup rescan

If we remount a filesystem in RO mode while the qgroup rescan worker is
running, we can end up having it still running after the remount is done,
and at unmount time we may end up with an open transaction that ends up
never getting committed. If that happens we end up with several memory
leaks and can crash when hardware acceleration is unavailable for crc32c.
Possibly it can lead to other nasty surprises too, due to use-after-free
issues.

The following steps explain how the problem happens.

1) We have a filesystem mounted in RW mode and the qgroup rescan worker is
   running;

2) We remount the filesystem in RO mode, and never stop/pause the rescan
   worker, so after the remount the rescan worker is still running. The
   important detail here is that the rescan task is still running after
   the remount operation committed any ongoing transaction through its
   call to btrfs_commit_super();

3) The rescan is still running, and after the remount completed, the
   rescan worker started a transaction, after it finished iterating all
   leaves of the extent tree, to update the qgroup status item in the
   quotas tree. It does not commit the transaction, it only releases its
   handle on the transaction;

4) A filesystem unmount operation starts shortly after;

5) The unmount task, at close_ctree(), stops the transaction kthread,
   which had not had a chance to commit the open transaction since it was
   sleeping and the commit interval (default of 30 seconds) has not yet
   elapsed since the last time it committed a transaction;

6) So after stopping the transaction kthread we still have the transaction
   used to update the qgroup status item open. At close_ctree(), when the
   filesystem is in RO mode and no transaction abort happened (or the
   filesystem is in error mode), we do not expect to have any transaction
   open, so we do not call btrfs_commit_super();

7) We then proceed to destroy the work queues, free the roots and block
   groups, etc. After that we drop the last reference on the btree inode
   by calling iput() on it. Since there are dirty pages for the btree
   inode, corresponding to the COWed extent buffer for the quotas btree,
   btree_write_cache_pages() is invoked to flush those dirty pages. This
   results in creating a bio and submitting it, which makes us end up at
   btrfs_submit_metadata_bio();

8) At btrfs_submit_metadata_bio() we end up at the if-then-else branch
   that calls btrfs_wq_submit_bio(), because check_async_write() returned
   a value of 1. This value of 1 is because we did not have hardware
   acceleration available for crc32c, so BTRFS_FS_CSUM_IMPL_FAST was not
   set in fs_info->flags;

9) Then at btrfs_wq_submit_bio() we call btrfs_queue_work() against the
   workqueue at fs_info->workers, which was already freed before by the
   call to btrfs_stop_all_workers() at close_ctree(). This results in an
   invalid memory access due to a use-after-free, leading to a crash.

When this happens, before the crash there are several warnings triggered,
since we have reserved metadata space in a block group, the delayed refs
reservation, etc:

  ------------[ cut here ]------------
  WARNING: CPU: 4 PID: 1729896 at fs/btrfs/block-group.c:125 btrfs_put_block_group+0x63/0xa0 [btrfs]
  Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
  CPU: 4 PID: 1729896 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:btrfs_put_block_group+0x63/0xa0 [btrfs]
  Code: f0 01 00 00 48 39 c2 75 (...)
  RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206
  RAX: 0000000000000001 RBX: ffff947ed73e4000 RCX: ffff947ebc8b29c8
  RDX: 0000000000000001 RSI: ffffffffc0b150a0 RDI: ffff947ebc8b2800
  RBP: ffff947ebc8b2800 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110
  R13: ffff947ed73e4160 R14: ffff947ebc8b2988 R15: dead000000000100
  FS:  00007f15edfea840(0000) GS:ffff9481ad600000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f37e2893320 CR3: 0000000138f68001 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   btrfs_free_block_groups+0x17f/0x2f0 [btrfs]
   close_ctree+0x2ba/0x2fa [btrfs]
   generic_shutdown_super+0x6c/0x100
   kill_anon_super+0x14/0x30
   btrfs_kill_super+0x12/0x20 [btrfs]
   deactivate_locked_super+0x31/0x70
   cleanup_mnt+0x100/0x160
   task_work_run+0x68/0xb0
   exit_to_user_mode_prepare+0x1bb/0x1c0
   syscall_exit_to_user_mode+0x4b/0x260
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f15ee221ee7
  Code: ff 0b 00 f7 d8 64 89 01 48 (...)
  RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7
  RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000
  RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0
  R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000
  R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60
  irq event stamp: 0
  hardirqs last  enabled at (0): [<0000000000000000>] 0x0
  hardirqs last disabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last  enabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last disabled at (0): [<0000000000000000>] 0x0
  ---[ end trace dd74718fef1ed5c6 ]---
  ------------[ cut here ]------------
  WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-rsv.c:459 btrfs_release_global_block_rsv+0x70/0xc0 [btrfs]
  Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
  CPU: 2 PID: 1729896 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:btrfs_release_global_block_rsv+0x70/0xc0 [btrfs]
  Code: 48 83 bb b0 03 00 00 00 (...)
  RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206
  RAX: 000000000033c000 RBX: ffff947ed73e4000 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: ffffffffc0b0d8c1 RDI: 00000000ffffffff
  RBP: ffff947ebc8b7000 R08: 0000000000000001 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110
  R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100
  FS:  00007f15edfea840(0000) GS:ffff9481aca00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000561a79f76e20 CR3: 0000000138f68006 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   btrfs_free_block_groups+0x24c/0x2f0 [btrfs]
   close_ctree+0x2ba/0x2fa [btrfs]
   generic_shutdown_super+0x6c/0x100
   kill_anon_super+0x14/0x30
   btrfs_kill_super+0x12/0x20 [btrfs]
   deactivate_locked_super+0x31/0x70
   cleanup_mnt+0x100/0x160
   task_work_run+0x68/0xb0
   exit_to_user_mode_prepare+0x1bb/0x1c0
   syscall_exit_to_user_mode+0x4b/0x260
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f15ee221ee7
  Code: ff 0b 00 f7 d8 64 89 01 (...)
  RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7
  RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000
  RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0
  R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000
  R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60
  irq event stamp: 0
  hardirqs last  enabled at (0): [<0000000000000000>] 0x0
  hardirqs last disabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last  enabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last disabled at (0): [<0000000000000000>] 0x0
  ---[ end trace dd74718fef1ed5c7 ]---
  ------------[ cut here ]------------
  WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-group.c:3377 btrfs_free_block_groups+0x25d/0x2f0 [btrfs]
  Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
  CPU: 5 PID: 1729896 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:btrfs_free_block_groups+0x25d/0x2f0 [btrfs]
  Code: ad de 49 be 22 01 00 (...)
  RSP: 0018:ffffb270826bbde8 EFLAGS: 00010206
  RAX: ffff947ebeae1d08 RBX: ffff947ed73e4000 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: ffff947e9d823ae8 RDI: 0000000000000246
  RBP: ffff947ebeae1d08 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ebeae1c00
  R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100
  FS:  00007f15edfea840(0000) GS:ffff9481ad200000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f1475d98ea8 CR3: 0000000138f68005 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   close_ctree+0x2ba/0x2fa [btrfs]
   generic_shutdown_super+0x6c/0x100
   kill_anon_super+0x14/0x30
   btrfs_kill_super+0x12/0x20 [btrfs]
   deactivate_locked_super+0x31/0x70
   cleanup_mnt+0x100/0x160
   task_work_run+0x68/0xb0
   exit_to_user_mode_prepare+0x1bb/0x1c0
   syscall_exit_to_user_mode+0x4b/0x260
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f15ee221ee7
  Code: ff 0b 00 f7 d8 64 89 (...)
  RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7
  RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000
  RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0
  R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000
  R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60
  irq event stamp: 0
  hardirqs last  enabled at (0): [<0000000000000000>] 0x0
  hardirqs last disabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last  enabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last disabled at (0): [<0000000000000000>] 0x0
  ---[ end trace dd74718fef1ed5c8 ]---
  BTRFS info (device sdc): space_info 4 has 268238848 free, is not full
  BTRFS info (device sdc): space_info total=268435456, used=114688, pinned=0, reserved=16384, may_use=0, readonly=65536
  BTRFS info (device sdc): global_block_rsv: size 0 reserved 0
  BTRFS info (device sdc): trans_block_rsv: size 0 reserved 0
  BTRFS info (device sdc): chunk_block_rsv: size 0 reserved 0
  BTRFS info (device sdc): delayed_block_rsv: size 0 reserved 0
  BTRFS info (device sdc): delayed_refs_rsv: size 524288 reserved 0

And the crash, which only happens when we do not have crc32c hardware
acceleration, produces the following trace immediately after those
warnings:

  stack segment: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI
  CPU: 2 PID: 1749129 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:btrfs_queue_work+0x36/0x190 [btrfs]
  Code: 54 55 53 48 89 f3 (...)
  RSP: 0018:ffffb27082443ae8 EFLAGS: 00010282
  RAX: 0000000000000004 RBX: ffff94810ee9ad90 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: ffff94810ee9ad90 RDI: ffff947ed8ee75a0
  RBP: a56b6b6b6b6b6b6b R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000007 R11: 0000000000000001 R12: ffff947fa9b435a8
  R13: ffff94810ee9ad90 R14: 0000000000000000 R15: ffff947e93dc0000
  FS:  00007f3cfe974840(0000) GS:ffff9481ac600000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f1b42995a70 CR3: 0000000127638003 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   btrfs_wq_submit_bio+0xb3/0xd0 [btrfs]
   btrfs_submit_metadata_bio+0x44/0xc0 [btrfs]
   submit_one_bio+0x61/0x70 [btrfs]
   btree_write_cache_pages+0x414/0x450 [btrfs]
   ? kobject_put+0x9a/0x1d0
   ? trace_hardirqs_on+0x1b/0xf0
   ? _raw_spin_unlock_irqrestore+0x3c/0x60
   ? free_debug_processing+0x1e1/0x2b0
   do_writepages+0x43/0xe0
   ? lock_acquired+0x199/0x490
   __writeback_single_inode+0x59/0x650
   writeback_single_inode+0xaf/0x120
   write_inode_now+0x94/0xd0
   iput+0x187/0x2b0
   close_ctree+0x2c6/0x2fa [btrfs]
   generic_shutdown_super+0x6c/0x100
   kill_anon_super+0x14/0x30
   btrfs_kill_super+0x12/0x20 [btrfs]
   deactivate_locked_super+0x31/0x70
   cleanup_mnt+0x100/0x160
   task_work_run+0x68/0xb0
   exit_to_user_mode_prepare+0x1bb/0x1c0
   syscall_exit_to_user_mode+0x4b/0x260
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f3cfebabee7
  Code: ff 0b 00 f7 d8 64 89 01 (...)
  RSP: 002b:00007ffc9c9a05f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 00007f3cfecd1264 RCX: 00007f3cfebabee7
  RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000562b6b478000
  RBP: 0000562b6b473a30 R08: 0000000000000000 R09: 00007f3cfec6cbe0
  R10: 0000562b6b479fe0 R11: 0000000000000246 R12: 0000000000000000
  R13: 0000562b6b478000 R14: 0000562b6b473b40 R15: 0000562b6b473c60
  Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
  ---[ end trace dd74718fef1ed5cc ]---

Finally when we remove the btrfs module (rmmod btrfs), there are several
warnings about objects that were allocated from our slabs but were never
freed, consequence of the transaction that was never committed and got
leaked:

  =============================================================================
  BUG btrfs_delayed_ref_head (Tainted: G    B   W        ): Objects remaining in btrfs_delayed_ref_head on __kmem_cache_shutdown()
  -----------------------------------------------------------------------------

  INFO: Slab 0x0000000094c2ae56 objects=24 used=2 fp=0x000000002bfa2521 flags=0x17fffc000010200
  CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   slab_err+0xb7/0xdc
   ? lock_acquired+0x199/0x490
   __kmem_cache_shutdown+0x1ac/0x3c0
   ? lock_release+0x20e/0x4c0
   kmem_cache_destroy+0x55/0x120
   btrfs_delayed_ref_exit+0x11/0x35 [btrfs]
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  INFO: Object 0x0000000050cbdd61 @offset=12104
  INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1894 cpu=6 pid=1729873
	__slab_alloc.isra.0+0x109/0x1c0
	kmem_cache_alloc+0x7bb/0x830
	btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs]
	btrfs_free_tree_block+0x128/0x360 [btrfs]
	__btrfs_cow_block+0x489/0x5f0 [btrfs]
	btrfs_cow_block+0xf7/0x220 [btrfs]
	btrfs_search_slot+0x62a/0xc40 [btrfs]
	btrfs_del_orphan_item+0x65/0xd0 [btrfs]
	btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
	open_ctree+0x125a/0x18a0 [btrfs]
	btrfs_mount_root.cold+0x13/0xed [btrfs]
	legacy_get_tree+0x30/0x60
	vfs_get_tree+0x28/0xe0
	fc_mount+0xe/0x40
	vfs_kern_mount.part.0+0x71/0x90
	btrfs_mount+0x13b/0x3e0 [btrfs]
  INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=4292 cpu=2 pid=1729526
	kmem_cache_free+0x34c/0x3c0
	__btrfs_run_delayed_refs+0x1117/0x1290 [btrfs]
	btrfs_run_delayed_refs+0x81/0x210 [btrfs]
	commit_cowonly_roots+0xfb/0x300 [btrfs]
	btrfs_commit_transaction+0x367/0xc40 [btrfs]
	sync_filesystem+0x74/0x90
	generic_shutdown_super+0x22/0x100
	kill_anon_super+0x14/0x30
	btrfs_kill_super+0x12/0x20 [btrfs]
	deactivate_locked_super+0x31/0x70
	cleanup_mnt+0x100/0x160
	task_work_run+0x68/0xb0
	exit_to_user_mode_prepare+0x1bb/0x1c0
	syscall_exit_to_user_mode+0x4b/0x260
	entry_SYSCALL_64_after_hwframe+0x44/0xa9
  INFO: Object 0x0000000086e9b0ff @offset=12776
  INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1900 cpu=6 pid=1729873
	__slab_alloc.isra.0+0x109/0x1c0
	kmem_cache_alloc+0x7bb/0x830
	btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs]
	btrfs_alloc_tree_block+0x2bf/0x360 [btrfs]
	alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs]
	__btrfs_cow_block+0x12d/0x5f0 [btrfs]
	btrfs_cow_block+0xf7/0x220 [btrfs]
	btrfs_search_slot+0x62a/0xc40 [btrfs]
	btrfs_del_orphan_item+0x65/0xd0 [btrfs]
	btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
	open_ctree+0x125a/0x18a0 [btrfs]
	btrfs_mount_root.cold+0x13/0xed [btrfs]
	legacy_get_tree+0x30/0x60
	vfs_get_tree+0x28/0xe0
	fc_mount+0xe/0x40
	vfs_kern_mount.part.0+0x71/0x90
  INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=3141 cpu=6 pid=1729803
	kmem_cache_free+0x34c/0x3c0
	__btrfs_run_delayed_refs+0x1117/0x1290 [btrfs]
	btrfs_run_delayed_refs+0x81/0x210 [btrfs]
	btrfs_write_dirty_block_groups+0x17d/0x3d0 [btrfs]
	commit_cowonly_roots+0x248/0x300 [btrfs]
	btrfs_commit_transaction+0x367/0xc40 [btrfs]
	close_ctree+0x113/0x2fa [btrfs]
	generic_shutdown_super+0x6c/0x100
	kill_anon_super+0x14/0x30
	btrfs_kill_super+0x12/0x20 [btrfs]
	deactivate_locked_super+0x31/0x70
	cleanup_mnt+0x100/0x160
	task_work_run+0x68/0xb0
	exit_to_user_mode_prepare+0x1bb/0x1c0
	syscall_exit_to_user_mode+0x4b/0x260
	entry_SYSCALL_64_after_hwframe+0x44/0xa9
  kmem_cache_destroy btrfs_delayed_ref_head: Slab cache still has objects
  CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   kmem_cache_destroy+0x119/0x120
   btrfs_delayed_ref_exit+0x11/0x35 [btrfs]
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 0b (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  =============================================================================
  BUG btrfs_delayed_tree_ref (Tainted: G    B   W        ): Objects remaining in btrfs_delayed_tree_ref on __kmem_cache_shutdown()
  -----------------------------------------------------------------------------

  INFO: Slab 0x0000000011f78dc0 objects=37 used=2 fp=0x0000000032d55d91 flags=0x17fffc000010200
  CPU: 3 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   slab_err+0xb7/0xdc
   ? lock_acquired+0x199/0x490
   __kmem_cache_shutdown+0x1ac/0x3c0
   ? lock_release+0x20e/0x4c0
   kmem_cache_destroy+0x55/0x120
   btrfs_delayed_ref_exit+0x1d/0x35 [btrfs]
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  INFO: Object 0x000000001a340018 @offset=4408
  INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1917 cpu=6 pid=1729873
	__slab_alloc.isra.0+0x109/0x1c0
	kmem_cache_alloc+0x7bb/0x830
	btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs]
	btrfs_free_tree_block+0x128/0x360 [btrfs]
	__btrfs_cow_block+0x489/0x5f0 [btrfs]
	btrfs_cow_block+0xf7/0x220 [btrfs]
	btrfs_search_slot+0x62a/0xc40 [btrfs]
	btrfs_del_orphan_item+0x65/0xd0 [btrfs]
	btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
	open_ctree+0x125a/0x18a0 [btrfs]
	btrfs_mount_root.cold+0x13/0xed [btrfs]
	legacy_get_tree+0x30/0x60
	vfs_get_tree+0x28/0xe0
	fc_mount+0xe/0x40
	vfs_kern_mount.part.0+0x71/0x90
	btrfs_mount+0x13b/0x3e0 [btrfs]
  INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=4167 cpu=4 pid=1729795
	kmem_cache_free+0x34c/0x3c0
	__btrfs_run_delayed_refs+0x63d/0x1290 [btrfs]
	btrfs_run_delayed_refs+0x81/0x210 [btrfs]
	btrfs_commit_transaction+0x60/0xc40 [btrfs]
	create_subvol+0x56a/0x990 [btrfs]
	btrfs_mksubvol+0x3fb/0x4a0 [btrfs]
	__btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs]
	btrfs_ioctl_snap_create+0x58/0x80 [btrfs]
	btrfs_ioctl+0x1a92/0x36f0 [btrfs]
	__x64_sys_ioctl+0x83/0xb0
	do_syscall_64+0x33/0x80
	entry_SYSCALL_64_after_hwframe+0x44/0xa9
  INFO: Object 0x000000002b46292a @offset=13648
  INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1923 cpu=6 pid=1729873
	__slab_alloc.isra.0+0x109/0x1c0
	kmem_cache_alloc+0x7bb/0x830
	btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs]
	btrfs_alloc_tree_block+0x2bf/0x360 [btrfs]
	alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs]
	__btrfs_cow_block+0x12d/0x5f0 [btrfs]
	btrfs_cow_block+0xf7/0x220 [btrfs]
	btrfs_search_slot+0x62a/0xc40 [btrfs]
	btrfs_del_orphan_item+0x65/0xd0 [btrfs]
	btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
	open_ctree+0x125a/0x18a0 [btrfs]
	btrfs_mount_root.cold+0x13/0xed [btrfs]
	legacy_get_tree+0x30/0x60
	vfs_get_tree+0x28/0xe0
	fc_mount+0xe/0x40
	vfs_kern_mount.part.0+0x71/0x90
  INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=3164 cpu=6 pid=1729803
	kmem_cache_free+0x34c/0x3c0
	__btrfs_run_delayed_refs+0x63d/0x1290 [btrfs]
	btrfs_run_delayed_refs+0x81/0x210 [btrfs]
	commit_cowonly_roots+0xfb/0x300 [btrfs]
	btrfs_commit_transaction+0x367/0xc40 [btrfs]
	close_ctree+0x113/0x2fa [btrfs]
	generic_shutdown_super+0x6c/0x100
	kill_anon_super+0x14/0x30
	btrfs_kill_super+0x12/0x20 [btrfs]
	deactivate_locked_super+0x31/0x70
	cleanup_mnt+0x100/0x160
	task_work_run+0x68/0xb0
	exit_to_user_mode_prepare+0x1bb/0x1c0
	syscall_exit_to_user_mode+0x4b/0x260
	entry_SYSCALL_64_after_hwframe+0x44/0xa9
  kmem_cache_destroy btrfs_delayed_tree_ref: Slab cache still has objects
  CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   kmem_cache_destroy+0x119/0x120
   btrfs_delayed_ref_exit+0x1d/0x35 [btrfs]
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  =============================================================================
  BUG btrfs_delayed_extent_op (Tainted: G    B   W        ): Objects remaining in btrfs_delayed_extent_op on __kmem_cache_shutdown()
  -----------------------------------------------------------------------------

  INFO: Slab 0x00000000f145ce2f objects=22 used=1 fp=0x00000000af0f92cf flags=0x17fffc000010200
  CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   slab_err+0xb7/0xdc
   ? lock_acquired+0x199/0x490
   __kmem_cache_shutdown+0x1ac/0x3c0
   ? __mutex_unlock_slowpath+0x45/0x2a0
   kmem_cache_destroy+0x55/0x120
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  INFO: Object 0x000000004cf95ea8 @offset=6264
  INFO: Allocated in btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] age=1931 cpu=6 pid=1729873
	__slab_alloc.isra.0+0x109/0x1c0
	kmem_cache_alloc+0x7bb/0x830
	btrfs_alloc_tree_block+0x1e0/0x360 [btrfs]
	alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs]
	__btrfs_cow_block+0x12d/0x5f0 [btrfs]
	btrfs_cow_block+0xf7/0x220 [btrfs]
	btrfs_search_slot+0x62a/0xc40 [btrfs]
	btrfs_del_orphan_item+0x65/0xd0 [btrfs]
	btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
	open_ctree+0x125a/0x18a0 [btrfs]
	btrfs_mount_root.cold+0x13/0xed [btrfs]
	legacy_get_tree+0x30/0x60
	vfs_get_tree+0x28/0xe0
	fc_mount+0xe/0x40
	vfs_kern_mount.part.0+0x71/0x90
	btrfs_mount+0x13b/0x3e0 [btrfs]
  INFO: Freed in __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] age=3173 cpu=6 pid=1729803
	kmem_cache_free+0x34c/0x3c0
	__btrfs_run_delayed_refs+0xabd/0x1290 [btrfs]
	btrfs_run_delayed_refs+0x81/0x210 [btrfs]
	commit_cowonly_roots+0xfb/0x300 [btrfs]
	btrfs_commit_transaction+0x367/0xc40 [btrfs]
	close_ctree+0x113/0x2fa [btrfs]
	generic_shutdown_super+0x6c/0x100
	kill_anon_super+0x14/0x30
	btrfs_kill_super+0x12/0x20 [btrfs]
	deactivate_locked_super+0x31/0x70
	cleanup_mnt+0x100/0x160
	task_work_run+0x68/0xb0
	exit_to_user_mode_prepare+0x1bb/0x1c0
	syscall_exit_to_user_mode+0x4b/0x260
	entry_SYSCALL_64_after_hwframe+0x44/0xa9
  kmem_cache_destroy btrfs_delayed_extent_op: Slab cache still has objects
  CPU: 3 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   kmem_cache_destroy+0x119/0x120
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  BTRFS: state leak: start 30408704 end 30425087 state 1 in tree 1 refs 1

Fix this issue by having the remount path stop the qgroup rescan worker
when we are remounting RO and teach the rescan worker to stop when a
remount is in progress. If later a remount in RW mode happens, we are
already resuming the qgroup rescan worker through the call to
btrfs_qgroup_rescan_resume(), so we do not need to worry about that.

Tested-by: Fabian Vogt <fvogt@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 13 ++++++++++---
 fs/btrfs/super.c  |  8 ++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 47f27658eac1..808370ada888 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3190,6 +3190,12 @@ out:
 	return ret;
 }
 
+static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
+{
+	return btrfs_fs_closing(fs_info) ||
+		test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+}
+
 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 {
 	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
@@ -3198,6 +3204,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 	struct btrfs_trans_handle *trans = NULL;
 	int err = -ENOMEM;
 	int ret = 0;
+	bool stopped = false;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -3210,7 +3217,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 	path->skip_locking = 1;
 
 	err = 0;
-	while (!err && !btrfs_fs_closing(fs_info)) {
+	while (!err && !(stopped = rescan_should_stop(fs_info))) {
 		trans = btrfs_start_transaction(fs_info->fs_root, 0);
 		if (IS_ERR(trans)) {
 			err = PTR_ERR(trans);
@@ -3253,7 +3260,7 @@ out:
 	}
 
 	mutex_lock(&fs_info->qgroup_rescan_lock);
-	if (!btrfs_fs_closing(fs_info))
+	if (!stopped)
 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
 	if (trans) {
 		ret = update_qgroup_status_item(trans);
@@ -3272,7 +3279,7 @@ out:
 
 	btrfs_end_transaction(trans);
 
-	if (btrfs_fs_closing(fs_info)) {
+	if (stopped) {
 		btrfs_info(fs_info, "qgroup scan paused");
 	} else if (err >= 0) {
 		btrfs_info(fs_info, "qgroup scan completed%s",
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 022f20810089..b24fa62375e0 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1968,6 +1968,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		btrfs_scrub_cancel(fs_info);
 		btrfs_pause_balance(fs_info);
 
+		/*
+		 * Pause the qgroup rescan worker if it is running. We don't want
+		 * it to be still running after we are in RO mode, as after that,
+		 * by the time we unmount, it might have left a transaction open,
+		 * so we would leak the transaction and/or crash.
+		 */
+		btrfs_qgroup_wait_for_completion(fs_info, false);
+
 		ret = btrfs_commit_super(fs_info);
 		if (ret)
 			goto restore;

From 638331fa56caeaa8b4d31cc1dfbe0ce989bcff67 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 14 Dec 2020 10:10:46 +0000
Subject: [PATCH 070/547] btrfs: fix transaction leak and crash after cleaning
 up orphans on RO mount

When we delete a root (subvolume or snapshot), at the very end of the
operation, we attempt to remove the root's orphan item from the root tree,
at btrfs_drop_snapshot(), by calling btrfs_del_orphan_item(). We ignore any
error from btrfs_del_orphan_item() since it is not a serious problem and
the next time the filesystem is mounted we remove such stray orphan items
at btrfs_find_orphan_roots().

However if the filesystem is mounted RO and we have stray orphan items for
any previously deleted root, we can end up leaking a transaction and other
data structures when unmounting the filesystem, as well as crashing if we
do not have hardware acceleration for crc32c available.

The steps that lead to the transaction leak are the following:

1) The filesystem is mounted in RW mode;

2) A subvolume is deleted;

3) When the cleaner kthread runs btrfs_drop_snapshot() to delete the root,
   it gets a failure at btrfs_del_orphan_item(), which is ignored, due to
   an ENOMEM when allocating a path for example. So the orphan item for
   the root remains in the root tree;

4) The filesystem is unmounted;

5) The filesystem is mounted RO (-o ro). During the mount path we call
   btrfs_find_orphan_roots(), which iterates the root tree searching for
   orphan items. It finds the orphan item for our deleted root, and since
   it can not find the root, it starts a transaction to delete the orphan
   item (by calling btrfs_del_orphan_item());

6) The RO mount completes;

7) Before the transaction kthread commits the transaction created for
   deleting the orphan item (i.e. less than 30 seconds elapsed since the
   mount, the default commit interval), a filesystem unmount operation is
   started;

8) At close_ctree(), we stop the transaction kthread, but we still have a
   transaction open with at least one dirty extent buffer, a leaf for the
   tree root which was COWed when deleting the orphan item;

9) We then proceed to destroy the work queues, free the roots and block
   groups, etc. After that we drop the last reference on the btree inode by
   calling iput() on it. Since there are dirty pages for the btree inode,
   corresponding to the COWed extent buffer, btree_write_cache_pages() is
   invoked to flush those dirty pages. This results in creating a bio and
   submitting it, which makes us end up at btrfs_submit_metadata_bio();

10) At btrfs_submit_metadata_bio() we end up at the if-then-else branch
    that calls btrfs_wq_submit_bio(), because check_async_write() returned
    a value of 1. This value of 1 is because we did not have hardware
    acceleration available for crc32c, so BTRFS_FS_CSUM_IMPL_FAST was not
    set in fs_info->flags;

11) Then at btrfs_wq_submit_bio() we call btrfs_queue_work() against the
    workqueue at fs_info->workers, which was already freed before by the
    call to btrfs_stop_all_workers() at close_ctree(). This results in an
    invalid memory access due to a use-after-free, leading to a crash.

When this happens, before the crash there are several warnings triggered,
since we have reserved metadata space in a block group, the delayed refs
reservation, etc:

 ------------[ cut here ]------------
 WARNING: CPU: 4 PID: 1729896 at fs/btrfs/block-group.c:125 btrfs_put_block_group+0x63/0xa0 [btrfs]
 Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
 CPU: 4 PID: 1729896 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:btrfs_put_block_group+0x63/0xa0 [btrfs]
 Code: f0 01 00 00 48 39 c2 75 (...)
 RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206
 RAX: 0000000000000001 RBX: ffff947ed73e4000 RCX: ffff947ebc8b29c8
 RDX: 0000000000000001 RSI: ffffffffc0b150a0 RDI: ffff947ebc8b2800
 RBP: ffff947ebc8b2800 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110
 R13: ffff947ed73e4160 R14: ffff947ebc8b2988 R15: dead000000000100
 FS:  00007f15edfea840(0000) GS:ffff9481ad600000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f37e2893320 CR3: 0000000138f68001 CR4: 00000000003706e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  btrfs_free_block_groups+0x17f/0x2f0 [btrfs]
  close_ctree+0x2ba/0x2fa [btrfs]
  generic_shutdown_super+0x6c/0x100
  kill_anon_super+0x14/0x30
  btrfs_kill_super+0x12/0x20 [btrfs]
  deactivate_locked_super+0x31/0x70
  cleanup_mnt+0x100/0x160
  task_work_run+0x68/0xb0
  exit_to_user_mode_prepare+0x1bb/0x1c0
  syscall_exit_to_user_mode+0x4b/0x260
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f15ee221ee7
 Code: ff 0b 00 f7 d8 64 89 01 48 (...)
 RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7
 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000
 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0
 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000
 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60
 irq event stamp: 0
 hardirqs last  enabled at (0): [<0000000000000000>] 0x0
 hardirqs last disabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
 softirqs last  enabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
 softirqs last disabled at (0): [<0000000000000000>] 0x0
 ---[ end trace dd74718fef1ed5c6 ]---
 ------------[ cut here ]------------
 WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-rsv.c:459 btrfs_release_global_block_rsv+0x70/0xc0 [btrfs]
 Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
 CPU: 2 PID: 1729896 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:btrfs_release_global_block_rsv+0x70/0xc0 [btrfs]
 Code: 48 83 bb b0 03 00 00 00 (...)
 RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206
 RAX: 000000000033c000 RBX: ffff947ed73e4000 RCX: 0000000000000000
 RDX: 0000000000000001 RSI: ffffffffc0b0d8c1 RDI: 00000000ffffffff
 RBP: ffff947ebc8b7000 R08: 0000000000000001 R09: 0000000000000000
 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110
 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100
 FS:  00007f15edfea840(0000) GS:ffff9481aca00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000561a79f76e20 CR3: 0000000138f68006 CR4: 00000000003706e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  btrfs_free_block_groups+0x24c/0x2f0 [btrfs]
  close_ctree+0x2ba/0x2fa [btrfs]
  generic_shutdown_super+0x6c/0x100
  kill_anon_super+0x14/0x30
  btrfs_kill_super+0x12/0x20 [btrfs]
  deactivate_locked_super+0x31/0x70
  cleanup_mnt+0x100/0x160
  task_work_run+0x68/0xb0
  exit_to_user_mode_prepare+0x1bb/0x1c0
  syscall_exit_to_user_mode+0x4b/0x260
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f15ee221ee7
 Code: ff 0b 00 f7 d8 64 89 01 (...)
 RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7
 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000
 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0
 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000
 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60
 irq event stamp: 0
 hardirqs last  enabled at (0): [<0000000000000000>] 0x0
 hardirqs last disabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
 softirqs last  enabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
 softirqs last disabled at (0): [<0000000000000000>] 0x0
 ---[ end trace dd74718fef1ed5c7 ]---
 ------------[ cut here ]------------
 WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-group.c:3377 btrfs_free_block_groups+0x25d/0x2f0 [btrfs]
 Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
 CPU: 5 PID: 1729896 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:btrfs_free_block_groups+0x25d/0x2f0 [btrfs]
 Code: ad de 49 be 22 01 00 (...)
 RSP: 0018:ffffb270826bbde8 EFLAGS: 00010206
 RAX: ffff947ebeae1d08 RBX: ffff947ed73e4000 RCX: 0000000000000000
 RDX: 0000000000000001 RSI: ffff947e9d823ae8 RDI: 0000000000000246
 RBP: ffff947ebeae1d08 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ebeae1c00
 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100
 FS:  00007f15edfea840(0000) GS:ffff9481ad200000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f1475d98ea8 CR3: 0000000138f68005 CR4: 00000000003706e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  close_ctree+0x2ba/0x2fa [btrfs]
  generic_shutdown_super+0x6c/0x100
  kill_anon_super+0x14/0x30
  btrfs_kill_super+0x12/0x20 [btrfs]
  deactivate_locked_super+0x31/0x70
  cleanup_mnt+0x100/0x160
  task_work_run+0x68/0xb0
  exit_to_user_mode_prepare+0x1bb/0x1c0
  syscall_exit_to_user_mode+0x4b/0x260
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f15ee221ee7
 Code: ff 0b 00 f7 d8 64 89 (...)
 RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7
 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000
 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0
 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000
 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60
 irq event stamp: 0
 hardirqs last  enabled at (0): [<0000000000000000>] 0x0
 hardirqs last disabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
 softirqs last  enabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
 softirqs last disabled at (0): [<0000000000000000>] 0x0
 ---[ end trace dd74718fef1ed5c8 ]---
 BTRFS info (device sdc): space_info 4 has 268238848 free, is not full
 BTRFS info (device sdc): space_info total=268435456, used=114688, pinned=0, reserved=16384, may_use=0, readonly=65536
 BTRFS info (device sdc): global_block_rsv: size 0 reserved 0
 BTRFS info (device sdc): trans_block_rsv: size 0 reserved 0
 BTRFS info (device sdc): chunk_block_rsv: size 0 reserved 0
 BTRFS info (device sdc): delayed_block_rsv: size 0 reserved 0
 BTRFS info (device sdc): delayed_refs_rsv: size 524288 reserved 0

And the crash, which only happens when we do not have crc32c hardware
acceleration, produces the following trace immediately after those
warnings:

 stack segment: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI
 CPU: 2 PID: 1749129 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:btrfs_queue_work+0x36/0x190 [btrfs]
 Code: 54 55 53 48 89 f3 (...)
 RSP: 0018:ffffb27082443ae8 EFLAGS: 00010282
 RAX: 0000000000000004 RBX: ffff94810ee9ad90 RCX: 0000000000000000
 RDX: 0000000000000001 RSI: ffff94810ee9ad90 RDI: ffff947ed8ee75a0
 RBP: a56b6b6b6b6b6b6b R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000007 R11: 0000000000000001 R12: ffff947fa9b435a8
 R13: ffff94810ee9ad90 R14: 0000000000000000 R15: ffff947e93dc0000
 FS:  00007f3cfe974840(0000) GS:ffff9481ac600000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f1b42995a70 CR3: 0000000127638003 CR4: 00000000003706e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  btrfs_wq_submit_bio+0xb3/0xd0 [btrfs]
  btrfs_submit_metadata_bio+0x44/0xc0 [btrfs]
  submit_one_bio+0x61/0x70 [btrfs]
  btree_write_cache_pages+0x414/0x450 [btrfs]
  ? kobject_put+0x9a/0x1d0
  ? trace_hardirqs_on+0x1b/0xf0
  ? _raw_spin_unlock_irqrestore+0x3c/0x60
  ? free_debug_processing+0x1e1/0x2b0
  do_writepages+0x43/0xe0
  ? lock_acquired+0x199/0x490
  __writeback_single_inode+0x59/0x650
  writeback_single_inode+0xaf/0x120
  write_inode_now+0x94/0xd0
  iput+0x187/0x2b0
  close_ctree+0x2c6/0x2fa [btrfs]
  generic_shutdown_super+0x6c/0x100
  kill_anon_super+0x14/0x30
  btrfs_kill_super+0x12/0x20 [btrfs]
  deactivate_locked_super+0x31/0x70
  cleanup_mnt+0x100/0x160
  task_work_run+0x68/0xb0
  exit_to_user_mode_prepare+0x1bb/0x1c0
  syscall_exit_to_user_mode+0x4b/0x260
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f3cfebabee7
 Code: ff 0b 00 f7 d8 64 89 01 (...)
 RSP: 002b:00007ffc9c9a05f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
 RAX: 0000000000000000 RBX: 00007f3cfecd1264 RCX: 00007f3cfebabee7
 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000562b6b478000
 RBP: 0000562b6b473a30 R08: 0000000000000000 R09: 00007f3cfec6cbe0
 R10: 0000562b6b479fe0 R11: 0000000000000246 R12: 0000000000000000
 R13: 0000562b6b478000 R14: 0000562b6b473b40 R15: 0000562b6b473c60
 Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
 ---[ end trace dd74718fef1ed5cc ]---

Finally when we remove the btrfs module (rmmod btrfs), there are several
warnings about objects that were allocated from our slabs but were never
freed, consequence of the transaction that was never committed and got
leaked:
 =============================================================================
 BUG btrfs_delayed_ref_head (Tainted: G    B   W        ): Objects remaining in btrfs_delayed_ref_head on __kmem_cache_shutdown()
 -----------------------------------------------------------------------------

 INFO: Slab 0x0000000094c2ae56 objects=24 used=2 fp=0x000000002bfa2521 flags=0x17fffc000010200
 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 Call Trace:
  dump_stack+0x8d/0xb5
  slab_err+0xb7/0xdc
  ? lock_acquired+0x199/0x490
  __kmem_cache_shutdown+0x1ac/0x3c0
  ? lock_release+0x20e/0x4c0
  kmem_cache_destroy+0x55/0x120
  btrfs_delayed_ref_exit+0x11/0x35 [btrfs]
  exit_btrfs_fs+0xa/0x59 [btrfs]
  __x64_sys_delete_module+0x194/0x260
  ? fpregs_assert_state_consistent+0x1e/0x40
  ? exit_to_user_mode_prepare+0x55/0x1c0
  ? trace_hardirqs_on+0x1b/0xf0
  do_syscall_64+0x33/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f693e305897
 Code: 73 01 c3 48 8b 0d f9 f5 (...)
 RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
 INFO: Object 0x0000000050cbdd61 @offset=12104
 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1894 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs]
        btrfs_free_tree_block+0x128/0x360 [btrfs]
        __btrfs_cow_block+0x489/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
        btrfs_mount+0x13b/0x3e0 [btrfs]
 INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=4292 cpu=2 pid=1729526
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        commit_cowonly_roots+0xfb/0x300 [btrfs]
        btrfs_commit_transaction+0x367/0xc40 [btrfs]
        sync_filesystem+0x74/0x90
        generic_shutdown_super+0x22/0x100
        kill_anon_super+0x14/0x30
        btrfs_kill_super+0x12/0x20 [btrfs]
        deactivate_locked_super+0x31/0x70
        cleanup_mnt+0x100/0x160
        task_work_run+0x68/0xb0
        exit_to_user_mode_prepare+0x1bb/0x1c0
        syscall_exit_to_user_mode+0x4b/0x260
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
 INFO: Object 0x0000000086e9b0ff @offset=12776
 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1900 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs]
        btrfs_alloc_tree_block+0x2bf/0x360 [btrfs]
        alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs]
        __btrfs_cow_block+0x12d/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
 INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=3141 cpu=6 pid=1729803
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        btrfs_write_dirty_block_groups+0x17d/0x3d0 [btrfs]
        commit_cowonly_roots+0x248/0x300 [btrfs]
        btrfs_commit_transaction+0x367/0xc40 [btrfs]
        close_ctree+0x113/0x2fa [btrfs]
        generic_shutdown_super+0x6c/0x100
        kill_anon_super+0x14/0x30
        btrfs_kill_super+0x12/0x20 [btrfs]
        deactivate_locked_super+0x31/0x70
        cleanup_mnt+0x100/0x160
        task_work_run+0x68/0xb0
        exit_to_user_mode_prepare+0x1bb/0x1c0
        syscall_exit_to_user_mode+0x4b/0x260
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
 kmem_cache_destroy btrfs_delayed_ref_head: Slab cache still has objects
 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 Call Trace:
  dump_stack+0x8d/0xb5
  kmem_cache_destroy+0x119/0x120
  btrfs_delayed_ref_exit+0x11/0x35 [btrfs]
  exit_btrfs_fs+0xa/0x59 [btrfs]
  __x64_sys_delete_module+0x194/0x260
  ? fpregs_assert_state_consistent+0x1e/0x40
  ? exit_to_user_mode_prepare+0x55/0x1c0
  ? trace_hardirqs_on+0x1b/0xf0
  do_syscall_64+0x33/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f693e305897
 Code: 73 01 c3 48 8b 0d f9 f5 0b (...)
 RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
 =============================================================================
 BUG btrfs_delayed_tree_ref (Tainted: G    B   W        ): Objects remaining in btrfs_delayed_tree_ref on __kmem_cache_shutdown()
 -----------------------------------------------------------------------------

 INFO: Slab 0x0000000011f78dc0 objects=37 used=2 fp=0x0000000032d55d91 flags=0x17fffc000010200
 CPU: 3 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 Call Trace:
  dump_stack+0x8d/0xb5
  slab_err+0xb7/0xdc
  ? lock_acquired+0x199/0x490
  __kmem_cache_shutdown+0x1ac/0x3c0
  ? lock_release+0x20e/0x4c0
  kmem_cache_destroy+0x55/0x120
  btrfs_delayed_ref_exit+0x1d/0x35 [btrfs]
  exit_btrfs_fs+0xa/0x59 [btrfs]
  __x64_sys_delete_module+0x194/0x260
  ? fpregs_assert_state_consistent+0x1e/0x40
  ? exit_to_user_mode_prepare+0x55/0x1c0
  ? trace_hardirqs_on+0x1b/0xf0
  do_syscall_64+0x33/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f693e305897
 Code: 73 01 c3 48 8b 0d f9 f5 (...)
 RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
 INFO: Object 0x000000001a340018 @offset=4408
 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1917 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs]
        btrfs_free_tree_block+0x128/0x360 [btrfs]
        __btrfs_cow_block+0x489/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
        btrfs_mount+0x13b/0x3e0 [btrfs]
 INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=4167 cpu=4 pid=1729795
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        btrfs_commit_transaction+0x60/0xc40 [btrfs]
        create_subvol+0x56a/0x990 [btrfs]
        btrfs_mksubvol+0x3fb/0x4a0 [btrfs]
        __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs]
        btrfs_ioctl_snap_create+0x58/0x80 [btrfs]
        btrfs_ioctl+0x1a92/0x36f0 [btrfs]
        __x64_sys_ioctl+0x83/0xb0
        do_syscall_64+0x33/0x80
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
 INFO: Object 0x000000002b46292a @offset=13648
 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1923 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs]
        btrfs_alloc_tree_block+0x2bf/0x360 [btrfs]
        alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs]
        __btrfs_cow_block+0x12d/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
 INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=3164 cpu=6 pid=1729803
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        commit_cowonly_roots+0xfb/0x300 [btrfs]
        btrfs_commit_transaction+0x367/0xc40 [btrfs]
        close_ctree+0x113/0x2fa [btrfs]
        generic_shutdown_super+0x6c/0x100
        kill_anon_super+0x14/0x30
        btrfs_kill_super+0x12/0x20 [btrfs]
        deactivate_locked_super+0x31/0x70
        cleanup_mnt+0x100/0x160
        task_work_run+0x68/0xb0
        exit_to_user_mode_prepare+0x1bb/0x1c0
        syscall_exit_to_user_mode+0x4b/0x260
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
 kmem_cache_destroy btrfs_delayed_tree_ref: Slab cache still has objects
 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 Call Trace:
  dump_stack+0x8d/0xb5
  kmem_cache_destroy+0x119/0x120
  btrfs_delayed_ref_exit+0x1d/0x35 [btrfs]
  exit_btrfs_fs+0xa/0x59 [btrfs]
  __x64_sys_delete_module+0x194/0x260
  ? fpregs_assert_state_consistent+0x1e/0x40
  ? exit_to_user_mode_prepare+0x55/0x1c0
  ? trace_hardirqs_on+0x1b/0xf0
  do_syscall_64+0x33/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f693e305897
 Code: 73 01 c3 48 8b 0d f9 f5 (...)
 RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
 =============================================================================
 BUG btrfs_delayed_extent_op (Tainted: G    B   W        ): Objects remaining in btrfs_delayed_extent_op on __kmem_cache_shutdown()
 -----------------------------------------------------------------------------

 INFO: Slab 0x00000000f145ce2f objects=22 used=1 fp=0x00000000af0f92cf flags=0x17fffc000010200
 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 Call Trace:
  dump_stack+0x8d/0xb5
  slab_err+0xb7/0xdc
  ? lock_acquired+0x199/0x490
  __kmem_cache_shutdown+0x1ac/0x3c0
  ? __mutex_unlock_slowpath+0x45/0x2a0
  kmem_cache_destroy+0x55/0x120
  exit_btrfs_fs+0xa/0x59 [btrfs]
  __x64_sys_delete_module+0x194/0x260
  ? fpregs_assert_state_consistent+0x1e/0x40
  ? exit_to_user_mode_prepare+0x55/0x1c0
  ? trace_hardirqs_on+0x1b/0xf0
  do_syscall_64+0x33/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f693e305897
 Code: 73 01 c3 48 8b 0d f9 f5 (...)
 RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
 INFO: Object 0x000000004cf95ea8 @offset=6264
 INFO: Allocated in btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] age=1931 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_alloc_tree_block+0x1e0/0x360 [btrfs]
        alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs]
        __btrfs_cow_block+0x12d/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
        btrfs_mount+0x13b/0x3e0 [btrfs]
 INFO: Freed in __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] age=3173 cpu=6 pid=1729803
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        commit_cowonly_roots+0xfb/0x300 [btrfs]
        btrfs_commit_transaction+0x367/0xc40 [btrfs]
        close_ctree+0x113/0x2fa [btrfs]
        generic_shutdown_super+0x6c/0x100
        kill_anon_super+0x14/0x30
        btrfs_kill_super+0x12/0x20 [btrfs]
        deactivate_locked_super+0x31/0x70
        cleanup_mnt+0x100/0x160
        task_work_run+0x68/0xb0
        exit_to_user_mode_prepare+0x1bb/0x1c0
        syscall_exit_to_user_mode+0x4b/0x260
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
 kmem_cache_destroy btrfs_delayed_extent_op: Slab cache still has objects
 CPU: 3 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 Call Trace:
  dump_stack+0x8d/0xb5
  kmem_cache_destroy+0x119/0x120
  exit_btrfs_fs+0xa/0x59 [btrfs]
  __x64_sys_delete_module+0x194/0x260
  ? fpregs_assert_state_consistent+0x1e/0x40
  ? exit_to_user_mode_prepare+0x55/0x1c0
  ? trace_hardirqs_on+0x1b/0xf0
  do_syscall_64+0x33/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f693e305897
 Code: 73 01 c3 48 8b 0d f9 (...)
 RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
 BTRFS: state leak: start 30408704 end 30425087 state 1 in tree 1 refs 1

So fix this by calling btrfs_find_orphan_roots() in the mount path only if
we are mounting the filesystem in RW mode. It's pointless to have it called
for RO mounts anyway, since despite adding any deleted roots to the list of
dead roots, we will never have the roots deleted until the filesystem is
remounted in RW mode, as the cleaner kthread does nothing when we are
mounted in RO - btrfs_need_cleaner_sleep() always returns true and the
cleaner spends all time sleeping, never cleaning dead roots.

This is accomplished by moving the call to btrfs_find_orphan_roots() from
open_ctree() to btrfs_start_pre_rw_mount(), which also guarantees that
if later the filesystem is remounted RW, we populate the list of dead
roots and have the cleaner task delete the dead roots.

Tested-by: Fabian Vogt <fvogt@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 765deefda92b..e941cbae3991 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2969,6 +2969,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
 		}
 	}
 
+	ret = btrfs_find_orphan_roots(fs_info);
 out:
 	return ret;
 }
@@ -3383,10 +3384,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 		}
 	}
 
-	ret = btrfs_find_orphan_roots(fs_info);
-	if (ret)
-		goto fail_qgroup;
-
 	fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
 	if (IS_ERR(fs_info->fs_root)) {
 		err = PTR_ERR(fs_info->fs_root);

From a0a1db70df5f48576fea6d08f0a69c05f3ab4cf4 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 14 Dec 2020 10:10:47 +0000
Subject: [PATCH 071/547] btrfs: fix race between RO remount and the cleaner
 task

When we are remounting a filesystem in RO mode we can race with the cleaner
task and result in leaking a transaction if the filesystem is unmounted
shortly after, before the transaction kthread had a chance to commit that
transaction. That also results in a crash during unmount, due to a
use-after-free, if hardware acceleration is not available for crc32c.

The following sequence of steps explains how the race happens.

1) The filesystem is mounted in RW mode and the cleaner task is running.
   This means that currently BTRFS_FS_CLEANER_RUNNING is set at
   fs_info->flags;

2) The cleaner task is currently running delayed iputs for example;

3) A filesystem RO remount operation starts;

4) The RO remount task calls btrfs_commit_super(), which commits any
   currently open transaction, and it finishes;

5) At this point the cleaner task is still running and it creates a new
   transaction by doing one of the following things:

   * When running the delayed iput() for an inode with a 0 link count,
     in which case at btrfs_evict_inode() we start a transaction through
     the call to evict_refill_and_join(), use it and then release its
     handle through btrfs_end_transaction();

   * When deleting a dead root through btrfs_clean_one_deleted_snapshot(),
     a transaction is started at btrfs_drop_snapshot() and then its handle
     is released through a call to btrfs_end_transaction_throttle();

   * When the remount task was still running, and before the remount task
     called btrfs_delete_unused_bgs(), the cleaner task also called
     btrfs_delete_unused_bgs() and it picked and removed one block group
     from the list of unused block groups. Before the cleaner task started
     a transaction, through btrfs_start_trans_remove_block_group() at
     btrfs_delete_unused_bgs(), the remount task had already called
     btrfs_commit_super();

6) So at this point the filesystem is in RO mode and we have an open
   transaction that was started by the cleaner task;

7) Shortly after a filesystem unmount operation starts. At close_ctree()
   we stop the transaction kthread before it had a chance to commit the
   transaction, since less than 30 seconds (the default commit interval)
   have elapsed since the last transaction was committed;

8) We end up calling iput() against the btree inode at close_ctree() while
   there is an open transaction, and since that transaction was used to
   update btrees by the cleaner, we have dirty pages in the btree inode
   due to COW operations on metadata extents, and therefore writeback is
   triggered for the btree inode.

   So btree_write_cache_pages() is invoked to flush those dirty pages
   during the final iput() on the btree inode. This results in creating a
   bio and submitting it, which makes us end up at
   btrfs_submit_metadata_bio();

9) At btrfs_submit_metadata_bio() we end up at the if-then-else branch
   that calls btrfs_wq_submit_bio(), because check_async_write() returned
   a value of 1. This value of 1 is because we did not have hardware
   acceleration available for crc32c, so BTRFS_FS_CSUM_IMPL_FAST was not
   set in fs_info->flags;

10) Then at btrfs_wq_submit_bio() we call btrfs_queue_work() against the
    workqueue at fs_info->workers, which was already freed before by the
    call to btrfs_stop_all_workers() at close_ctree(). This results in an
    invalid memory access due to a use-after-free, leading to a crash.

When this happens, before the crash there are several warnings triggered,
since we have reserved metadata space in a block group, the delayed refs
reservation, etc:

  ------------[ cut here ]------------
  WARNING: CPU: 4 PID: 1729896 at fs/btrfs/block-group.c:125 btrfs_put_block_group+0x63/0xa0 [btrfs]
  Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
  CPU: 4 PID: 1729896 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:btrfs_put_block_group+0x63/0xa0 [btrfs]
  Code: f0 01 00 00 48 39 c2 75 (...)
  RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206
  RAX: 0000000000000001 RBX: ffff947ed73e4000 RCX: ffff947ebc8b29c8
  RDX: 0000000000000001 RSI: ffffffffc0b150a0 RDI: ffff947ebc8b2800
  RBP: ffff947ebc8b2800 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110
  R13: ffff947ed73e4160 R14: ffff947ebc8b2988 R15: dead000000000100
  FS:  00007f15edfea840(0000) GS:ffff9481ad600000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f37e2893320 CR3: 0000000138f68001 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   btrfs_free_block_groups+0x17f/0x2f0 [btrfs]
   close_ctree+0x2ba/0x2fa [btrfs]
   generic_shutdown_super+0x6c/0x100
   kill_anon_super+0x14/0x30
   btrfs_kill_super+0x12/0x20 [btrfs]
   deactivate_locked_super+0x31/0x70
   cleanup_mnt+0x100/0x160
   task_work_run+0x68/0xb0
   exit_to_user_mode_prepare+0x1bb/0x1c0
   syscall_exit_to_user_mode+0x4b/0x260
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f15ee221ee7
  Code: ff 0b 00 f7 d8 64 89 01 48 (...)
  RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7
  RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000
  RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0
  R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000
  R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60
  irq event stamp: 0
  hardirqs last  enabled at (0): [<0000000000000000>] 0x0
  hardirqs last disabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last  enabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last disabled at (0): [<0000000000000000>] 0x0
  ---[ end trace dd74718fef1ed5c6 ]---
  ------------[ cut here ]------------
  WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-rsv.c:459 btrfs_release_global_block_rsv+0x70/0xc0 [btrfs]
  Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
  CPU: 2 PID: 1729896 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:btrfs_release_global_block_rsv+0x70/0xc0 [btrfs]
  Code: 48 83 bb b0 03 00 00 00 (...)
  RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206
  RAX: 000000000033c000 RBX: ffff947ed73e4000 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: ffffffffc0b0d8c1 RDI: 00000000ffffffff
  RBP: ffff947ebc8b7000 R08: 0000000000000001 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110
  R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100
  FS:  00007f15edfea840(0000) GS:ffff9481aca00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000561a79f76e20 CR3: 0000000138f68006 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   btrfs_free_block_groups+0x24c/0x2f0 [btrfs]
   close_ctree+0x2ba/0x2fa [btrfs]
   generic_shutdown_super+0x6c/0x100
   kill_anon_super+0x14/0x30
   btrfs_kill_super+0x12/0x20 [btrfs]
   deactivate_locked_super+0x31/0x70
   cleanup_mnt+0x100/0x160
   task_work_run+0x68/0xb0
   exit_to_user_mode_prepare+0x1bb/0x1c0
   syscall_exit_to_user_mode+0x4b/0x260
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f15ee221ee7
  Code: ff 0b 00 f7 d8 64 89 01 (...)
  RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7
  RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000
  RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0
  R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000
  R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60
  irq event stamp: 0
  hardirqs last  enabled at (0): [<0000000000000000>] 0x0
  hardirqs last disabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last  enabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last disabled at (0): [<0000000000000000>] 0x0
  ---[ end trace dd74718fef1ed5c7 ]---
  ------------[ cut here ]------------
  WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-group.c:3377 btrfs_free_block_groups+0x25d/0x2f0 [btrfs]
  Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
  CPU: 5 PID: 1729896 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:btrfs_free_block_groups+0x25d/0x2f0 [btrfs]
  Code: ad de 49 be 22 01 00 (...)
  RSP: 0018:ffffb270826bbde8 EFLAGS: 00010206
  RAX: ffff947ebeae1d08 RBX: ffff947ed73e4000 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: ffff947e9d823ae8 RDI: 0000000000000246
  RBP: ffff947ebeae1d08 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ebeae1c00
  R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100
  FS:  00007f15edfea840(0000) GS:ffff9481ad200000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f1475d98ea8 CR3: 0000000138f68005 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   close_ctree+0x2ba/0x2fa [btrfs]
   generic_shutdown_super+0x6c/0x100
   kill_anon_super+0x14/0x30
   btrfs_kill_super+0x12/0x20 [btrfs]
   deactivate_locked_super+0x31/0x70
   cleanup_mnt+0x100/0x160
   task_work_run+0x68/0xb0
   exit_to_user_mode_prepare+0x1bb/0x1c0
   syscall_exit_to_user_mode+0x4b/0x260
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f15ee221ee7
  Code: ff 0b 00 f7 d8 64 89 (...)
  RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7
  RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000
  RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0
  R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000
  R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60
  irq event stamp: 0
  hardirqs last  enabled at (0): [<0000000000000000>] 0x0
  hardirqs last disabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last  enabled at (0): [<ffffffff8bcae560>] copy_process+0x8a0/0x1d70
  softirqs last disabled at (0): [<0000000000000000>] 0x0
  ---[ end trace dd74718fef1ed5c8 ]---
  BTRFS info (device sdc): space_info 4 has 268238848 free, is not full
  BTRFS info (device sdc): space_info total=268435456, used=114688, pinned=0, reserved=16384, may_use=0, readonly=65536
  BTRFS info (device sdc): global_block_rsv: size 0 reserved 0
  BTRFS info (device sdc): trans_block_rsv: size 0 reserved 0
  BTRFS info (device sdc): chunk_block_rsv: size 0 reserved 0
  BTRFS info (device sdc): delayed_block_rsv: size 0 reserved 0
  BTRFS info (device sdc): delayed_refs_rsv: size 524288 reserved 0

And the crash, which only happens when we do not have crc32c hardware
acceleration, produces the following trace immediately after those
warnings:

  stack segment: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI
  CPU: 2 PID: 1749129 Comm: umount Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  RIP: 0010:btrfs_queue_work+0x36/0x190 [btrfs]
  Code: 54 55 53 48 89 f3 (...)
  RSP: 0018:ffffb27082443ae8 EFLAGS: 00010282
  RAX: 0000000000000004 RBX: ffff94810ee9ad90 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: ffff94810ee9ad90 RDI: ffff947ed8ee75a0
  RBP: a56b6b6b6b6b6b6b R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000007 R11: 0000000000000001 R12: ffff947fa9b435a8
  R13: ffff94810ee9ad90 R14: 0000000000000000 R15: ffff947e93dc0000
  FS:  00007f3cfe974840(0000) GS:ffff9481ac600000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f1b42995a70 CR3: 0000000127638003 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   btrfs_wq_submit_bio+0xb3/0xd0 [btrfs]
   btrfs_submit_metadata_bio+0x44/0xc0 [btrfs]
   submit_one_bio+0x61/0x70 [btrfs]
   btree_write_cache_pages+0x414/0x450 [btrfs]
   ? kobject_put+0x9a/0x1d0
   ? trace_hardirqs_on+0x1b/0xf0
   ? _raw_spin_unlock_irqrestore+0x3c/0x60
   ? free_debug_processing+0x1e1/0x2b0
   do_writepages+0x43/0xe0
   ? lock_acquired+0x199/0x490
   __writeback_single_inode+0x59/0x650
   writeback_single_inode+0xaf/0x120
   write_inode_now+0x94/0xd0
   iput+0x187/0x2b0
   close_ctree+0x2c6/0x2fa [btrfs]
   generic_shutdown_super+0x6c/0x100
   kill_anon_super+0x14/0x30
   btrfs_kill_super+0x12/0x20 [btrfs]
   deactivate_locked_super+0x31/0x70
   cleanup_mnt+0x100/0x160
   task_work_run+0x68/0xb0
   exit_to_user_mode_prepare+0x1bb/0x1c0
   syscall_exit_to_user_mode+0x4b/0x260
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f3cfebabee7
  Code: ff 0b 00 f7 d8 64 89 01 (...)
  RSP: 002b:00007ffc9c9a05f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 00007f3cfecd1264 RCX: 00007f3cfebabee7
  RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000562b6b478000
  RBP: 0000562b6b473a30 R08: 0000000000000000 R09: 00007f3cfec6cbe0
  R10: 0000562b6b479fe0 R11: 0000000000000246 R12: 0000000000000000
  R13: 0000562b6b478000 R14: 0000562b6b473b40 R15: 0000562b6b473c60
  Modules linked in: btrfs dm_snapshot dm_thin_pool (...)
  ---[ end trace dd74718fef1ed5cc ]---

Finally when we remove the btrfs module (rmmod btrfs), there are several
warnings about objects that were allocated from our slabs but were never
freed, consequence of the transaction that was never committed and got
leaked:

  =============================================================================
  BUG btrfs_delayed_ref_head (Tainted: G    B   W        ): Objects remaining in btrfs_delayed_ref_head on __kmem_cache_shutdown()
  -----------------------------------------------------------------------------

  INFO: Slab 0x0000000094c2ae56 objects=24 used=2 fp=0x000000002bfa2521 flags=0x17fffc000010200
  CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   slab_err+0xb7/0xdc
   ? lock_acquired+0x199/0x490
   __kmem_cache_shutdown+0x1ac/0x3c0
   ? lock_release+0x20e/0x4c0
   kmem_cache_destroy+0x55/0x120
   btrfs_delayed_ref_exit+0x11/0x35 [btrfs]
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  INFO: Object 0x0000000050cbdd61 @offset=12104
  INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1894 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs]
        btrfs_free_tree_block+0x128/0x360 [btrfs]
        __btrfs_cow_block+0x489/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
        btrfs_mount+0x13b/0x3e0 [btrfs]
  INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=4292 cpu=2 pid=1729526
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        commit_cowonly_roots+0xfb/0x300 [btrfs]
        btrfs_commit_transaction+0x367/0xc40 [btrfs]
        sync_filesystem+0x74/0x90
        generic_shutdown_super+0x22/0x100
        kill_anon_super+0x14/0x30
        btrfs_kill_super+0x12/0x20 [btrfs]
        deactivate_locked_super+0x31/0x70
        cleanup_mnt+0x100/0x160
        task_work_run+0x68/0xb0
        exit_to_user_mode_prepare+0x1bb/0x1c0
        syscall_exit_to_user_mode+0x4b/0x260
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
  INFO: Object 0x0000000086e9b0ff @offset=12776
  INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1900 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs]
        btrfs_alloc_tree_block+0x2bf/0x360 [btrfs]
        alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs]
        __btrfs_cow_block+0x12d/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
  INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=3141 cpu=6 pid=1729803
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        btrfs_write_dirty_block_groups+0x17d/0x3d0 [btrfs]
        commit_cowonly_roots+0x248/0x300 [btrfs]
        btrfs_commit_transaction+0x367/0xc40 [btrfs]
        close_ctree+0x113/0x2fa [btrfs]
        generic_shutdown_super+0x6c/0x100
        kill_anon_super+0x14/0x30
        btrfs_kill_super+0x12/0x20 [btrfs]
        deactivate_locked_super+0x31/0x70
        cleanup_mnt+0x100/0x160
        task_work_run+0x68/0xb0
        exit_to_user_mode_prepare+0x1bb/0x1c0
        syscall_exit_to_user_mode+0x4b/0x260
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
  kmem_cache_destroy btrfs_delayed_ref_head: Slab cache still has objects
  CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   kmem_cache_destroy+0x119/0x120
   btrfs_delayed_ref_exit+0x11/0x35 [btrfs]
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 0b (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  =============================================================================
  BUG btrfs_delayed_tree_ref (Tainted: G    B   W        ): Objects remaining in btrfs_delayed_tree_ref on __kmem_cache_shutdown()
  -----------------------------------------------------------------------------

  INFO: Slab 0x0000000011f78dc0 objects=37 used=2 fp=0x0000000032d55d91 flags=0x17fffc000010200
  CPU: 3 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   slab_err+0xb7/0xdc
   ? lock_acquired+0x199/0x490
   __kmem_cache_shutdown+0x1ac/0x3c0
   ? lock_release+0x20e/0x4c0
   kmem_cache_destroy+0x55/0x120
   btrfs_delayed_ref_exit+0x1d/0x35 [btrfs]
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  INFO: Object 0x000000001a340018 @offset=4408
  INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1917 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs]
        btrfs_free_tree_block+0x128/0x360 [btrfs]
        __btrfs_cow_block+0x489/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
        btrfs_mount+0x13b/0x3e0 [btrfs]
  INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=4167 cpu=4 pid=1729795
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        btrfs_commit_transaction+0x60/0xc40 [btrfs]
        create_subvol+0x56a/0x990 [btrfs]
        btrfs_mksubvol+0x3fb/0x4a0 [btrfs]
        __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs]
        btrfs_ioctl_snap_create+0x58/0x80 [btrfs]
        btrfs_ioctl+0x1a92/0x36f0 [btrfs]
        __x64_sys_ioctl+0x83/0xb0
        do_syscall_64+0x33/0x80
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
  INFO: Object 0x000000002b46292a @offset=13648
  INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1923 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs]
        btrfs_alloc_tree_block+0x2bf/0x360 [btrfs]
        alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs]
        __btrfs_cow_block+0x12d/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
  INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=3164 cpu=6 pid=1729803
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        commit_cowonly_roots+0xfb/0x300 [btrfs]
        btrfs_commit_transaction+0x367/0xc40 [btrfs]
        close_ctree+0x113/0x2fa [btrfs]
        generic_shutdown_super+0x6c/0x100
        kill_anon_super+0x14/0x30
        btrfs_kill_super+0x12/0x20 [btrfs]
        deactivate_locked_super+0x31/0x70
        cleanup_mnt+0x100/0x160
        task_work_run+0x68/0xb0
        exit_to_user_mode_prepare+0x1bb/0x1c0
        syscall_exit_to_user_mode+0x4b/0x260
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
  kmem_cache_destroy btrfs_delayed_tree_ref: Slab cache still has objects
  CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   kmem_cache_destroy+0x119/0x120
   btrfs_delayed_ref_exit+0x1d/0x35 [btrfs]
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  =============================================================================
  BUG btrfs_delayed_extent_op (Tainted: G    B   W        ): Objects remaining in btrfs_delayed_extent_op on __kmem_cache_shutdown()
  -----------------------------------------------------------------------------
  INFO: Slab 0x00000000f145ce2f objects=22 used=1 fp=0x00000000af0f92cf flags=0x17fffc000010200
  CPU: 5 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   slab_err+0xb7/0xdc
   ? lock_acquired+0x199/0x490
   __kmem_cache_shutdown+0x1ac/0x3c0
   ? __mutex_unlock_slowpath+0x45/0x2a0
   kmem_cache_destroy+0x55/0x120
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 f5 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  INFO: Object 0x000000004cf95ea8 @offset=6264
  INFO: Allocated in btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] age=1931 cpu=6 pid=1729873
        __slab_alloc.isra.0+0x109/0x1c0
        kmem_cache_alloc+0x7bb/0x830
        btrfs_alloc_tree_block+0x1e0/0x360 [btrfs]
        alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs]
        __btrfs_cow_block+0x12d/0x5f0 [btrfs]
        btrfs_cow_block+0xf7/0x220 [btrfs]
        btrfs_search_slot+0x62a/0xc40 [btrfs]
        btrfs_del_orphan_item+0x65/0xd0 [btrfs]
        btrfs_find_orphan_roots+0x1bf/0x200 [btrfs]
        open_ctree+0x125a/0x18a0 [btrfs]
        btrfs_mount_root.cold+0x13/0xed [btrfs]
        legacy_get_tree+0x30/0x60
        vfs_get_tree+0x28/0xe0
        fc_mount+0xe/0x40
        vfs_kern_mount.part.0+0x71/0x90
        btrfs_mount+0x13b/0x3e0 [btrfs]
  INFO: Freed in __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] age=3173 cpu=6 pid=1729803
        kmem_cache_free+0x34c/0x3c0
        __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs]
        btrfs_run_delayed_refs+0x81/0x210 [btrfs]
        commit_cowonly_roots+0xfb/0x300 [btrfs]
        btrfs_commit_transaction+0x367/0xc40 [btrfs]
        close_ctree+0x113/0x2fa [btrfs]
        generic_shutdown_super+0x6c/0x100
        kill_anon_super+0x14/0x30
        btrfs_kill_super+0x12/0x20 [btrfs]
        deactivate_locked_super+0x31/0x70
        cleanup_mnt+0x100/0x160
        task_work_run+0x68/0xb0
        exit_to_user_mode_prepare+0x1bb/0x1c0
        syscall_exit_to_user_mode+0x4b/0x260
        entry_SYSCALL_64_after_hwframe+0x44/0xa9
  kmem_cache_destroy btrfs_delayed_extent_op: Slab cache still has objects
  CPU: 3 PID: 1729921 Comm: rmmod Tainted: G    B   W         5.10.0-rc4-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   dump_stack+0x8d/0xb5
   kmem_cache_destroy+0x119/0x120
   exit_btrfs_fs+0xa/0x59 [btrfs]
   __x64_sys_delete_module+0x194/0x260
   ? fpregs_assert_state_consistent+0x1e/0x40
   ? exit_to_user_mode_prepare+0x55/0x1c0
   ? trace_hardirqs_on+0x1b/0xf0
   do_syscall_64+0x33/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f693e305897
  Code: 73 01 c3 48 8b 0d f9 (...)
  RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897
  RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8
  RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000
  R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740
  R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760
  BTRFS: state leak: start 30408704 end 30425087 state 1 in tree 1 refs 1

So fix this by making the remount path to wait for the cleaner task before
calling btrfs_commit_super(). The remount path now waits for the bit
BTRFS_FS_CLEANER_RUNNING to be cleared from fs_info->flags before calling
btrfs_commit_super() and this ensures the cleaner can not start a
transaction after that, because it sleeps when the filesystem is in RO
mode and we have already flagged the filesystem as RO before waiting for
BTRFS_FS_CLEANER_RUNNING to be cleared.

This also introduces a new flag BTRFS_FS_STATE_RO to be used for
fs_info->fs_state when the filesystem is in RO mode. This is because we
were doing the RO check using the flags of the superblock and setting the
RO mode simply by ORing into the superblock's flags - those operations are
not atomic and could result in the cleaner not seeing the update from the
remount task after it clears BTRFS_FS_CLEANER_RUNNING.

Tested-by: Fabian Vogt <fvogt@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h   | 20 +++++++++++++++++++-
 fs/btrfs/disk-io.c |  5 ++++-
 fs/btrfs/super.c   | 22 +++++++++++++++++++---
 fs/btrfs/volumes.c |  4 ++--
 4 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3935d297d198..0225c5208f44 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -132,6 +132,8 @@ enum {
 	 * defrag
 	 */
 	BTRFS_FS_STATE_REMOUNTING,
+	/* Filesystem in RO mode */
+	BTRFS_FS_STATE_RO,
 	/* Track if a transaction abort has been reported on this filesystem */
 	BTRFS_FS_STATE_TRANS_ABORTED,
 	/*
@@ -2892,10 +2894,26 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
  * If we remount the fs to be R/O or umount the fs, the cleaner needn't do
  * anything except sleeping. This function is used to check the status of
  * the fs.
+ * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount,
+ * since setting and checking for SB_RDONLY in the superblock's flags is not
+ * atomic.
  */
 static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
 {
-	return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info);
+	return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) ||
+		btrfs_fs_closing(fs_info);
+}
+
+static inline void btrfs_set_sb_rdonly(struct super_block *sb)
+{
+	sb->s_flags |= SB_RDONLY;
+	set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
+}
+
+static inline void btrfs_clear_sb_rdonly(struct super_block *sb)
+{
+	sb->s_flags &= ~SB_RDONLY;
+	clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
 }
 
 /* tree mod log functions from ctree.c */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e941cbae3991..e7bcbd0b93ef 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1729,7 +1729,7 @@ static int cleaner_kthread(void *arg)
 		 */
 		btrfs_delete_unused_bgs(fs_info);
 sleep:
-		clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
+		clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
 		if (kthread_should_park())
 			kthread_parkme();
 		if (kthread_should_stop())
@@ -2830,6 +2830,9 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
 		return -ENOMEM;
 	btrfs_init_delayed_root(fs_info->delayed_root);
 
+	if (sb_rdonly(sb))
+		set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
+
 	return btrfs_alloc_stripe_hash_table(fs_info);
 }
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b24fa62375e0..38740cc2919f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -175,7 +175,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 	btrfs_discard_stop(fs_info);
 
 	/* btrfs handle error by forcing the filesystem readonly */
-	sb->s_flags |= SB_RDONLY;
+	btrfs_set_sb_rdonly(sb);
 	btrfs_info(fs_info, "forced readonly");
 	/*
 	 * Note that a running device replace operation is not canceled here
@@ -1953,7 +1953,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		/* avoid complains from lockdep et al. */
 		up(&fs_info->uuid_tree_rescan_sem);
 
-		sb->s_flags |= SB_RDONLY;
+		btrfs_set_sb_rdonly(sb);
 
 		/*
 		 * Setting SB_RDONLY will put the cleaner thread to
@@ -1964,6 +1964,20 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		 */
 		btrfs_delete_unused_bgs(fs_info);
 
+		/*
+		 * The cleaner task could be already running before we set the
+		 * flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock).
+		 * We must make sure that after we finish the remount, i.e. after
+		 * we call btrfs_commit_super(), the cleaner can no longer start
+		 * a transaction - either because it was dropping a dead root,
+		 * running delayed iputs or deleting an unused block group (the
+		 * cleaner picked a block group from the list of unused block
+		 * groups before we were able to in the previous call to
+		 * btrfs_delete_unused_bgs()).
+		 */
+		wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING,
+			    TASK_UNINTERRUPTIBLE);
+
 		btrfs_dev_replace_suspend_for_unmount(fs_info);
 		btrfs_scrub_cancel(fs_info);
 		btrfs_pause_balance(fs_info);
@@ -2014,7 +2028,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		if (ret)
 			goto restore;
 
-		sb->s_flags &= ~SB_RDONLY;
+		btrfs_clear_sb_rdonly(sb);
 
 		set_bit(BTRFS_FS_OPEN, &fs_info->flags);
 	}
@@ -2036,6 +2050,8 @@ restore:
 	/* We've hit an error - don't reset SB_RDONLY */
 	if (sb_rdonly(sb))
 		old_flags |= SB_RDONLY;
+	if (!(old_flags & SB_RDONLY))
+		clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
 	sb->s_flags = old_flags;
 	fs_info->mount_opt = old_opts;
 	fs_info->compress_type = old_compress_type;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7930e1c78c45..2c0aa03b6437 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2593,7 +2593,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
 
 	if (seeding_dev) {
-		sb->s_flags &= ~SB_RDONLY;
+		btrfs_clear_sb_rdonly(sb);
 		ret = btrfs_prepare_sprout(fs_info);
 		if (ret) {
 			btrfs_abort_transaction(trans, ret);
@@ -2729,7 +2729,7 @@ error_sysfs:
 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 error_trans:
 	if (seeding_dev)
-		sb->s_flags |= SB_RDONLY;
+		btrfs_set_sb_rdonly(sb);
 	if (trans)
 		btrfs_end_transaction(trans);
 error_free_zone:

From 0a31daa4b602ff6861fdf182236d64b2a353bace Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 14 Dec 2020 10:10:48 +0000
Subject: [PATCH 072/547] btrfs: add assertion for empty list of transactions
 at late stage of umount

Add an assertion to close_ctree(), after destroying all the work queues,
to verify we do not have any transaction still open or committing at that
at that point. If we have any, it means something is seriously wrong and
that can cause memory leaks and use-after-free problems. This is motivated
by the previous patches that fixed bugs where we ended up leaking an open
transaction after unmounting the filesystem.

Tested-by: Fabian Vogt <fvogt@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7bcbd0b93ef..1dfd4b2d0e1e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4181,6 +4181,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 	btrfs_stop_all_workers(fs_info);
 
+	/* We shouldn't have any transaction open at this point */
+	ASSERT(list_empty(&fs_info->trans_list));
+
 	clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
 	free_root_pointers(fs_info, true);
 	btrfs_free_fs_roots(fs_info);

From a8cc263eb58ca133617662a5a5e07131d0ebf299 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 14 Dec 2020 10:10:49 +0000
Subject: [PATCH 073/547] btrfs: run delayed iputs when remounting RO to avoid
 leaking them

When remounting RO, after setting the superblock with the RO flag, the
cleaner task will start sleeping and do nothing, since the call to
btrfs_need_cleaner_sleep() keeps returning 'true'. However, when the
cleaner task goes to sleep, the list of delayed iputs may not be empty.

As long as we are in RO mode, the cleaner task will keep sleeping and
never run the delayed iputs. This means that if a filesystem unmount
is started, we get into close_ctree() with a non-empty list of delayed
iputs, and because the filesystem is in RO mode and is not in an error
state (or a transaction aborted), btrfs_error_commit_super() and
btrfs_commit_super(), which run the delayed iputs, are never called,
and later we fail the assertion that checks if the delayed iputs list
is empty:

  assertion failed: list_empty(&fs_info->delayed_iputs), in fs/btrfs/disk-io.c:4049
  ------------[ cut here ]------------
  kernel BUG at fs/btrfs/ctree.h:3153!
  invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI
  CPU: 1 PID: 3780621 Comm: umount Tainted: G             L    5.6.0-rc2-btrfs-next-73 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
  RIP: 0010:assertfail.constprop.0+0x18/0x26 [btrfs]
  Code: 8b 7b 58 48 85 ff 74 (...)
  RSP: 0018:ffffb748c89bbdf8 EFLAGS: 00010246
  RAX: 0000000000000051 RBX: ffff9608f2584000 RCX: 0000000000000000
  RDX: 0000000000000000 RSI: ffffffff91998988 RDI: 00000000ffffffff
  RBP: ffff9608f25870d8 R08: 0000000000000000 R09: 0000000000000001
  R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffc0cbc500
  R13: ffffffff92411750 R14: 0000000000000000 R15: ffff9608f2aab250
  FS:  00007fcbfaa66c80(0000) GS:ffff960936c80000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fffc2c2dd38 CR3: 0000000235e54002 CR4: 00000000003606e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   close_ctree+0x1a2/0x2e6 [btrfs]
   generic_shutdown_super+0x6c/0x100
   kill_anon_super+0x14/0x30
   btrfs_kill_super+0x12/0x20 [btrfs]
   deactivate_locked_super+0x31/0x70
   cleanup_mnt+0x100/0x160
   task_work_run+0x93/0xc0
   exit_to_usermode_loop+0xf9/0x100
   do_syscall_64+0x20d/0x260
   entry_SYSCALL_64_after_hwframe+0x49/0xbe
  RIP: 0033:0x7fcbfaca6307
  Code: eb 0b 00 f7 d8 64 89 (...)
  RSP: 002b:00007fffc2c2ed68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 0000558203b559b0 RCX: 00007fcbfaca6307
  RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000558203b55bc0
  RBP: 0000000000000000 R08: 0000000000000001 R09: 00007fffc2c2dad0
  R10: 0000558203b55bf0 R11: 0000000000000246 R12: 0000558203b55bc0
  R13: 00007fcbfadcc204 R14: 0000558203b55aa8 R15: 0000000000000000
  Modules linked in: btrfs dm_flakey dm_log_writes (...)
  ---[ end trace d44d303790049ef6 ]---

So fix this by making the remount RO path run any remaining delayed iputs
after waiting for the cleaner to become inactive.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 38740cc2919f..12d7d3be7cd4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1978,6 +1978,16 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING,
 			    TASK_UNINTERRUPTIBLE);
 
+		/*
+		 * We've set the superblock to RO mode, so we might have made
+		 * the cleaner task sleep without running all pending delayed
+		 * iputs. Go through all the delayed iputs here, so that if an
+		 * unmount happens without remounting RW we don't end up at
+		 * finishing close_ctree() with a non-empty list of delayed
+		 * iputs.
+		 */
+		btrfs_run_delayed_iputs(fs_info);
+
 		btrfs_dev_replace_suspend_for_unmount(fs_info);
 		btrfs_scrub_cancel(fs_info);
 		btrfs_pause_balance(fs_info);

From f09ced4053bc0a2094a12b60b646114c966ef4c6 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 18 Dec 2020 14:45:24 +0100
Subject: [PATCH 074/547] xsk: Fix race in SKB mode transmit with shared cq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a race when multiple sockets are simultaneously calling sendto()
when the completion ring is shared in the SKB case. This is the case
when you share the same netdev and queue id through the
XDP_SHARED_UMEM bind flag. The problem is that multiple processes can
be in xsk_generic_xmit() and call the backpressure mechanism in
xskq_prod_reserve(xs->pool->cq). As this is a shared resource in this
specific scenario, a race might occur since the rings are
single-producer single-consumer.

Fix this by moving the tx_completion_lock from the socket to the pool
as the pool is shared between the sockets that share the completion
ring. (The pool is not shared when this is not the case.) And then
protect the accesses to xskq_prod_reserve() with this lock. The
tx_completion_lock is renamed cq_lock to better reflect that it
protects accesses to the potentially shared completion ring.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Reported-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201218134525.13119-2-magnus.karlsson@gmail.com
---
 include/net/xdp_sock.h      | 4 ----
 include/net/xsk_buff_pool.h | 5 +++++
 net/xdp/xsk.c               | 9 ++++++---
 net/xdp/xsk_buff_pool.c     | 1 +
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 4f4e93bf814c..cc17bc957548 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -58,10 +58,6 @@ struct xdp_sock {
 
 	struct xsk_queue *tx ____cacheline_aligned_in_smp;
 	struct list_head tx_list;
-	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
-	 * in the SKB destructor callback.
-	 */
-	spinlock_t tx_completion_lock;
 	/* Protects generic receive. */
 	spinlock_t rx_lock;
 
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 01755b838c74..eaa8386dbc63 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -73,6 +73,11 @@ struct xsk_buff_pool {
 	bool dma_need_sync;
 	bool unaligned;
 	void *addrs;
+	/* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
+	 * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
+	 * sockets share a single cq when the same netdev and queue id is shared.
+	 */
+	spinlock_t cq_lock;
 	struct xdp_buff_xsk *free_heads[];
 };
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c6532d77fde7..d531f9cd0de6 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -423,9 +423,9 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 	struct xdp_sock *xs = xdp_sk(skb->sk);
 	unsigned long flags;
 
-	spin_lock_irqsave(&xs->tx_completion_lock, flags);
+	spin_lock_irqsave(&xs->pool->cq_lock, flags);
 	xskq_prod_submit_addr(xs->pool->cq, addr);
-	spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
+	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 
 	sock_wfree(skb);
 }
@@ -437,6 +437,7 @@ static int xsk_generic_xmit(struct sock *sk)
 	bool sent_frame = false;
 	struct xdp_desc desc;
 	struct sk_buff *skb;
+	unsigned long flags;
 	int err = 0;
 
 	mutex_lock(&xs->mutex);
@@ -468,10 +469,13 @@ static int xsk_generic_xmit(struct sock *sk)
 		 * if there is space in it. This avoids having to implement
 		 * any buffering in the Tx path.
 		 */
+		spin_lock_irqsave(&xs->pool->cq_lock, flags);
 		if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 			kfree_skb(skb);
 			goto out;
 		}
+		spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 
 		skb->dev = xs->dev;
 		skb->priority = sk->sk_priority;
@@ -1303,7 +1307,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 	xs->state = XSK_READY;
 	mutex_init(&xs->mutex);
 	spin_lock_init(&xs->rx_lock);
-	spin_lock_init(&xs->tx_completion_lock);
 
 	INIT_LIST_HEAD(&xs->map_list);
 	spin_lock_init(&xs->map_list_lock);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 818b75060922..20598eea658c 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -71,6 +71,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 	INIT_LIST_HEAD(&pool->free_list);
 	INIT_LIST_HEAD(&pool->xsk_tx_list);
 	spin_lock_init(&pool->xsk_tx_list_lock);
+	spin_lock_init(&pool->cq_lock);
 	refcount_set(&pool->users, 1);
 
 	pool->fq = xs->fq_tmp;

From b1b95cb5c0a9694d47d5f845ba97e226cfda957d Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 18 Dec 2020 14:45:25 +0100
Subject: [PATCH 075/547] xsk: Rollback reservation at NETDEV_TX_BUSY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rollback the reservation in the completion ring when we get a
NETDEV_TX_BUSY. When this error is received from the driver, we are
supposed to let the user application retry the transmit again. And in
order to do this, we need to roll back the failed send so it can be
retried. Unfortunately, we did not cancel the reservation we had made
in the completion ring. By not doing this, we actually make the
completion ring one entry smaller per NETDEV_TX_BUSY error we get, and
after enough of these errors the completion ring will be of size zero
and transmit will stop working.

Fix this by cancelling the reservation when we get a NETDEV_TX_BUSY
error.

Fixes: 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY")
Reported-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201218134525.13119-3-magnus.karlsson@gmail.com
---
 net/xdp/xsk.c       | 3 +++
 net/xdp/xsk_queue.h | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d531f9cd0de6..8037b04a9edd 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -487,6 +487,9 @@ static int xsk_generic_xmit(struct sock *sk)
 		if  (err == NETDEV_TX_BUSY) {
 			/* Tell user-space to retry the send */
 			skb->destructor = sock_wfree;
+			spin_lock_irqsave(&xs->pool->cq_lock, flags);
+			xskq_prod_cancel(xs->pool->cq);
+			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 			/* Free skb without triggering the perf drop trace */
 			consume_skb(skb);
 			err = -EAGAIN;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 4a9663aa7afe..2823b7c3302d 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -334,6 +334,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
 	return xskq_prod_nb_free(q, 1) ? false : true;
 }
 
+static inline void xskq_prod_cancel(struct xsk_queue *q)
+{
+	q->cached_prod--;
+}
+
 static inline int xskq_prod_reserve(struct xsk_queue *q)
 {
 	if (xskq_prod_is_full(q))

From e79bb299ccad6983876686a4d8c87c92ebbe5657 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 14 Dec 2020 22:35:39 +0000
Subject: [PATCH 076/547] selftests/bpf: Fix spelling mistake "tranmission" ->
 "transmission"

There are two spelling mistakes in output messages. Fix these.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201214223539.83168-1-colin.king@canonical.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 014dedaa4dd2..1e722ee76b1f 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -715,7 +715,7 @@ static void worker_pkt_dump(void)
 		int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
 
 		if (payload == EOT) {
-			ksft_print_msg("End-of-tranmission frame received\n");
+			ksft_print_msg("End-of-transmission frame received\n");
 			fprintf(stdout, "---------------------------------------\n");
 			break;
 		}
@@ -747,7 +747,7 @@ static void worker_pkt_validate(void)
 			}
 
 			if (payloadseqnum == EOT) {
-				ksft_print_msg("End-of-tranmission frame received: PASS\n");
+				ksft_print_msg("End-of-transmission frame received: PASS\n");
 				sigvar = 1;
 				break;
 			}

From d467d80dc399ba77875d647f2f37b7d1a70d94c2 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 16 Dec 2020 10:47:15 +0800
Subject: [PATCH 077/547] bpf: Remove unused including <linux/version.h>

Remove including <linux/version.h> that don't need it.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1608086835-54523-1-git-send-email-tiantao6@hisilicon.com
---
 kernel/bpf/syscall.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 287be337d5f6..bb2700ec5bf3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -17,7 +17,6 @@
 #include <linux/fs.h>
 #include <linux/license.h>
 #include <linux/filter.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/idr.h>
 #include <linux/cred.h>

From 4aa1464acbe3697710279a4bd65cb4801ed30425 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 17 Dec 2020 14:29:11 -0800
Subject: [PATCH 078/547] spi: spi-geni-qcom: Fix geni_spi_isr() NULL
 dereference in timeout case

In commit 7ba9bdcb91f6 ("spi: spi-geni-qcom: Don't keep a local state
variable") we changed handle_fifo_timeout() so that we set
"mas->cur_xfer" to NULL to make absolutely sure that we don't mess
with the buffers from the previous transfer in the timeout case.

Unfortunately, this caused the IRQ handler to dereference NULL in some
cases.  One case:

  CPU0                           CPU1
  ----                           ----
                                 setup_fifo_xfer()
                                  geni_se_setup_m_cmd()
                                 <hardware starts transfer>
                                 <transfer completes in hardware>
                                 <hardware sets M_RX_FIFO_WATERMARK_EN in m_irq>
                                 ...
                                 handle_fifo_timeout()
                                  spin_lock_irq(mas->lock)
                                  mas->cur_xfer = NULL
                                  geni_se_cancel_m_cmd()
                                  spin_unlock_irq(mas->lock)

  geni_spi_isr()
   spin_lock(mas->lock)
   if (m_irq & M_RX_FIFO_WATERMARK_EN)
    geni_spi_handle_rx()
     mas->cur_xfer NULL dereference!

tl;dr: Seriously delayed interrupts for RX/TX can lead to timeout
handling setting mas->cur_xfer to NULL.

Let's check for the NULL transfer in the TX and RX cases and reset the
watermark or clear out the fifo respectively to put the hardware back
into a sane state.

NOTE: things still could get confused if we get timeouts all the way
through handle_fifo_timeout() and then start a new transfer because
interrupts from the old transfer / cancel / abort could still be
pending.  A future patch will help this corner case.

Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20201217142842.v3.1.I99ee04f0cb823415df59bd4f550d6ff5756e43d6@changeid
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-geni-qcom.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 512e925d5ea4..096edfbde451 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -354,6 +354,12 @@ static bool geni_spi_handle_tx(struct spi_geni_master *mas)
 	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
 	unsigned int i = 0;
 
+	/* Stop the watermark IRQ if nothing to send */
+	if (!mas->cur_xfer) {
+		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+		return false;
+	}
+
 	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;
 	if (mas->tx_rem_bytes < max_bytes)
 		max_bytes = mas->tx_rem_bytes;
@@ -396,6 +402,14 @@ static void geni_spi_handle_rx(struct spi_geni_master *mas)
 		if (rx_last_byte_valid && rx_last_byte_valid < 4)
 			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;
 	}
+
+	/* Clear out the FIFO and bail if nowhere to put it */
+	if (!mas->cur_xfer) {
+		for (i = 0; i < DIV_ROUND_UP(rx_bytes, bytes_per_fifo_word); i++)
+			readl(se->base + SE_GENI_RX_FIFOn);
+		return;
+	}
+
 	if (mas->rx_rem_bytes < rx_bytes)
 		rx_bytes = mas->rx_rem_bytes;
 

From 690d8b917bbe64772cb0b652311bcd50908aea6b Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 17 Dec 2020 14:29:12 -0800
Subject: [PATCH 079/547] spi: spi-geni-qcom: Fail new xfers if
 xfer/cancel/abort pending

If we got a timeout when trying to send an abort command then it means
that we just got 3 timeouts in a row:

1. The original timeout that caused handle_fifo_timeout() to be
   called.
2. A one second timeout waiting for the cancel command to finish.
3. A one second timeout waiting for the abort command to finish.

SPI is clocked by the controller, so nothing (aside from a hardware
fault or a totally broken sequencer) should be causing the actual
commands to fail in hardware.  However, even though the hardware
itself is not expected to fail (and it'd be hard to predict how we
should handle things if it did), it's easy to hit the timeout case by
simply blocking our interrupt handler from running for a long period
of time.  Obviously the system is in pretty bad shape if a interrupt
handler is blocked for > 2 seconds, but there are certainly bugs (even
bugs in other unrelated drivers) that can make this happen.

Let's make things a bit more robust against this case.  If we fail to
abort we'll set a flag and then we'll block all future transfers until
we have no more interrupts pending.

Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20201217142842.v3.2.Ibade998ed587e070388b4bf58801f1107a40eb53@changeid
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-geni-qcom.c | 59 +++++++++++++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 096edfbde451..32c053705dec 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -83,6 +83,7 @@ struct spi_geni_master {
 	spinlock_t lock;
 	int irq;
 	bool cs_flag;
+	bool abort_failed;
 };
 
 static int get_spi_clk_cfg(unsigned int speed_hz,
@@ -141,8 +142,49 @@ static void handle_fifo_timeout(struct spi_master *spi,
 	spin_unlock_irq(&mas->lock);
 
 	time_left = wait_for_completion_timeout(&mas->abort_done, HZ);
-	if (!time_left)
+	if (!time_left) {
 		dev_err(mas->dev, "Failed to cancel/abort m_cmd\n");
+
+		/*
+		 * No need for a lock since SPI core has a lock and we never
+		 * access this from an interrupt.
+		 */
+		mas->abort_failed = true;
+	}
+}
+
+static bool spi_geni_is_abort_still_pending(struct spi_geni_master *mas)
+{
+	struct geni_se *se = &mas->se;
+	u32 m_irq, m_irq_en;
+
+	if (!mas->abort_failed)
+		return false;
+
+	/*
+	 * The only known case where a transfer times out and then a cancel
+	 * times out then an abort times out is if something is blocking our
+	 * interrupt handler from running.  Avoid starting any new transfers
+	 * until that sorts itself out.
+	 */
+	spin_lock_irq(&mas->lock);
+	m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS);
+	m_irq_en = readl(se->base + SE_GENI_M_IRQ_EN);
+	spin_unlock_irq(&mas->lock);
+
+	if (m_irq & m_irq_en) {
+		dev_err(mas->dev, "Interrupts pending after abort: %#010x\n",
+			m_irq & m_irq_en);
+		return true;
+	}
+
+	/*
+	 * If we're here the problem resolved itself so no need to check more
+	 * on future transfers.
+	 */
+	mas->abort_failed = false;
+
+	return false;
 }
 
 static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
@@ -158,9 +200,15 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
 	if (set_flag == mas->cs_flag)
 		return;
 
+	pm_runtime_get_sync(mas->dev);
+
+	if (spi_geni_is_abort_still_pending(mas)) {
+		dev_err(mas->dev, "Can't set chip select\n");
+		goto exit;
+	}
+
 	mas->cs_flag = set_flag;
 
-	pm_runtime_get_sync(mas->dev);
 	spin_lock_irq(&mas->lock);
 	reinit_completion(&mas->cs_done);
 	if (set_flag)
@@ -173,6 +221,7 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
 	if (!time_left)
 		handle_fifo_timeout(spi, NULL);
 
+exit:
 	pm_runtime_put(mas->dev);
 }
 
@@ -280,6 +329,9 @@ static int spi_geni_prepare_message(struct spi_master *spi,
 	int ret;
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
 
+	if (spi_geni_is_abort_still_pending(mas))
+		return -EBUSY;
+
 	ret = setup_fifo_params(spi_msg->spi, spi);
 	if (ret)
 		dev_err(mas->dev, "Couldn't select mode %d\n", ret);
@@ -509,6 +561,9 @@ static int spi_geni_transfer_one(struct spi_master *spi,
 {
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
 
+	if (spi_geni_is_abort_still_pending(mas))
+		return -EBUSY;
+
 	/* Terminate and return success for 0 byte length transfer */
 	if (!xfer->len)
 		return 0;

From 3d7d916f9bc98ce88272b3e4405c7c685afbfcd6 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 17 Dec 2020 14:29:13 -0800
Subject: [PATCH 080/547] spi: spi-geni-qcom: Don't try to set CS if an xfer is
 pending

If we get a timeout sending then this happens:

spi_transfer_one_message()
 ->transfer_one() AKA spi_geni_transfer_one()
  setup_fifo_xfer()
   mas->cur_xfer = non-NULL
 spi_transfer_wait() => TIMES OUT
 if (msg->status != -EINPROGRESS)
  goto out
 if (ret != 0 ...)
  spi_set_cs()
   ->set_cs AKA spi_geni_set_cs()
    # mas->cur_xfer is non-NULL

The above happens _before_ the SPI core calls ->handle_err() AKA
handle_fifo_timeout().

Unfortunately that won't work so well on geni.  If we got a timeout
transferring then it's likely that our interrupt handler is blocked,
but we need that same interrupt handler to run and the command channel
to be unblocked in order to adjust the chip select.  Trying to set the
chip select doesn't crash us but ends up confusing our state machine
and leads to messages like: Premature done. rx_rem = 32 bpw8

Let's just drop the chip select request in this case.  We can detect
the case because cur_xfer is non-NULL--it would have been set to NULL
in the interrupt handler if the previous transfer had finished.  Sure,
we might leave the chip select in the wrong state but it's likely it
was going to fail anyway and this avoids getting the driver even more
confused about what it's doing.

The SPI core in general assumes that setting chip select is a simple
operation that doesn't fail.  Yet another reason to just reconfigure
the chip select line as GPIOs.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20201217142842.v3.3.I07afdedcc49655c5d26880f8df9170aac5792378@changeid
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-geni-qcom.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 32c053705dec..365565d3a998 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -207,9 +207,14 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
 		goto exit;
 	}
 
-	mas->cs_flag = set_flag;
-
 	spin_lock_irq(&mas->lock);
+	if (mas->cur_xfer) {
+		dev_err(mas->dev, "Can't set CS when prev xfer running\n");
+		spin_unlock_irq(&mas->lock);
+		goto exit;
+	}
+
+	mas->cs_flag = set_flag;
 	reinit_completion(&mas->cs_done);
 	if (set_flag)
 		geni_se_setup_m_cmd(se, SPI_CS_ASSERT, 0);

From 17fa81aa702ec118f2b835715897041675b06336 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 17 Dec 2020 14:29:14 -0800
Subject: [PATCH 081/547] spi: spi-geni-qcom: Print an error when we timeout
 setting the CS

If we're using geni to manage the chip select line (don't do it--use a
GPIO!) and we happen to get a timeout waiting for the chip select
command to be completed, no errors are printed even though things
might not be in the best shape.  Let's add a print.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20201217142842.v3.4.I666b37646de9652cef438ac7c2c6c2053367fc6b@changeid
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-geni-qcom.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 365565d3a998..881f645661cc 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -223,8 +223,10 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
 	spin_unlock_irq(&mas->lock);
 
 	time_left = wait_for_completion_timeout(&mas->cs_done, HZ);
-	if (!time_left)
+	if (!time_left) {
+		dev_warn(mas->dev, "Timeout setting chip select\n");
 		handle_fifo_timeout(spi, NULL);
+	}
 
 exit:
 	pm_runtime_put(mas->dev);

From abdcd06c4dedbcabaec68c433c7f53f33307811f Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Wed, 16 Dec 2020 09:28:04 +0200
Subject: [PATCH 082/547] net: af_packet: fix procfs header for 64-bit pointers

On 64-bit systems the packet procfs header field names following 'sk'
are not aligned correctly:

sk       RefCnt Type Proto  Iface R Rmem   User   Inode
00000000605d2c64 3      3    0003   7     1 450880 0      16643
00000000080e9b80 2      2    0000   0     0 0      0      17404
00000000b23b8a00 2      2    0000   0     0 0      0      17421
...

With this change field names are correctly aligned:

sk               RefCnt Type Proto  Iface R Rmem   User   Inode
000000005c3b1d97 3      3    0003   7     1 21568  0      16178
000000007be55bb7 3      3    fbce   8     1 0      0      16250
00000000be62127d 3      3    fbcd   8     1 0      0      16254
...

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Link: https://lore.kernel.org/r/54917251d8433735d9a24e935a6cb8eb88b4058a.1608103684.git.baruch@tkos.co.il
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/packet/af_packet.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index de8e8dbbdeb8..6bbc7a448593 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4595,7 +4595,9 @@ static void packet_seq_stop(struct seq_file *seq, void *v)
 static int packet_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
+		seq_printf(seq,
+			   "%*sRefCnt Type Proto  Iface R Rmem   User   Inode\n",
+			   IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk");
 	else {
 		struct sock *s = sk_entry(v);
 		const struct packet_sock *po = pkt_sk(s);

From 698285da79f5b0b099db15a37ac661ac408c80eb Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 17 Dec 2020 22:29:46 +0100
Subject: [PATCH 083/547] net/sched: sch_taprio: ensure to reset/destroy all
 child qdiscs

taprio_graft() can insert a NULL element in the array of child qdiscs. As
a consquence, taprio_reset() might not reset child qdiscs completely, and
taprio_destroy() might leak resources. Fix it by ensuring that loops that
iterate over q->qdiscs[] don't end when they find the first NULL item.

Fixes: 44d4775ca518 ("net/sched: sch_taprio: reset child qdiscs before freeing them")
Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler")
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Link: https://lore.kernel.org/r/13edef6778fef03adc751582562fba4a13e06d6a.1608240532.git.dcaratti@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/sch_taprio.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c74817ec9964..6f775275826a 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1605,8 +1605,9 @@ static void taprio_reset(struct Qdisc *sch)
 
 	hrtimer_cancel(&q->advance_timer);
 	if (q->qdiscs) {
-		for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
-			qdisc_reset(q->qdiscs[i]);
+		for (i = 0; i < dev->num_tx_queues; i++)
+			if (q->qdiscs[i])
+				qdisc_reset(q->qdiscs[i]);
 	}
 	sch->qstats.backlog = 0;
 	sch->q.qlen = 0;
@@ -1626,7 +1627,7 @@ static void taprio_destroy(struct Qdisc *sch)
 	taprio_disable_offload(dev, q, NULL);
 
 	if (q->qdiscs) {
-		for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
+		for (i = 0; i < dev->num_tx_queues; i++)
 			qdisc_put(q->qdiscs[i]);
 
 		kfree(q->qdiscs);

From 87508224485323ce2d4e7fb929ec80f51adcc238 Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Thu, 17 Dec 2020 16:52:15 +0200
Subject: [PATCH 084/547] net: mvpp2: disable force link UP during port init
 procedure

Force link UP can be enabled by bootloader during tftpboot
and breaks NFS support.
Force link UP disabled during port init procedure.

Fixes: f84bf386f395 ("net: mvpp2: initialize the GoP")
Signed-off-by: Stefan Chulski <stefanc@marvell.com>
Acked-by: Marcin Wojtas <mw@semihalf.com>
Link: https://lore.kernel.org/r/1608216735-14501-1-git-send-email-stefanc@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index afdd22827223..0f18d034ce3e 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -5487,7 +5487,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 	struct mvpp2 *priv = port->priv;
 	struct mvpp2_txq_pcpu *txq_pcpu;
 	unsigned int thread;
-	int queue, err;
+	int queue, err, val;
 
 	/* Checks for hardware constraints */
 	if (port->first_rxq + port->nrxqs >
@@ -5501,6 +5501,18 @@ static int mvpp2_port_init(struct mvpp2_port *port)
 	mvpp2_egress_disable(port);
 	mvpp2_port_disable(port);
 
+	if (mvpp2_is_xlg(port->phy_interface)) {
+		val = readl(port->base + MVPP22_XLG_CTRL0_REG);
+		val &= ~MVPP22_XLG_CTRL0_FORCE_LINK_PASS;
+		val |= MVPP22_XLG_CTRL0_FORCE_LINK_DOWN;
+		writel(val, port->base + MVPP22_XLG_CTRL0_REG);
+	} else {
+		val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+		val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+		val |= MVPP2_GMAC_FORCE_LINK_DOWN;
+		writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+	}
+
 	port->tx_time_coal = MVPP2_TXDONE_COAL_USEC;
 
 	port->txqs = devm_kcalloc(dev, port->ntxqs, sizeof(*port->txqs),

From 3f48fab62bb81a7f9d01e9d43c40395fad011dd5 Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Thu, 17 Dec 2020 20:30:17 +0200
Subject: [PATCH 085/547] net: mvpp2: Add TCAM entry to drop flow control pause
 frames

Issue:
Flow control frame used to pause GoP(MAC) was delivered to the CPU
and created a load on the CPU. Since XOFF/XON frames are used only
by MAC, these frames should be dropped inside MAC.

Fix:
According to 802.3-2012 - IEEE Standard for Ethernet pause frame
has unique destination MAC address 01-80-C2-00-00-01.
Add TCAM parser entry to track and drop pause frames by destination MAC.

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Signed-off-by: Stefan Chulski <stefanc@marvell.com>
Link: https://lore.kernel.org/r/1608229817-21951-1-git-send-email-stefanc@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/marvell/mvpp2/mvpp2_prs.c    | 33 +++++++++++++++++++
 .../net/ethernet/marvell/mvpp2/mvpp2_prs.h    |  2 +-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
index 5692c6087bbb..f069593d7e50 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
@@ -405,6 +405,38 @@ static int mvpp2_prs_tcam_first_free(struct mvpp2 *priv, unsigned char start,
 	return -EINVAL;
 }
 
+/* Drop flow control pause frames */
+static void mvpp2_prs_drop_fc(struct mvpp2 *priv)
+{
+	unsigned char da[ETH_ALEN] = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x01 };
+	struct mvpp2_prs_entry pe;
+	unsigned int len;
+
+	memset(&pe, 0, sizeof(pe));
+
+	/* For all ports - drop flow control frames */
+	pe.index = MVPP2_PE_FC_DROP;
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
+
+	/* Set match on DA */
+	len = ETH_ALEN;
+	while (len--)
+		mvpp2_prs_tcam_data_byte_set(&pe, len, da[len], 0xff);
+
+	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+				 MVPP2_PRS_RI_DROP_MASK);
+
+	mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1);
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+
+	/* Mask all ports */
+	mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+	/* Update shadow table and hw entry */
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+	mvpp2_prs_hw_write(priv, &pe);
+}
+
 /* Enable/disable dropping all mac da's */
 static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add)
 {
@@ -1162,6 +1194,7 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv)
 	mvpp2_prs_hw_write(priv, &pe);
 
 	/* Create dummy entries for drop all and promiscuous modes */
+	mvpp2_prs_drop_fc(priv);
 	mvpp2_prs_mac_drop_all_set(priv, 0, false);
 	mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false);
 	mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h
index e22f6c85d380..4b68dd374733 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h
@@ -129,7 +129,7 @@
 #define MVPP2_PE_VID_EDSA_FLTR_DEFAULT	(MVPP2_PRS_TCAM_SRAM_SIZE - 7)
 #define MVPP2_PE_VLAN_DBL		(MVPP2_PRS_TCAM_SRAM_SIZE - 6)
 #define MVPP2_PE_VLAN_NONE		(MVPP2_PRS_TCAM_SRAM_SIZE - 5)
-/* reserved */
+#define MVPP2_PE_FC_DROP		(MVPP2_PRS_TCAM_SRAM_SIZE - 4)
 #define MVPP2_PE_MAC_MC_PROMISCUOUS	(MVPP2_PRS_TCAM_SRAM_SIZE - 3)
 #define MVPP2_PE_MAC_UC_PROMISCUOUS	(MVPP2_PRS_TCAM_SRAM_SIZE - 2)
 #define MVPP2_PE_MAC_NON_PROMISCUOUS	(MVPP2_PRS_TCAM_SRAM_SIZE - 1)

From fec6079b2eeab319d9e3d074f54d3b6f623e9701 Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Thu, 17 Dec 2020 20:37:46 +0200
Subject: [PATCH 086/547] net: mvpp2: prs: fix PPPoE with ipv6 packet parse

Current PPPoE+IPv6 entry is jumping to 'next-hdr'
field and not to 'DIP' field as done for IPv4.

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Reported-by: Liron Himi <lironh@marvell.com>
Signed-off-by: Stefan Chulski <stefanc@marvell.com>
Link: https://lore.kernel.org/r/1608230266-22111-1-git-send-email-stefanc@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
index f069593d7e50..a30eb90ba3d2 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
@@ -1680,8 +1680,9 @@ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv)
 	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP6);
 	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP6,
 				 MVPP2_PRS_RI_L3_PROTO_MASK);
-	/* Skip eth_type + 4 bytes of IPv6 header */
-	mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 4,
+	/* Jump to DIP of IPV6 header */
+	mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 8 +
+				 MVPP2_MAX_L3_ADDR_SIZE,
 				 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
 	/* Set L3 offset */
 	mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3,

From e16ab3db87b3d5d4118dfb68e955f62c4e09573a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 5 Dec 2020 11:35:25 +0100
Subject: [PATCH 087/547] mt76: usb: remove wake logic in mt76u_status_worker

Similar to mmio code path, remove wake logic in mt76u_status_worker handler.
Starting from commit 90d494c99a99 ("mt76: improve tx queue stop/wake")', the
wake queue logic on the usb status path is no longer necessary since the hw
queues are no longer stopped on the mt76 tx path.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/00009bf0cfdc9565e4432cad3ed51888c667c25d.1607164041.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/usb.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index 8b08cad131c8..b95d093728b9 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -811,7 +811,6 @@ static void mt76u_status_worker(struct mt76_worker *w)
 	struct mt76_dev *dev = container_of(usb, struct mt76_dev, usb);
 	struct mt76_queue_entry entry;
 	struct mt76_queue *q;
-	bool wake;
 	int i;
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
@@ -829,10 +828,6 @@ static void mt76u_status_worker(struct mt76_worker *w)
 			mt76_queue_tx_complete(dev, q, &entry);
 		}
 
-		wake = q->stopped && q->queued < q->ndesc - 8;
-		if (wake)
-			q->stopped = false;
-
 		if (!q->queued)
 			wake_up(&dev->tx_wait);
 
@@ -841,8 +836,6 @@ static void mt76u_status_worker(struct mt76_worker *w)
 		if (dev->drv->tx_status_data &&
 		    !test_and_set_bit(MT76_READING_STATS, &dev->phy.state))
 			queue_work(dev->wq, &dev->usb.stat_work);
-		if (wake)
-			ieee80211_wake_queue(dev->hw, i);
 	}
 }
 

From 123bb2b737881127b450e8b3b1bae69a8949498e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 5 Dec 2020 11:35:26 +0100
Subject: [PATCH 088/547] mt76: sdio: remove wake logic in
 mt76s_process_tx_queue

Similar to mmio/usb code path, remove wake logic in mt76s_process_tx_queue
routine. Starting from commit 90d494c99a99 ("mt76: improve tx queue
stop/wake"), the wake queue logic on the sdio status path is no longer
necessary since the hw queues are no longer stopped on the mt76 tx path.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/d2d7d9d437f4dec2ef1df0ed070b9cf299f021ad.1607164041.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/sdio.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c
index 62b5b912818f..7cd995118257 100644
--- a/drivers/net/wireless/mediatek/mt76/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/sdio.c
@@ -157,7 +157,7 @@ static void mt76s_net_worker(struct mt76_worker *w)
 
 static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q)
 {
-	bool wake, mcu = q == dev->q_mcu[MT_MCUQ_WM];
+	bool mcu = q == dev->q_mcu[MT_MCUQ_WM];
 	struct mt76_queue_entry entry;
 	int nframes = 0;
 
@@ -177,21 +177,12 @@ static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q)
 		nframes++;
 	}
 
-	wake = q->stopped && q->queued < q->ndesc - 8;
-	if (wake)
-		q->stopped = false;
-
 	if (!q->queued)
 		wake_up(&dev->tx_wait);
 
-	if (mcu)
-		goto out;
+	if (!mcu)
+		mt76_txq_schedule(&dev->phy, q->qid);
 
-	mt76_txq_schedule(&dev->phy, q->qid);
-
-	if (wake)
-		ieee80211_wake_queue(dev->hw, q->qid);
-out:
 	return nframes;
 }
 

From f7217f718747641fc80cd062f183107439f2a066 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 8 Dec 2020 10:18:11 +0100
Subject: [PATCH 089/547] mt76: mt76s: fix NULL pointer dereference in
 mt76s_process_tx_queue

Fix a possible NULL pointer dereference in mt76s_process_tx_queue that
can occur if status thread runs before allocating tx queues

Fixes: 6a618acb7e62 ("mt76: sdio: convert {status/net}_work to mt76_worker")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/b49c1b4edacd87b2241a9fd0431dd4864c8963f6.1607418933.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/sdio.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c
index 7cd995118257..0b6facb17ff7 100644
--- a/drivers/net/wireless/mediatek/mt76/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/sdio.c
@@ -157,10 +157,14 @@ static void mt76s_net_worker(struct mt76_worker *w)
 
 static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q)
 {
-	bool mcu = q == dev->q_mcu[MT_MCUQ_WM];
 	struct mt76_queue_entry entry;
 	int nframes = 0;
+	bool mcu;
 
+	if (!q)
+		return 0;
+
+	mcu = q == dev->q_mcu[MT_MCUQ_WM];
 	while (q->queued > 0) {
 		if (!q->entry[q->tail].done)
 			break;

From 0bd157fa2aaa2c77d6254321d7751aa9eec68c7b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 18 Dec 2020 09:32:02 -0800
Subject: [PATCH 090/547] mt76: mt7915: fix MESH ifdef block

Fix a build error when CONFIG_MAC80211_MESH is not enabled:

../drivers/net/wireless/mediatek/mt76/mt7915/init.c:47:2: error: expected expression before '}' token
  }, {
  ^

Fixes: af901eb4ab80 ("mt76: mt7915: get rid of dbdc debugfs knob")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Shayne Chen <shayne.chen@mediatek.com>
Cc: Ryder Lee <ryder.lee@mediatek.com>
Cc: Lorenzo Bianconi <lorenzo@kernel.org>
Cc: Felix Fietkau <nbd@nbd.name>
Cc: linux-wireless@vger.kernel.org
Cc: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20201218173202.23159-1-rdunlap@infradead.org
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index ed4635bd151a..102a8f14c22d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -40,9 +40,9 @@ static const struct ieee80211_iface_limit if_limits[] = {
 		.types = BIT(NL80211_IFTYPE_ADHOC)
 	}, {
 		.max = 16,
-		.types = BIT(NL80211_IFTYPE_AP) |
+		.types = BIT(NL80211_IFTYPE_AP)
 #ifdef CONFIG_MAC80211_MESH
-			 BIT(NL80211_IFTYPE_MESH_POINT)
+			 | BIT(NL80211_IFTYPE_MESH_POINT)
 #endif
 	}, {
 		.max = MT7915_MAX_INTERFACES,

From bfe55584713b4d4d518ffe9cf2dab1129eba6321 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <aspriel@gmail.com>
Date: Sun, 20 Dec 2020 15:18:07 +0100
Subject: [PATCH 091/547] MAINTAINERS: switch to different email address

Switching to private mail account as work email is polluted with a legal
disclaimer. Just making it extra clear by changing the email address in
the MAINTAINERS file as well.

Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Arend van Spriel <aspriel@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20201220141807.17278-1-aspriel@gmail.com
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5c1a6ba5ef26..85a7c3a2ed63 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3545,7 +3545,7 @@ S:	Supported
 F:	drivers/net/ethernet/broadcom/bnxt/
 
 BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER
-M:	Arend van Spriel <arend.vanspriel@broadcom.com>
+M:	Arend van Spriel <aspriel@gmail.com>
 M:	Franky Lin <franky.lin@broadcom.com>
 M:	Hante Meuleman <hante.meuleman@broadcom.com>
 M:	Chi-hsien Lin <chi-hsien.lin@infineon.com>

From a590370d918fc66c62df6620445791fbe840344a Mon Sep 17 00:00:00 2001
From: Roman Guskov <rguskov@dh-electronics.com>
Date: Mon, 21 Dec 2020 13:35:32 +0100
Subject: [PATCH 092/547] spi: stm32: FIFO threshold level - fix align packet
 size

if cur_bpw <= 8 and xfer_len < 4 then the value of fthlv will be 1 and
SPI registers content may have been lost.

* If SPI data register is accessed as a 16-bit register and DSIZE <= 8bit,
  better to select FTHLV = 2, 4, 6 etc

* If SPI data register is accessed as a 32-bit register and DSIZE > 8bit,
  better to select FTHLV = 2, 4, 6 etc, while if DSIZE <= 8bit,
  better to select FTHLV = 4, 8, 12 etc

Signed-off-by: Roman Guskov <rguskov@dh-electronics.com>
Fixes: dcbe0d84dfa5 ("spi: add driver for STM32 SPI controller")
Reviewed-by: Marek Vasut <marex@denx.de>
Link: https://lore.kernel.org/r/20201221123532.27272-1-rguskov@dh-electronics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 471dedf3d339..6017209c6d2f 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -493,9 +493,9 @@ static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi, u32 xfer_len)
 
 	/* align packet size with data registers access */
 	if (spi->cur_bpw > 8)
-		fthlv -= (fthlv % 2); /* multiple of 2 */
+		fthlv += (fthlv % 2) ? 1 : 0;
 	else
-		fthlv -= (fthlv % 4); /* multiple of 4 */
+		fthlv += (fthlv % 4) ? (4 - (fthlv % 4)) : 0;
 
 	if (!fthlv)
 		fthlv = 1;

From 3b66e4a8e58a85af3212c7117d7a29c9ef6679a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20G=C3=BCnther?= <agx@sigxcpu.org>
Date: Fri, 18 Dec 2020 19:38:07 +0100
Subject: [PATCH 093/547] regulator: bd718x7: Add enable times
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the typical startup times from the data sheet so boards get a
reasonable default. Not setting any enable time can lead to board hangs
when e.g. clocks are enabled too soon afterwards.

This fixes gpu power domain resume on the Librem 5.

[Moved #defines into driver, seems to be general agreement and avoids any
cross tree issues -- broonie]

Signed-off-by: Guido Günther <agx@sigxcpu.org>
Reviewed-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/41fb2ed19f584f138336344e2297ae7301f72b75.1608316658.git.agx@sigxcpu.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd718x7-regulator.c | 57 +++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c
index e6d5d98c3cea..9309765d0450 100644
--- a/drivers/regulator/bd718x7-regulator.c
+++ b/drivers/regulator/bd718x7-regulator.c
@@ -15,6 +15,36 @@
 #include <linux/regulator/of_regulator.h>
 #include <linux/slab.h>
 
+/* Typical regulator startup times as per data sheet in uS */
+#define BD71847_BUCK1_STARTUP_TIME 144
+#define BD71847_BUCK2_STARTUP_TIME 162
+#define BD71847_BUCK3_STARTUP_TIME 162
+#define BD71847_BUCK4_STARTUP_TIME 240
+#define BD71847_BUCK5_STARTUP_TIME 270
+#define BD71847_BUCK6_STARTUP_TIME 200
+#define BD71847_LDO1_STARTUP_TIME  440
+#define BD71847_LDO2_STARTUP_TIME  370
+#define BD71847_LDO3_STARTUP_TIME  310
+#define BD71847_LDO4_STARTUP_TIME  400
+#define BD71847_LDO5_STARTUP_TIME  530
+#define BD71847_LDO6_STARTUP_TIME  400
+
+#define BD71837_BUCK1_STARTUP_TIME 160
+#define BD71837_BUCK2_STARTUP_TIME 180
+#define BD71837_BUCK3_STARTUP_TIME 180
+#define BD71837_BUCK4_STARTUP_TIME 180
+#define BD71837_BUCK5_STARTUP_TIME 160
+#define BD71837_BUCK6_STARTUP_TIME 240
+#define BD71837_BUCK7_STARTUP_TIME 220
+#define BD71837_BUCK8_STARTUP_TIME 200
+#define BD71837_LDO1_STARTUP_TIME  440
+#define BD71837_LDO2_STARTUP_TIME  370
+#define BD71837_LDO3_STARTUP_TIME  310
+#define BD71837_LDO4_STARTUP_TIME  400
+#define BD71837_LDO5_STARTUP_TIME  310
+#define BD71837_LDO6_STARTUP_TIME  400
+#define BD71837_LDO7_STARTUP_TIME  530
+
 /*
  * BD718(37/47/50) have two "enable control modes". ON/OFF can either be
  * controlled by software - or by PMIC internal HW state machine. Whether
@@ -613,6 +643,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = DVS_BUCK_RUN_MASK,
 			.enable_reg = BD718XX_REG_BUCK1_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71847_BUCK1_STARTUP_TIME,
 			.owner = THIS_MODULE,
 			.of_parse_cb = buck_set_hw_dvs_levels,
 		},
@@ -646,6 +677,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = DVS_BUCK_RUN_MASK,
 			.enable_reg = BD718XX_REG_BUCK2_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71847_BUCK2_STARTUP_TIME,
 			.owner = THIS_MODULE,
 			.of_parse_cb = buck_set_hw_dvs_levels,
 		},
@@ -680,6 +712,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.linear_range_selectors = bd71847_buck3_volt_range_sel,
 			.enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71847_BUCK3_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -706,6 +739,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_range_mask = BD71847_BUCK4_RANGE_MASK,
 			.linear_range_selectors = bd71847_buck4_volt_range_sel,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71847_BUCK4_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -727,6 +761,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK,
 			.enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71847_BUCK5_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -750,6 +785,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK,
 			.enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71847_BUCK6_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -775,6 +811,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.linear_range_selectors = bd718xx_ldo1_volt_range_sel,
 			.enable_reg = BD718XX_REG_LDO1_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71847_LDO1_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -796,6 +833,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.n_voltages = ARRAY_SIZE(ldo_2_volts),
 			.enable_reg = BD718XX_REG_LDO2_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71847_LDO2_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -818,6 +856,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = BD718XX_LDO3_MASK,
 			.enable_reg = BD718XX_REG_LDO3_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71847_LDO3_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -840,6 +879,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = BD718XX_LDO4_MASK,
 			.enable_reg = BD718XX_REG_LDO4_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71847_LDO4_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -865,6 +905,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.linear_range_selectors = bd71847_ldo5_volt_range_sel,
 			.enable_reg = BD718XX_REG_LDO5_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71847_LDO5_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -889,6 +930,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = BD718XX_LDO6_MASK,
 			.enable_reg = BD718XX_REG_LDO6_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71847_LDO6_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -942,6 +984,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = DVS_BUCK_RUN_MASK,
 			.enable_reg = BD718XX_REG_BUCK1_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71837_BUCK1_STARTUP_TIME,
 			.owner = THIS_MODULE,
 			.of_parse_cb = buck_set_hw_dvs_levels,
 		},
@@ -975,6 +1018,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = DVS_BUCK_RUN_MASK,
 			.enable_reg = BD718XX_REG_BUCK2_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71837_BUCK2_STARTUP_TIME,
 			.owner = THIS_MODULE,
 			.of_parse_cb = buck_set_hw_dvs_levels,
 		},
@@ -1005,6 +1049,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = DVS_BUCK_RUN_MASK,
 			.enable_reg = BD71837_REG_BUCK3_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71837_BUCK3_STARTUP_TIME,
 			.owner = THIS_MODULE,
 			.of_parse_cb = buck_set_hw_dvs_levels,
 		},
@@ -1033,6 +1078,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = DVS_BUCK_RUN_MASK,
 			.enable_reg = BD71837_REG_BUCK4_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71837_BUCK4_STARTUP_TIME,
 			.owner = THIS_MODULE,
 			.of_parse_cb = buck_set_hw_dvs_levels,
 		},
@@ -1065,6 +1111,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.linear_range_selectors = bd71837_buck5_volt_range_sel,
 			.enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71837_BUCK5_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1088,6 +1135,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD71837_BUCK6_MASK,
 			.enable_reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71837_BUCK6_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1109,6 +1157,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK,
 			.enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71837_BUCK7_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1132,6 +1181,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK,
 			.enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
+			.enable_time = BD71837_BUCK8_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1157,6 +1207,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.linear_range_selectors = bd718xx_ldo1_volt_range_sel,
 			.enable_reg = BD718XX_REG_LDO1_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71837_LDO1_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1178,6 +1229,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.n_voltages = ARRAY_SIZE(ldo_2_volts),
 			.enable_reg = BD718XX_REG_LDO2_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71837_LDO2_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1200,6 +1252,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD718XX_LDO3_MASK,
 			.enable_reg = BD718XX_REG_LDO3_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71837_LDO3_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1222,6 +1275,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD718XX_LDO4_MASK,
 			.enable_reg = BD718XX_REG_LDO4_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71837_LDO4_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1246,6 +1300,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD71837_LDO5_MASK,
 			.enable_reg = BD718XX_REG_LDO5_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71837_LDO5_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1272,6 +1327,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD718XX_LDO6_MASK,
 			.enable_reg = BD718XX_REG_LDO6_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71837_LDO6_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {
@@ -1296,6 +1352,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD71837_LDO7_MASK,
 			.enable_reg = BD71837_REG_LDO7_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
+			.enable_time = BD71837_LDO7_STARTUP_TIME,
 			.owner = THIS_MODULE,
 		},
 		.init = {

From e7e518053c267bb6be3799520d9f4a34c7264a2e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 Dec 2020 11:25:06 -0800
Subject: [PATCH 094/547] bpf: Add schedule point in htab_init_buckets()

We noticed that with a LOCKDEP enabled kernel,
allocating a hash table with 65536 buckets would
use more than 60ms.

htab_init_buckets() runs from process context,
it is safe to schedule to avoid latency spikes.

Fixes: c50eb518e262 ("bpf: Use separate lockdep class for each hashtab")
Reported-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201221192506.707584-1-eric.dumazet@gmail.com
---
 kernel/bpf/hashtab.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 7e848200cd26..c1ac7f964bc9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab)
 			lockdep_set_class(&htab->buckets[i].lock,
 					  &htab->lockdep_key);
 		}
+		cond_resched();
 	}
 }
 

From 54ddbdb024882e226055cc4c3c246592ddde2ee5 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 18 Dec 2020 09:38:43 -0800
Subject: [PATCH 095/547] net: systemport: set dev->max_mtu to
 UMAC_MAX_MTU_SIZE

The driver is already allocating receive buffers of 2KiB and the
Ethernet MAC is configured to accept frames up to UMAC_MAX_MTU_SIZE.

Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://lore.kernel.org/r/20201218173843.141046-1-f.fainelli@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 0fdd19d99d99..b1ae9eb8f247 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2577,6 +2577,7 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 			 NETIF_F_HW_VLAN_CTAG_TX;
 	dev->hw_features |= dev->features;
 	dev->vlan_features |= dev->features;
+	dev->max_mtu = UMAC_MAX_MTU_SIZE;
 
 	/* Request the WOL interrupt and advertise suspend if available */
 	priv->wol_irq_disabled = 1;

From 1385ae5c30f238f81bc6528d897c6d7a0816783f Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Date: Fri, 18 Dec 2020 11:55:36 +0100
Subject: [PATCH 096/547] ethernet: ucc_geth: set dev->max_mtu to 1518

All the buffers and registers are already set up appropriately for an
MTU slightly above 1500, so we just need to expose this to the
networking stack. AFAICT, there's no need to implement .ndo_change_mtu
when the receive buffers are always set up to support the max_mtu.

This fixes several warnings during boot on our mpc8309-board with an
embedded mv88e6250 switch:

mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 0
...
mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 4
ucc_geth e0102000.ethernet eth1: error -22 setting MTU to 1504 to include DSA overhead

The last line explains what the DSA stack tries to do: achieving an MTU
of 1500 on-the-wire requires that the master netdevice connected to
the CPU port supports an MTU of 1500+the tagging overhead.

Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports")
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/ucc_geth.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index ba8869c3d891..d6dbae480716 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3889,6 +3889,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 	INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work);
 	netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64);
 	dev->mtu = 1500;
+	dev->max_mtu = 1518;
 
 	ugeth->msg_enable = netif_msg_init(debug.msg_enable, UGETH_MSG_DEFAULT);
 	ugeth->phy_interface = phy_interface;

From 887078de2a23689e29d6fa1b75d7cbc544c280be Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Date: Fri, 18 Dec 2020 11:55:37 +0100
Subject: [PATCH 097/547] ethernet: ucc_geth: fix definition and size of
 ucc_geth_tx_global_pram

Table 8-53 in the QUICC Engine Reference manual shows definitions of
fields up to a size of 192 bytes, not just 128. But in table 8-111,
one does find the text

  Base Address of the Global Transmitter Parameter RAM Page. [...]
  The user needs to allocate 128 bytes for this page. The address must
  be aligned to the page size.

I've checked both rev. 7 (11/2015) and rev. 9 (05/2018) of the manual;
they both have this inconsistency (and the table numbers are the
same).

Adding a bit of debug printing, on my board the struct
ucc_geth_tx_global_pram is allocated at offset 0x880, while
the (opaque) ucc_geth_thread_data_tx gets allocated immediately
afterwards, at 0x900. So whatever the engine writes into the thread
data overlaps with the tail of the global tx pram (and devmem says
that something does get written during a simple ping).

I haven't observed any failure that could be attributed to this, but
it seems to be the kind of thing that would be extremely hard to
debug. So extend the struct definition so that we do allocate 192
bytes.

Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/ucc_geth.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h
index 1a9bdf66a7d8..11d4bf5dc21f 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.h
+++ b/drivers/net/ethernet/freescale/ucc_geth.h
@@ -575,7 +575,14 @@ struct ucc_geth_tx_global_pram {
 	u32 vtagtable[0x8];	/* 8 4-byte VLAN tags */
 	u32 tqptr;		/* a base pointer to the Tx Queues Memory
 				   Region */
-	u8 res2[0x80 - 0x74];
+	u8 res2[0x78 - 0x74];
+	u64 snums_en;
+	u32 l2l3baseptr;	/* top byte consists of a few other bit fields */
+
+	u16 mtu[8];
+	u8 res3[0xa8 - 0x94];
+	u32 wrrtablebase;	/* top byte is reserved */
+	u8 res4[0xc0 - 0xac];
 } __packed;
 
 /* structure representing Extended Filtering Global Parameters in PRAM */

From e925e0cd2a705aaacb0b907bb3691fcac3a973a4 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Date: Fri, 18 Dec 2020 11:55:38 +0100
Subject: [PATCH 098/547] ethernet: ucc_geth: fix use-after-free in
 ucc_geth_remove()

ugeth is the netdiv_priv() part of the netdevice. Accessing the memory
pointed to by ugeth (such as done by ucc_geth_memclean() and the two
of_node_puts) after free_netdev() is thus use-after-free.

Fixes: 80a9fad8e89a ("ucc_geth: fix module removal")
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/ucc_geth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index d6dbae480716..6d853f018d53 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3935,12 +3935,12 @@ static int ucc_geth_remove(struct platform_device* ofdev)
 	struct device_node *np = ofdev->dev.of_node;
 
 	unregister_netdev(dev);
-	free_netdev(dev);
 	ucc_geth_memclean(ugeth);
 	if (of_phy_is_fixed_link(np))
 		of_phy_deregister_fixed_link(np);
 	of_node_put(ugeth->ug_info->tbi_node);
 	of_node_put(ugeth->ug_info->phy_node);
+	free_netdev(dev);
 
 	return 0;
 }

From 83469893204281ecf65d572bddf02de29a19787c Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Fri, 18 Dec 2020 13:50:01 -0800
Subject: [PATCH 099/547] ionic: account for vlan tag len in rx buffer len

Let the FW know we have enough receive buffer space for the
vlan tag if it isn't stripped.

Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling")
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Link: https://lore.kernel.org/r/20201218215001.64696-1-snelson@pensando.io
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 9156c9825a16..ac4cd5d82e69 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -337,7 +337,7 @@ void ionic_rx_fill(struct ionic_queue *q)
 	unsigned int i, j;
 	unsigned int len;
 
-	len = netdev->mtu + ETH_HLEN;
+	len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
 	nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
 
 	for (i = ionic_q_space_avail(q); i; i--) {

From 8df66af5c1e5f80562fe728db5ec069b21810144 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 19 Dec 2020 14:01:44 +0300
Subject: [PATCH 100/547] atm: idt77252: call pci_disable_device() on error
 path

This error path needs to disable the pci device before returning.

Fixes: ede58ef28e10 ("atm: remove deprecated use of pci api")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/X93dmC4NX0vbTpGp@mwanda
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/atm/idt77252.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 65a3886f68c9..5f0472c18bcb 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -3607,7 +3607,7 @@ static int idt77252_init_one(struct pci_dev *pcidev,
 
 	if ((err = dma_set_mask_and_coherent(&pcidev->dev, DMA_BIT_MASK(32)))) {
 		printk("idt77252: can't enable DMA for PCI device at %s\n", pci_name(pcidev));
-		return err;
+		goto err_out_disable_pdev;
 	}
 
 	card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL);

From bcce55f556e824d43f352d76b94509185585e38d Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 19 Dec 2020 13:19:24 +0100
Subject: [PATCH 101/547] ppp: Fix PPPIOCUNBRIDGECHAN request number

PPPIOCGL2TPSTATS already uses 54. This shouldn't be a problem in
practice, but let's keep the logical decreasing assignment scheme.

Fixes: 4cf476ced45d ("ppp: add PPPIOCBRIDGECHAN and PPPIOCUNBRIDGECHAN ioctls")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/e3a4c355e3820331d8e1fffef8522739aae58b57.1608380117.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ppp-ioctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h
index 8dbecb3ad036..1cc5ce0ae062 100644
--- a/include/uapi/linux/ppp-ioctl.h
+++ b/include/uapi/linux/ppp-ioctl.h
@@ -116,7 +116,7 @@ struct pppol2tp_ioc_stats {
 #define PPPIOCGCHAN	_IOR('t', 55, int)	/* get ppp channel number */
 #define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats)
 #define PPPIOCBRIDGECHAN _IOW('t', 53, int)	/* bridge one channel to another */
-#define PPPIOCUNBRIDGECHAN _IO('t', 54)	/* unbridge channel */
+#define PPPIOCUNBRIDGECHAN _IO('t', 52)	/* unbridge channel */
 
 #define SIOCGPPPSTATS   (SIOCDEVPRIVATE + 0)
 #define SIOCGPPPVER     (SIOCDEVPRIVATE + 1)	/* NEVER change this!! */

From 2575bc1aa9d52a62342b57a0b7d0a12146cf6aed Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Sun, 20 Dec 2020 13:02:29 +0200
Subject: [PATCH 102/547] net: mvpp2: Fix GoP port 3 Networking Complex Control
 configurations

During GoP port 2 Networking Complex Control mode of operation configurations,
also GoP port 3 mode of operation was wrongly set.
Patch removes these configurations.

Fixes: f84bf386f395 ("net: mvpp2: initialize the GoP")
Acked-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: Stefan Chulski <stefanc@marvell.com>
Link: https://lore.kernel.org/r/1608462149-1702-1-git-send-email-stefanc@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 0f18d034ce3e..f20b31327027 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -1231,7 +1231,7 @@ static void mvpp22_gop_init_rgmii(struct mvpp2_port *port)
 
 	regmap_read(priv->sysctrl_base, GENCONF_CTRL0, &val);
 	if (port->gop_id == 2)
-		val |= GENCONF_CTRL0_PORT0_RGMII | GENCONF_CTRL0_PORT1_RGMII;
+		val |= GENCONF_CTRL0_PORT0_RGMII;
 	else if (port->gop_id == 3)
 		val |= GENCONF_CTRL0_PORT1_RGMII_MII;
 	regmap_write(priv->sysctrl_base, GENCONF_CTRL0, val);

From 74a2921948ed8c0e7f079a98442ec3493168cc85 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Wed, 2 Dec 2020 18:36:57 +0800
Subject: [PATCH 103/547] scsi: hisi_sas: Expose HW queues for v2 hw

As a performance enhancement, make the completion queue interrupts managed.

In addition, in commit bf0beec0607d ("blk-mq: drain I/O when all CPUs in a
hctx are offline"), CPU hotplug for MQ devices using managed interrupts is
made safe. So expose HW queues to blk-mq to take advantage of this.

Flag Scsi_host.host_tagset is also set to ensure that the HBA is not sent
more commands than it can handle. However the driver still does not use
request tag for IPTT as there are many HW bugs means that special rules
apply for IPTT allocation.

Link: https://lore.kernel.org/r/1606905417-183214-6-git-send-email-john.garry@huawei.com
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas.h       |  4 ++
 drivers/scsi/hisi_sas/hisi_sas_main.c  | 11 +++++
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 66 +++++++++++++++++++++-----
 3 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index a25cfc11c96d..aa67807c5693 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -14,6 +14,7 @@
 #include <linux/debugfs.h>
 #include <linux/dmapool.h>
 #include <linux/iopoll.h>
+#include <linux/irq.h>
 #include <linux/lcm.h>
 #include <linux/libata.h>
 #include <linux/mfd/syscon.h>
@@ -312,6 +313,7 @@ enum {
 
 struct hisi_sas_hw {
 	int (*hw_init)(struct hisi_hba *hisi_hba);
+	int (*interrupt_preinit)(struct hisi_hba *hisi_hba);
 	void (*setup_itct)(struct hisi_hba *hisi_hba,
 			   struct hisi_sas_device *device);
 	int (*slot_index_alloc)(struct hisi_hba *hisi_hba,
@@ -418,6 +420,8 @@ struct hisi_hba {
 	u32 refclk_frequency_mhz;
 	u8 sas_addr[SAS_ADDR_SIZE];
 
+	int *irq_map; /* v2 hw */
+
 	int n_phy;
 	spinlock_t lock;
 	struct semaphore sem;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 274ccf18ce2d..061d65b8a287 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -2620,6 +2620,13 @@ err_out:
 	return NULL;
 }
 
+static int hisi_sas_interrupt_preinit(struct hisi_hba *hisi_hba)
+{
+	if (hisi_hba->hw->interrupt_preinit)
+		return hisi_hba->hw->interrupt_preinit(hisi_hba);
+	return 0;
+}
+
 int hisi_sas_probe(struct platform_device *pdev,
 		   const struct hisi_sas_hw *hw)
 {
@@ -2677,6 +2684,10 @@ int hisi_sas_probe(struct platform_device *pdev,
 		sha->sas_port[i] = &hisi_hba->port[i].sas_port;
 	}
 
+	rc = hisi_sas_interrupt_preinit(hisi_hba);
+	if (rc)
+		goto err_out_ha;
+
 	rc = scsi_add_host(shost, &pdev->dev);
 	if (rc)
 		goto err_out_ha;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index b57177b52fac..9adfdefef9ca 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -3302,6 +3302,28 @@ static irq_handler_t fatal_interrupts[HISI_SAS_FATAL_INT_NR] = {
 	fatal_axi_int_v2_hw
 };
 
+#define CQ0_IRQ_INDEX (96)
+
+static int hisi_sas_v2_interrupt_preinit(struct hisi_hba *hisi_hba)
+{
+	struct platform_device *pdev = hisi_hba->platform_dev;
+	struct Scsi_Host *shost = hisi_hba->shost;
+	struct irq_affinity desc = {
+		.pre_vectors = CQ0_IRQ_INDEX,
+		.post_vectors = 16,
+	};
+	int resv = desc.pre_vectors + desc.post_vectors, minvec = resv + 1, nvec;
+
+	nvec = devm_platform_get_irqs_affinity(pdev, &desc, minvec, 128,
+					       &hisi_hba->irq_map);
+	if (nvec < 0)
+		return nvec;
+
+	shost->nr_hw_queues = hisi_hba->cq_nvecs = nvec - resv;
+
+	return 0;
+}
+
 /*
  * There is a limitation in the hip06 chipset that we need
  * to map in all mbigen interrupts, even if they are not used.
@@ -3310,14 +3332,11 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
 {
 	struct platform_device *pdev = hisi_hba->platform_dev;
 	struct device *dev = &pdev->dev;
-	int irq, rc = 0, irq_map[128];
+	int irq, rc = 0;
 	int i, phy_no, fatal_no, queue_no;
 
-	for (i = 0; i < 128; i++)
-		irq_map[i] = platform_get_irq(pdev, i);
-
 	for (i = 0; i < HISI_SAS_PHY_INT_NR; i++) {
-		irq = irq_map[i + 1]; /* Phy up/down is irq1 */
+		irq = hisi_hba->irq_map[i + 1]; /* Phy up/down is irq1 */
 		rc = devm_request_irq(dev, irq, phy_interrupts[i], 0,
 				      DRV_NAME " phy", hisi_hba);
 		if (rc) {
@@ -3331,7 +3350,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
 	for (phy_no = 0; phy_no < hisi_hba->n_phy; phy_no++) {
 		struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no];
 
-		irq = irq_map[phy_no + 72];
+		irq = hisi_hba->irq_map[phy_no + 72];
 		rc = devm_request_irq(dev, irq, sata_int_v2_hw, 0,
 				      DRV_NAME " sata", phy);
 		if (rc) {
@@ -3343,7 +3362,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
 	}
 
 	for (fatal_no = 0; fatal_no < HISI_SAS_FATAL_INT_NR; fatal_no++) {
-		irq = irq_map[fatal_no + 81];
+		irq = hisi_hba->irq_map[fatal_no + 81];
 		rc = devm_request_irq(dev, irq, fatal_interrupts[fatal_no], 0,
 				      DRV_NAME " fatal", hisi_hba);
 		if (rc) {
@@ -3354,24 +3373,22 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba)
 		}
 	}
 
-	for (queue_no = 0; queue_no < hisi_hba->queue_count; queue_no++) {
+	for (queue_no = 0; queue_no < hisi_hba->cq_nvecs; queue_no++) {
 		struct hisi_sas_cq *cq = &hisi_hba->cq[queue_no];
 
-		cq->irq_no = irq_map[queue_no + 96];
+		cq->irq_no = hisi_hba->irq_map[queue_no + 96];
 		rc = devm_request_threaded_irq(dev, cq->irq_no,
 					       cq_interrupt_v2_hw,
 					       cq_thread_v2_hw, IRQF_ONESHOT,
 					       DRV_NAME " cq", cq);
 		if (rc) {
 			dev_err(dev, "irq init: could not request cq interrupt %d, rc=%d\n",
-				irq, rc);
+					cq->irq_no, rc);
 			rc = -ENOENT;
 			goto err_out;
 		}
+		cq->irq_mask = irq_get_affinity_mask(cq->irq_no);
 	}
-
-	hisi_hba->cq_nvecs = hisi_hba->queue_count;
-
 err_out:
 	return rc;
 }
@@ -3529,6 +3546,26 @@ static struct device_attribute *host_attrs_v2_hw[] = {
 	NULL
 };
 
+static int map_queues_v2_hw(struct Scsi_Host *shost)
+{
+	struct hisi_hba *hisi_hba = shost_priv(shost);
+	struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+	const struct cpumask *mask;
+	unsigned int queue, cpu;
+
+	for (queue = 0; queue < qmap->nr_queues; queue++) {
+		mask = irq_get_affinity_mask(hisi_hba->irq_map[96 + queue]);
+		if (!mask)
+			continue;
+
+		for_each_cpu(cpu, mask)
+			qmap->mq_map[cpu] = qmap->queue_offset + queue;
+	}
+
+	return 0;
+
+}
+
 static struct scsi_host_template sht_v2_hw = {
 	.name			= DRV_NAME,
 	.proc_name		= DRV_NAME,
@@ -3553,10 +3590,13 @@ static struct scsi_host_template sht_v2_hw = {
 #endif
 	.shost_attrs		= host_attrs_v2_hw,
 	.host_reset		= hisi_sas_host_reset,
+	.map_queues		= map_queues_v2_hw,
+	.host_tagset		= 1,
 };
 
 static const struct hisi_sas_hw hisi_sas_v2_hw = {
 	.hw_init = hisi_sas_v2_init,
+	.interrupt_preinit = hisi_sas_v2_interrupt_preinit,
 	.setup_itct = setup_itct_v2_hw,
 	.slot_index_alloc = slot_index_alloc_quirk_v2_hw,
 	.alloc_dev = alloc_dev_quirk_v2_hw,

From 2a5f1b67ec577fb1544b563086e0377f095f88e2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 10 Dec 2020 08:30:59 +0000
Subject: [PATCH 104/547] KVM: arm64: Don't access PMCR_EL0 when no PMU is
 available

We reset the guest's view of PMCR_EL0 unconditionally, based on
the host's view of this register. It is however legal for an
implementation not to provide any PMU, resulting in an UNDEF.

The obvious fix is to skip the reset of this shadow register
when no PMU is available, sidestepping the issue entirely.
If no PMU is available, the guest is not able to request
a virtual PMU anyway, so not doing nothing is the right thing
to do!

It is unlikely that this bug can hit any HW implementation
though, as they all provide a PMU. It has been found using nested
virt with the host KVM not implementing the PMU itself.

Fixes: ab9468340d2bc ("arm64: KVM: Add access handler for PMCR register")
Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201210083059.1277162-1-maz@kernel.org
---
 arch/arm64/kvm/sys_regs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 3313dedfa505..d46e7f706cb0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -594,6 +594,10 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
 	u64 pmcr, val;
 
+	/* No PMU available, PMCR_EL0 may UNDEF... */
+	if (!kvm_arm_support_pmu_v3())
+		return;
+
 	pmcr = read_sysreg(pmcr_el0);
 	/*
 	 * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN

From ff367fe473a9857160c17827931375a899076394 Mon Sep 17 00:00:00 2001
From: David Brazdil <dbrazdil@google.com>
Date: Tue, 8 Dec 2020 14:24:47 +0000
Subject: [PATCH 105/547] KVM: arm64: Prevent use of invalid PSCI v0.1 function
 IDs

PSCI driver exposes a struct containing the PSCI v0.1 function IDs
configured in the DT. However, the struct does not convey the
information whether these were set from DT or contain the default value
zero. This could be a problem for PSCI proxy in KVM protected mode.

Extend config passed to KVM with a bit mask with individual bits set
depending on whether the corresponding function pointer in psci_ops is
set, eg. set bit for PSCI_CPU_SUSPEND if psci_ops.cpu_suspend != NULL.

Previously config was split into multiple global variables. Put
everything into a single struct for convenience.

Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: David Brazdil <dbrazdil@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201208142452.87237-2-dbrazdil@google.com
---
 arch/arm64/include/asm/kvm_host.h    | 20 +++++++++++
 arch/arm64/kvm/arm.c                 | 14 +++++---
 arch/arm64/kvm/hyp/nvhe/psci-relay.c | 53 +++++++++++++++++++++-------
 3 files changed, 70 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 11beda85ee7e..828d50d40dc2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -17,6 +17,7 @@
 #include <linux/jump_label.h>
 #include <linux/kvm_types.h>
 #include <linux/percpu.h>
+#include <linux/psci.h>
 #include <asm/arch_gicv3.h>
 #include <asm/barrier.h>
 #include <asm/cpufeature.h>
@@ -240,6 +241,25 @@ struct kvm_host_data {
 	struct kvm_pmu_events pmu_events;
 };
 
+#define KVM_HOST_PSCI_0_1_CPU_SUSPEND	BIT(0)
+#define KVM_HOST_PSCI_0_1_CPU_ON	BIT(1)
+#define KVM_HOST_PSCI_0_1_CPU_OFF	BIT(2)
+#define KVM_HOST_PSCI_0_1_MIGRATE	BIT(3)
+
+struct kvm_host_psci_config {
+	/* PSCI version used by host. */
+	u32 version;
+
+	/* Function IDs used by host if version is v0.1. */
+	struct psci_0_1_function_ids function_ids_0_1;
+
+	/* Bitmask of functions enabled for v0.1, bits KVM_HOST_PSCI_0_1_*. */
+	unsigned int enabled_functions_0_1;
+};
+
+extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config);
+#define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config)
+
 struct vcpu_reset_state {
 	unsigned long	pc;
 	unsigned long	r0;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 6e637d2b4cfb..6a2f4e01b04f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -66,8 +66,6 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
 DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
 extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS];
-extern u32 kvm_nvhe_sym(kvm_host_psci_version);
-extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids);
 
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
@@ -1618,8 +1616,16 @@ static bool init_psci_relay(void)
 		return false;
 	}
 
-	kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version();
-	kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids();
+	kvm_host_psci_config.version = psci_ops.get_version();
+
+	if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) {
+		kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids();
+		kvm_host_psci_config.enabled_functions_0_1 =
+			(psci_ops.cpu_suspend ? KVM_HOST_PSCI_0_1_CPU_SUSPEND : 0) |
+			(psci_ops.cpu_off ? KVM_HOST_PSCI_0_1_CPU_OFF : 0) |
+			(psci_ops.cpu_on ? KVM_HOST_PSCI_0_1_CPU_ON : 0) |
+			(psci_ops.migrate ? KVM_HOST_PSCI_0_1_MIGRATE : 0);
+	}
 	return true;
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 08dc9de69314..0d6f4aa39621 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -22,9 +22,8 @@ void kvm_hyp_cpu_resume(unsigned long r0);
 void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
 
 /* Config options set by the host. */
-__ro_after_init u32 kvm_host_psci_version;
-__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids;
-__ro_after_init s64 hyp_physvirt_offset;
+struct kvm_host_psci_config __ro_after_init kvm_host_psci_config;
+s64 __ro_after_init hyp_physvirt_offset;
 
 #define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset)
 
@@ -54,12 +53,41 @@ static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
 	return func_id;
 }
 
+static inline bool is_psci_0_1_function_enabled(unsigned int fn_bit)
+{
+	return kvm_host_psci_config.enabled_functions_0_1 & fn_bit;
+}
+
+static inline bool is_psci_0_1_cpu_suspend(u64 func_id)
+{
+	return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_SUSPEND) &&
+	       (func_id == kvm_host_psci_config.function_ids_0_1.cpu_suspend);
+}
+
+static inline bool is_psci_0_1_cpu_on(u64 func_id)
+{
+	return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_ON) &&
+	       (func_id == kvm_host_psci_config.function_ids_0_1.cpu_on);
+}
+
+static inline bool is_psci_0_1_cpu_off(u64 func_id)
+{
+	return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_OFF) &&
+	       (func_id == kvm_host_psci_config.function_ids_0_1.cpu_off);
+}
+
+static inline bool is_psci_0_1_migrate(u64 func_id)
+{
+	return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_MIGRATE) &&
+	       (func_id == kvm_host_psci_config.function_ids_0_1.migrate);
+}
+
 static bool is_psci_0_1_call(u64 func_id)
 {
-	return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) ||
-	       (func_id == kvm_host_psci_0_1_function_ids.cpu_on) ||
-	       (func_id == kvm_host_psci_0_1_function_ids.cpu_off) ||
-	       (func_id == kvm_host_psci_0_1_function_ids.migrate);
+	return is_psci_0_1_cpu_suspend(func_id) ||
+	       is_psci_0_1_cpu_on(func_id) ||
+	       is_psci_0_1_cpu_off(func_id) ||
+	       is_psci_0_1_migrate(func_id);
 }
 
 static bool is_psci_0_2_call(u64 func_id)
@@ -71,7 +99,7 @@ static bool is_psci_0_2_call(u64 func_id)
 
 static bool is_psci_call(u64 func_id)
 {
-	switch (kvm_host_psci_version) {
+	switch (kvm_host_psci_config.version) {
 	case PSCI_VERSION(0, 1):
 		return is_psci_0_1_call(func_id);
 	default:
@@ -248,12 +276,11 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
 
 static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
 {
-	if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) ||
-	    (func_id == kvm_host_psci_0_1_function_ids.migrate))
+	if (is_psci_0_1_cpu_off(func_id) || is_psci_0_1_migrate(func_id))
 		return psci_forward(host_ctxt);
-	else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on)
+	else if (is_psci_0_1_cpu_on(func_id))
 		return psci_cpu_on(func_id, host_ctxt);
-	else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend)
+	else if (is_psci_0_1_cpu_suspend(func_id))
 		return psci_cpu_suspend(func_id, host_ctxt);
 	else
 		return PSCI_RET_NOT_SUPPORTED;
@@ -304,7 +331,7 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt)
 	if (!is_psci_call(func_id))
 		return false;
 
-	switch (kvm_host_psci_version) {
+	switch (kvm_host_psci_config.version) {
 	case PSCI_VERSION(0, 1):
 		ret = psci_0_1_handler(func_id, host_ctxt);
 		break;

From 7a96a0687b80a1870c689418d7b72012c8bdd53d Mon Sep 17 00:00:00 2001
From: David Brazdil <dbrazdil@google.com>
Date: Tue, 8 Dec 2020 14:24:48 +0000
Subject: [PATCH 106/547] KVM: arm64: Use lm_alias in nVHE-only VA conversion

init_hyp_physvirt_offset() computes PA from a kernel VA. Conversion to
kernel linear-map is required first but the code used kvm_ksym_ref() for
this purpose. Under VHE that is a NOP and resulted in a runtime warning.
Replace kvm_ksym_ref with lm_alias.

Reported-by: Qian Cai <qcai@redhat.com>
Signed-off-by: David Brazdil <dbrazdil@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201208142452.87237-3-dbrazdil@google.com
---
 arch/arm64/kvm/va_layout.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index d8cc51bd60bf..914732b88c69 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -42,7 +42,7 @@ static void init_hyp_physvirt_offset(void)
 	u64 kern_va, hyp_va;
 
 	/* Compute the offset from the hyp VA and PA of a random symbol. */
-	kern_va = (u64)kvm_ksym_ref(__hyp_text_start);
+	kern_va = (u64)lm_alias(__hyp_text_start);
 	hyp_va = __early_kern_hyp_va(kern_va);
 	CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va;
 }

From c3e181aec96f6ada84df1cb72a72be8970f8b284 Mon Sep 17 00:00:00 2001
From: David Brazdil <dbrazdil@google.com>
Date: Tue, 8 Dec 2020 14:24:49 +0000
Subject: [PATCH 107/547] KVM: arm64: Skip computing hyp VA layout for VHE

Computing the hyp VA layout is redundant when the kernel runs in EL2 and
hyp shares its VA mappings. Make calling kvm_compute_layout()
conditional on not just CONFIG_KVM but also !is_kernel_in_hyp_mode().

Signed-off-by: David Brazdil <dbrazdil@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201208142452.87237-4-dbrazdil@google.com
---
 arch/arm64/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 18e9727d3f64..4e585cc892e8 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -434,7 +434,7 @@ static void __init hyp_mode_check(void)
 			   "CPU: CPUs started in inconsistent modes");
 	else
 		pr_info("CPU: All CPU(s) started at EL1\n");
-	if (IS_ENABLED(CONFIG_KVM))
+	if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode())
 		kvm_compute_layout();
 }
 

From 61fe0c37af57ac35472a870581a7d0bb5ac2f63a Mon Sep 17 00:00:00 2001
From: David Brazdil <dbrazdil@google.com>
Date: Tue, 8 Dec 2020 14:24:50 +0000
Subject: [PATCH 108/547] KVM: arm64: Minor cleanup of hyp variables used in
 host

Small cleanup moving declarations of hyp-exported variables to
kvm_host.h and using macros to avoid having to refer to them with
kvm_nvhe_sym() in host.

No functional change intended.

Signed-off-by: David Brazdil <dbrazdil@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201208142452.87237-5-dbrazdil@google.com
---
 arch/arm64/include/asm/kvm_host.h | 6 ++++++
 arch/arm64/kvm/arm.c              | 4 +---
 arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 6 +++---
 arch/arm64/kvm/va_layout.c        | 5 ++---
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 828d50d40dc2..bce2452b305c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -260,6 +260,12 @@ struct kvm_host_psci_config {
 extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config);
 #define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config)
 
+extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
+#define hyp_physvirt_offset CHOOSE_NVHE_SYM(hyp_physvirt_offset)
+
+extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS];
+#define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map)
+
 struct vcpu_reset_state {
 	unsigned long	pc;
 	unsigned long	r0;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 6a2f4e01b04f..836ca763b91d 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -65,8 +65,6 @@ static bool vgic_present;
 static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
 DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
-extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS];
-
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -1602,7 +1600,7 @@ static void init_cpu_logical_map(void)
 	 * allow any other CPUs from the `possible` set to boot.
 	 */
 	for_each_online_cpu(cpu)
-		kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu);
+		hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu);
 }
 
 static bool init_psci_relay(void)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
index cbab0c6246e2..2997aa156d8e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
@@ -14,14 +14,14 @@
  * Other CPUs should not be allowed to boot because their features were
  * not checked against the finalized system capabilities.
  */
-u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
+u64 __ro_after_init hyp_cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
 u64 cpu_logical_map(unsigned int cpu)
 {
-	if (cpu >= ARRAY_SIZE(__cpu_logical_map))
+	if (cpu >= ARRAY_SIZE(hyp_cpu_logical_map))
 		hyp_panic();
 
-	return __cpu_logical_map[cpu];
+	return hyp_cpu_logical_map[cpu];
 }
 
 unsigned long __hyp_per_cpu_offset(unsigned int cpu)
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index 914732b88c69..70fcd6a12fe1 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -34,17 +34,16 @@ static u64 __early_kern_hyp_va(u64 addr)
 }
 
 /*
- * Store a hyp VA <-> PA offset into a hyp-owned variable.
+ * Store a hyp VA <-> PA offset into a EL2-owned variable.
  */
 static void init_hyp_physvirt_offset(void)
 {
-	extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
 	u64 kern_va, hyp_va;
 
 	/* Compute the offset from the hyp VA and PA of a random symbol. */
 	kern_va = (u64)lm_alias(__hyp_text_start);
 	hyp_va = __early_kern_hyp_va(kern_va);
-	CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va;
+	hyp_physvirt_offset = (s64)__pa(kern_va) - (s64)hyp_va;
 }
 
 /*

From e6829e0384a49efe68537298132230bebd8bd1b3 Mon Sep 17 00:00:00 2001
From: David Brazdil <dbrazdil@google.com>
Date: Tue, 8 Dec 2020 14:24:51 +0000
Subject: [PATCH 109/547] KVM: arm64: Remove unused includes in psci-relay.c

Minor cleanup removing unused includes.

Signed-off-by: David Brazdil <dbrazdil@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201208142452.87237-6-dbrazdil@google.com
---
 arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 0d6f4aa39621..1f7237e45148 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -7,11 +7,8 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
-#include <kvm/arm_hypercalls.h>
 #include <linux/arm-smccc.h>
 #include <linux/kvm_host.h>
-#include <linux/psci.h>
-#include <kvm/arm_psci.h>
 #include <uapi/linux/psci.h>
 
 #include <nvhe/trap_handler.h>

From 860a4c3d1e04a3c3e62bacbbba64417bf49768e2 Mon Sep 17 00:00:00 2001
From: David Brazdil <dbrazdil@google.com>
Date: Tue, 8 Dec 2020 14:24:52 +0000
Subject: [PATCH 110/547] KVM: arm64: Move skip_host_instruction to adjust_pc.h

Move function for skipping host instruction in the host trap handler to
a header file containing analogical helpers for guests.

Signed-off-by: David Brazdil <dbrazdil@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201208142452.87237-7-dbrazdil@google.com
---
 arch/arm64/kvm/hyp/include/hyp/adjust_pc.h |  9 +++++++++
 arch/arm64/kvm/hyp/nvhe/hyp-main.c         | 12 ++----------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
index b1f60923a8fe..61716359035d 100644
--- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
+++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
@@ -59,4 +59,13 @@ static inline void __adjust_pc(struct kvm_vcpu *vcpu)
 	}
 }
 
+/*
+ * Skip an instruction while host sysregs are live.
+ * Assumes host is always 64-bit.
+ */
+static inline void kvm_skip_host_instr(void)
+{
+	write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR);
+}
+
 #endif
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index bde658d51404..a906f9e2ff34 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -157,11 +157,6 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
 	__kvm_hyp_host_forward_smc(host_ctxt);
 }
 
-static void skip_host_instruction(void)
-{
-	write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR);
-}
-
 static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
 {
 	bool handled;
@@ -170,11 +165,8 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
 	if (!handled)
 		default_host_smc_handler(host_ctxt);
 
-	/*
-	 * Unlike HVC, the return address of an SMC is the instruction's PC.
-	 * Move the return address past the instruction.
-	 */
-	skip_host_instruction();
+	/* SMC was trapped, move ELR past the current PC. */
+	kvm_skip_host_instr();
 }
 
 void handle_trap(struct kvm_cpu_context *host_ctxt)

From 767c973f2e4a9264a4f159c9fad5ca8acdb9915e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 22 Dec 2020 12:46:41 +0000
Subject: [PATCH 111/547] KVM: arm64: Declutter host PSCI 0.1 handling

Although there is nothing wrong with the current host PSCI relay
implementation, we can clean it up and remove some of the helpers
that do not improve the overall readability of the legacy PSCI 0.1
handling.

Opportunity is taken to turn the bitmap into a set of booleans,
and creative use of preprocessor macros make init and check
more concise/readable.

Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h    | 11 ++--
 arch/arm64/kvm/arm.c                 | 12 +++--
 arch/arm64/kvm/hyp/nvhe/psci-relay.c | 77 +++++++---------------------
 3 files changed, 30 insertions(+), 70 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index bce2452b305c..8fcfab0c2567 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -241,11 +241,6 @@ struct kvm_host_data {
 	struct kvm_pmu_events pmu_events;
 };
 
-#define KVM_HOST_PSCI_0_1_CPU_SUSPEND	BIT(0)
-#define KVM_HOST_PSCI_0_1_CPU_ON	BIT(1)
-#define KVM_HOST_PSCI_0_1_CPU_OFF	BIT(2)
-#define KVM_HOST_PSCI_0_1_MIGRATE	BIT(3)
-
 struct kvm_host_psci_config {
 	/* PSCI version used by host. */
 	u32 version;
@@ -253,8 +248,10 @@ struct kvm_host_psci_config {
 	/* Function IDs used by host if version is v0.1. */
 	struct psci_0_1_function_ids function_ids_0_1;
 
-	/* Bitmask of functions enabled for v0.1, bits KVM_HOST_PSCI_0_1_*. */
-	unsigned int enabled_functions_0_1;
+	bool psci_0_1_cpu_suspend_implemented;
+	bool psci_0_1_cpu_on_implemented;
+	bool psci_0_1_cpu_off_implemented;
+	bool psci_0_1_migrate_implemented;
 };
 
 extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 836ca763b91d..e207e4541f55 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1603,6 +1603,9 @@ static void init_cpu_logical_map(void)
 		hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu);
 }
 
+#define init_psci_0_1_impl_state(config, what)	\
+	config.psci_0_1_ ## what ## _implemented = psci_ops.what
+
 static bool init_psci_relay(void)
 {
 	/*
@@ -1618,11 +1621,10 @@ static bool init_psci_relay(void)
 
 	if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) {
 		kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids();
-		kvm_host_psci_config.enabled_functions_0_1 =
-			(psci_ops.cpu_suspend ? KVM_HOST_PSCI_0_1_CPU_SUSPEND : 0) |
-			(psci_ops.cpu_off ? KVM_HOST_PSCI_0_1_CPU_OFF : 0) |
-			(psci_ops.cpu_on ? KVM_HOST_PSCI_0_1_CPU_ON : 0) |
-			(psci_ops.migrate ? KVM_HOST_PSCI_0_1_MIGRATE : 0);
+		init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend);
+		init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on);
+		init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off);
+		init_psci_0_1_impl_state(kvm_host_psci_config, migrate);
 	}
 	return true;
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 1f7237e45148..e3947846ffcb 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -43,48 +43,16 @@ struct psci_boot_args {
 static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT;
 static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT;
 
-static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
-{
-	DECLARE_REG(u64, func_id, host_ctxt, 0);
-
-	return func_id;
-}
-
-static inline bool is_psci_0_1_function_enabled(unsigned int fn_bit)
-{
-	return kvm_host_psci_config.enabled_functions_0_1 & fn_bit;
-}
-
-static inline bool is_psci_0_1_cpu_suspend(u64 func_id)
-{
-	return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_SUSPEND) &&
-	       (func_id == kvm_host_psci_config.function_ids_0_1.cpu_suspend);
-}
-
-static inline bool is_psci_0_1_cpu_on(u64 func_id)
-{
-	return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_ON) &&
-	       (func_id == kvm_host_psci_config.function_ids_0_1.cpu_on);
-}
-
-static inline bool is_psci_0_1_cpu_off(u64 func_id)
-{
-	return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_OFF) &&
-	       (func_id == kvm_host_psci_config.function_ids_0_1.cpu_off);
-}
-
-static inline bool is_psci_0_1_migrate(u64 func_id)
-{
-	return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_MIGRATE) &&
-	       (func_id == kvm_host_psci_config.function_ids_0_1.migrate);
-}
+#define	is_psci_0_1(what, func_id)					\
+	(kvm_host_psci_config.psci_0_1_ ## what ## _implemented &&	\
+	 (func_id) == kvm_host_psci_config.function_ids_0_1.what)
 
 static bool is_psci_0_1_call(u64 func_id)
 {
-	return is_psci_0_1_cpu_suspend(func_id) ||
-	       is_psci_0_1_cpu_on(func_id) ||
-	       is_psci_0_1_cpu_off(func_id) ||
-	       is_psci_0_1_migrate(func_id);
+	return (is_psci_0_1(cpu_suspend, func_id) ||
+		is_psci_0_1(cpu_on, func_id) ||
+		is_psci_0_1(cpu_off, func_id) ||
+		is_psci_0_1(migrate, func_id));
 }
 
 static bool is_psci_0_2_call(u64 func_id)
@@ -94,16 +62,6 @@ static bool is_psci_0_2_call(u64 func_id)
 	       (PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31));
 }
 
-static bool is_psci_call(u64 func_id)
-{
-	switch (kvm_host_psci_config.version) {
-	case PSCI_VERSION(0, 1):
-		return is_psci_0_1_call(func_id);
-	default:
-		return is_psci_0_2_call(func_id);
-	}
-}
-
 static unsigned long psci_call(unsigned long fn, unsigned long arg0,
 			       unsigned long arg1, unsigned long arg2)
 {
@@ -273,14 +231,14 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
 
 static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
 {
-	if (is_psci_0_1_cpu_off(func_id) || is_psci_0_1_migrate(func_id))
+	if (is_psci_0_1(cpu_off, func_id) || is_psci_0_1(migrate, func_id))
 		return psci_forward(host_ctxt);
-	else if (is_psci_0_1_cpu_on(func_id))
+	if (is_psci_0_1(cpu_on, func_id))
 		return psci_cpu_on(func_id, host_ctxt);
-	else if (is_psci_0_1_cpu_suspend(func_id))
+	if (is_psci_0_1(cpu_suspend, func_id))
 		return psci_cpu_suspend(func_id, host_ctxt);
-	else
-		return PSCI_RET_NOT_SUPPORTED;
+
+	return PSCI_RET_NOT_SUPPORTED;
 }
 
 static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
@@ -322,20 +280,23 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_
 
 bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt)
 {
-	u64 func_id = get_psci_func_id(host_ctxt);
+	DECLARE_REG(u64, func_id, host_ctxt, 0);
 	unsigned long ret;
 
-	if (!is_psci_call(func_id))
-		return false;
-
 	switch (kvm_host_psci_config.version) {
 	case PSCI_VERSION(0, 1):
+		if (!is_psci_0_1_call(func_id))
+			return false;
 		ret = psci_0_1_handler(func_id, host_ctxt);
 		break;
 	case PSCI_VERSION(0, 2):
+		if (!is_psci_0_2_call(func_id))
+			return false;
 		ret = psci_0_2_handler(func_id, host_ctxt);
 		break;
 	default:
+		if (!is_psci_0_2_call(func_id))
+			return false;
 		ret = psci_1_0_handler(func_id, host_ctxt);
 		break;
 	}

From 8b3fd902391fdee526f6ba46899a3f8005983ae1 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Thu, 17 Dec 2020 08:15:01 +0100
Subject: [PATCH 112/547] MAINTAINERS: include governors into CPU IDLE TIME
 MANAGEMENT FRAMEWORK

The current pattern in the file entry does not make the files in the
governors subdirectory to be a part of the CPU IDLE TIME MANAGEMENT
FRAMEWORK.

Adjust the file pattern to include files in governors.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7e76ae7ee8a2..59be280069d1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4583,7 +4583,7 @@ B:	https://bugzilla.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
 F:	Documentation/admin-guide/pm/cpuidle.rst
 F:	Documentation/driver-api/pm/cpuidle.rst
-F:	drivers/cpuidle/*
+F:	drivers/cpuidle/
 F:	include/linux/cpuidle.h
 
 CPU POWER MONITORING SUBSYSTEM

From 7887cc89d5851cbdec49219e9614beec776af150 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 14 Dec 2020 23:34:13 +0100
Subject: [PATCH 113/547] ARM: dts: ux500/golden: Set display max brightness

A too high brightness by default (default is max) makes the
screen go blank. Set this to 15 as in the Vendor tree.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Stephan Gerhold <stephan@gerhold.net>
Link: https://lore.kernel.org/r/20201214223413.253893-1-linus.walleij@linaro.org'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/boot/dts/ste-ux500-samsung-golden.dts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts
index a1093cb37dc7..aed1f2d5f246 100644
--- a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts
+++ b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts
@@ -326,6 +326,7 @@
 				panel@0 {
 					compatible = "samsung,s6e63m0";
 					reg = <0>;
+					max-brightness = <15>;
 					vdd3-supply = <&panel_reg_3v0>;
 					vci-supply = <&panel_reg_1v8>;
 					reset-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>;

From f87777a3c30cf50c66a20e1d153f0e003bb30774 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sat, 19 Dec 2020 14:50:36 +0100
Subject: [PATCH 114/547] net: stmmac: dwmac-meson8b: ignore the second clock
 input

The dwmac glue registers on Amlogic Meson8b and newer SoCs has two clock
inputs:
- Meson8b and Meson8m2: MPLL2 and MPLL2 (the same parent is wired to
  both inputs)
- GXBB, GXL, GXM, AXG, G12A, G12B, SM1: FCLK_DIV2 and MPLL2

All known vendor kernels and u-boots are using the first input only. We
let the common clock framework automatically choose the "right" parent.
For some boards this causes a problem though, specificially with G12A and
newer SoCs. The clock input is used for generating the 125MHz RGMII TX
clock. For the two input clocks this means on G12A:
- FCLK_DIV2: 999999985Hz / 8 = 124999998.125Hz
- MPLL2: 499999993Hz / 4 = 124999998.25Hz

In theory MPLL2 is the "better" clock input because it's gets us 0.125Hz
closer to the requested frequency than FCLK_DIV2. In reality however
there is a resource conflict because MPLL2 is needed to generate some of
the audio clocks. dwmac-meson8b probes first and sets up the clock tree
with MPLL2. This works fine until the audio driver comes and "steals"
the MPLL2 clocks and configures it with it's own rate (294909637Hz). The
common clock framework happily changes the MPLL2 rate but does not
reconfigure our RGMII TX clock tree, which then ends up at 73727409Hz,
which is more than 40% off the requested 125MHz.

Don't use the second clock input for now to force the common clock
framework to always select the first parent. This mimics the behavior
from the vendor driver and fixes the clock resource conflict with the
audio driver on G12A boards. Once the common clock framework can handle
this situation this change can be reverted again.

Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
Reported-by: Thomas Graichen <thomas.graichen@gmail.com>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Tested-by: thomas graichen <thomas.graichen@gmail.com>
Link: https://lore.kernel.org/r/20201219135036.3216017-1-martin.blumenstingl@googlemail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 459ae715b33d..f184b00f5116 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -135,7 +135,7 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
 	struct device *dev = dwmac->dev;
 	static const struct clk_parent_data mux_parents[] = {
 		{ .fw_name = "clkin0", },
-		{ .fw_name = "clkin1", },
+		{ .index = -1, },
 	};
 	static const struct clk_div_table div_table[] = {
 		{ .div = 2, .val = 2, },

From 8b0f64b113d617c995ffdf50196948c3e99c6e49 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 19 Dec 2020 10:55:38 -0800
Subject: [PATCH 115/547] MAINTAINERS: remove names from mailing list
 maintainers

When searching for inactive maintainers it's useful to filter
out mailing list addresses. Such "maintainers" will obviously
never feature in a "From:" line of an email or a review tag.

Since "L:" entries only provide the address of a mailing list
without a fancy name extend this pattern to "M:" entries.

Alternatively we could reserve M: entries for humans only
and move the fake "maintainers" to L:. While I'd personally
prefer to reserve M: for humans only, I'm not 100% that's
a great choice either, given most L: entries are in fact
open mailing lists with public archives.

Link: https://lore.kernel.org/r/20201219185538.750076-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 28d7acdb0591..20cd4cc08dd1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -203,8 +203,8 @@ F:	include/uapi/linux/nl80211.h
 F:	net/wireless/
 
 8169 10/100/1000 GIGABIT ETHERNET DRIVER
-M:	Realtek linux nic maintainers <nic_swsd@realtek.com>
 M:	Heiner Kallweit <hkallweit1@gmail.com>
+M:	nic_swsd@realtek.com
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/realtek/r8169*
@@ -2119,7 +2119,7 @@ N:	atmel
 ARM/Microchip Sparx5 SoC support
 M:	Lars Povlsen <lars.povlsen@microchip.com>
 M:	Steen Hegelund <Steen.Hegelund@microchip.com>
-M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+M:	UNGLinuxDriver@microchip.com
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 T:	git git://github.com/microchip-ung/linux-upstream.git
@@ -3958,7 +3958,7 @@ F:	net/can/
 CAN-J1939 NETWORK LAYER
 M:	Robin van der Gracht <robin@protonic.nl>
 M:	Oleksij Rempel <o.rempel@pengutronix.de>
-R:	Pengutronix Kernel Team <kernel@pengutronix.de>
+R:	kernel@pengutronix.de
 L:	linux-can@vger.kernel.org
 S:	Maintained
 F:	Documentation/networking/j1939.rst
@@ -11651,7 +11651,7 @@ F:	drivers/media/platform/atmel/atmel-isi.h
 
 MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER
 M:	Woojung Huh <woojung.huh@microchip.com>
-M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+M:	UNGLinuxDriver@microchip.com
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
@@ -11661,7 +11661,7 @@ F:	net/dsa/tag_ksz.c
 
 MICROCHIP LAN743X ETHERNET DRIVER
 M:	Bryan Whitehead <bryan.whitehead@microchip.com>
-M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+M:	UNGLinuxDriver@microchip.com
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/microchip/lan743x_*
@@ -11755,7 +11755,7 @@ F:	drivers/net/wireless/microchip/wilc1000/
 
 MICROSEMI MIPS SOCS
 M:	Alexandre Belloni <alexandre.belloni@bootlin.com>
-M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+M:	UNGLinuxDriver@microchip.com
 L:	linux-mips@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/mips/mscc.txt
@@ -12809,10 +12809,10 @@ F:	tools/objtool/
 F:	include/linux/objtool.h
 
 OCELOT ETHERNET SWITCH DRIVER
-M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
 M:	Vladimir Oltean <vladimir.oltean@nxp.com>
 M:	Claudiu Manoil <claudiu.manoil@nxp.com>
 M:	Alexandre Belloni <alexandre.belloni@bootlin.com>
+M:	UNGLinuxDriver@microchip.com
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/dsa/ocelot/*
@@ -13874,7 +13874,7 @@ F:	drivers/platform/x86/peaq-wmi.c
 
 PENSANDO ETHERNET DRIVERS
 M:	Shannon Nelson <snelson@pensando.io>
-M:	Pensando Drivers <drivers@pensando.io>
+M:	drivers@pensando.io
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	Documentation/networking/device_drivers/ethernet/pensando/ionic.rst
@@ -14653,7 +14653,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
 F:	drivers/net/wireless/ath/ath11k/
 
 QUALCOMM ATHEROS ATH9K WIRELESS DRIVER
-M:	QCA ath9k Development <ath9k-devel@qca.qualcomm.com>
+M:	ath9k-devel@qca.qualcomm.com
 L:	linux-wireless@vger.kernel.org
 S:	Supported
 W:	https://wireless.wiki.kernel.org/en/users/Drivers/ath9k
@@ -18338,7 +18338,7 @@ F:	include/linux/usb/isp116x.h
 
 USB LAN78XX ETHERNET DRIVER
 M:	Woojung Huh <woojung.huh@microchip.com>
-M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+M:	UNGLinuxDriver@microchip.com
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/microchip,lan78xx.txt
@@ -18452,7 +18452,7 @@ F:	drivers/net/usb/smsc75xx.*
 
 USB SMSC95XX ETHERNET DRIVER
 M:	Steve Glendinning <steve.glendinning@shawell.net>
-M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+M:	UNGLinuxDriver@microchip.com
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/usb/smsc95xx.*
@@ -18999,7 +18999,7 @@ F:	drivers/input/mouse/vmmouse.h
 
 VMWARE VMXNET3 ETHERNET DRIVER
 M:	Ronak Doshi <doshir@vmware.com>
-M:	"VMware, Inc." <pv-drivers@vmware.com>
+M:	pv-drivers@vmware.com
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/vmxnet3/

From a0c8be56affa7d5ffbdec24c992223be54db3b6e Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Sat, 19 Dec 2020 15:39:19 -0600
Subject: [PATCH 116/547] ibmvnic: fix login buffer memory leak

Commit 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks") frees
login_rsp_buffer in release_resources() and send_login()
because handle_login_rsp() does not free it.
Commit f3ae59c0c015 ("ibmvnic: store RX and TX subCRQ handle array in
ibmvnic_adapter struct") frees login_rsp_buffer in handle_login_rsp().
It seems unnecessary to free it in release_resources() and send_login().
There are chances that handle_login_rsp returns earlier without freeing
buffers. Double-checking the buffer is harmless since
release_login_buffer and release_login_rsp_buffer will
do nothing if buffer is already freed.

Fixes: f3ae59c0c015 ("ibmvnic: store RX and TX subCRQ handle array in ibmvnic_adapter struct")
Fixes: 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks")
Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Link: https://lore.kernel.org/r/20201219213919.21045-1-ljp@linux.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index f302504faa8a..bd0281020d51 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -955,6 +955,7 @@ static void release_resources(struct ibmvnic_adapter *adapter)
 	release_rx_pools(adapter);
 
 	release_napi(adapter);
+	release_login_buffer(adapter);
 	release_login_rsp_buffer(adapter);
 }
 
@@ -3873,7 +3874,9 @@ static int send_login(struct ibmvnic_adapter *adapter)
 		return -1;
 	}
 
+	release_login_buffer(adapter);
 	release_login_rsp_buffer(adapter);
+
 	client_data_len = vnic_client_data_len(adapter);
 
 	buffer_size =

From 58f60329a6be35a5653edb3fd2023ccef9eb9943 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Sun, 20 Dec 2020 16:29:30 +0800
Subject: [PATCH 117/547] net: ethernet: mvneta: Fix error handling in
 mvneta_probe

When mvneta_port_power_up() fails, we should execute
cleanup functions after label err_netdev to avoid memleak.

Fixes: 41c2b6b4f0f80 ("net: ethernet: mvneta: Add back interface mode validation")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Link: https://lore.kernel.org/r/20201220082930.21623-1-dinghao.liu@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 563ceac3060f..3369ec717a51 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -5255,7 +5255,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	err = mvneta_port_power_up(pp, pp->phy_interface);
 	if (err < 0) {
 		dev_err(&pdev->dev, "can't power up port\n");
-		return err;
+		goto err_netdev;
 	}
 
 	/* Armada3700 network controller does not support per-cpu

From 1d898b283576c38dedcb6b21fcbb65968ab03581 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Sun, 20 Dec 2020 10:49:47 +0200
Subject: [PATCH 118/547] docs: netdev-FAQ: fix question headers formatting

Join adjacent questions to a single question line. This fixes the
formatting of questions that were not part of the heading.

Also, drop Q: and A: prefixes. We don't need them now that questions and
answers are visually separated.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Link: https://lore.kernel.org/r/f76078ba5547744f2ec178984c32fbc7dcd29a2b.1608454187.git.baruch@tkos.co.il
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/netdev-FAQ.rst | 126 +++++++++++-------------
 1 file changed, 59 insertions(+), 67 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 4b9ed5874d5a..ae2ae37cd921 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -6,9 +6,9 @@
 netdev FAQ
 ==========
 
-Q: What is netdev?
-------------------
-A: It is a mailing list for all network-related Linux stuff.  This
+What is netdev?
+---------------
+It is a mailing list for all network-related Linux stuff.  This
 includes anything found under net/ (i.e. core code like IPv6) and
 drivers/net (i.e. hardware specific drivers) in the Linux source tree.
 
@@ -25,9 +25,9 @@ Aside from subsystems like that mentioned above, all network-related
 Linux development (i.e. RFC, review, comments, etc.) takes place on
 netdev.
 
-Q: How do the changes posted to netdev make their way into Linux?
------------------------------------------------------------------
-A: There are always two trees (git repositories) in play.  Both are
+How do the changes posted to netdev make their way into Linux?
+--------------------------------------------------------------
+There are always two trees (git repositories) in play.  Both are
 driven by David Miller, the main network maintainer.  There is the
 ``net`` tree, and the ``net-next`` tree.  As you can probably guess from
 the names, the ``net`` tree is for fixes to existing code already in the
@@ -37,9 +37,9 @@ for the future release.  You can find the trees here:
 - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
 
-Q: How often do changes from these trees make it to the mainline Linus tree?
-----------------------------------------------------------------------------
-A: To understand this, you need to know a bit of background information on
+How often do changes from these trees make it to the mainline Linus tree?
+-------------------------------------------------------------------------
+To understand this, you need to know a bit of background information on
 the cadence of Linux development.  Each new release starts off with a
 two week "merge window" where the main maintainers feed their new stuff
 to Linus for merging into the mainline tree.  After the two weeks, the
@@ -81,7 +81,8 @@ focus for ``net`` is on stabilization and bug fixes.
 
 Finally, the vX.Y gets released, and the whole cycle starts over.
 
-Q: So where are we now in this cycle?
+So where are we now in this cycle?
+----------------------------------
 
 Load the mainline (Linus) page here:
 
@@ -91,9 +92,9 @@ and note the top of the "tags" section.  If it is rc1, it is early in
 the dev cycle.  If it was tagged rc7 a week ago, then a release is
 probably imminent.
 
-Q: How do I indicate which tree (net vs. net-next) my patch should be in?
--------------------------------------------------------------------------
-A: Firstly, think whether you have a bug fix or new "next-like" content.
+How do I indicate which tree (net vs. net-next) my patch should be in?
+----------------------------------------------------------------------
+Firstly, think whether you have a bug fix or new "next-like" content.
 Then once decided, assuming that you use git, use the prefix flag, i.e.
 ::
 
@@ -105,48 +106,45 @@ in the above is just the subject text of the outgoing e-mail, and you
 can manually change it yourself with whatever MUA you are comfortable
 with.
 
-Q: I sent a patch and I'm wondering what happened to it?
---------------------------------------------------------
-Q: How can I tell whether it got merged?
-A: Start by looking at the main patchworks queue for netdev:
+I sent a patch and I'm wondering what happened to it - how can I tell whether it got merged?
+--------------------------------------------------------------------------------------------
+Start by looking at the main patchworks queue for netdev:
 
   https://patchwork.kernel.org/project/netdevbpf/list/
 
 The "State" field will tell you exactly where things are at with your
 patch.
 
-Q: The above only says "Under Review".  How can I find out more?
-----------------------------------------------------------------
-A: Generally speaking, the patches get triaged quickly (in less than
+The above only says "Under Review".  How can I find out more?
+-------------------------------------------------------------
+Generally speaking, the patches get triaged quickly (in less than
 48h).  So be patient.  Asking the maintainer for status updates on your
 patch is a good way to ensure your patch is ignored or pushed to the
 bottom of the priority list.
 
-Q: I submitted multiple versions of the patch series
-----------------------------------------------------
-Q: should I directly update patchwork for the previous versions of these
-patch series?
-A: No, please don't interfere with the patch status on patchwork, leave
+I submitted multiple versions of the patch series. Should I directly update patchwork for the previous versions of these patch series?
+--------------------------------------------------------------------------------------------------------------------------------------
+No, please don't interfere with the patch status on patchwork, leave
 it to the maintainer to figure out what is the most recent and current
 version that should be applied. If there is any doubt, the maintainer
 will reply and ask what should be done.
 
-Q: I made changes to only a few patches in a patch series should I resend only those changed?
----------------------------------------------------------------------------------------------
-A: No, please resend the entire patch series and make sure you do number your
+I made changes to only a few patches in a patch series should I resend only those changed?
+------------------------------------------------------------------------------------------
+No, please resend the entire patch series and make sure you do number your
 patches such that it is clear this is the latest and greatest set of patches
 that can be applied.
 
-Q: I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do?
--------------------------------------------------------------------------------------------------------------------------------------------
-A: There is no revert possible, once it is pushed out, it stays like that.
+I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do?
+----------------------------------------------------------------------------------------------------------------------------------------
+There is no revert possible, once it is pushed out, it stays like that.
 Please send incremental versions on top of what has been merged in order to fix
 the patches the way they would look like if your latest patch series was to be
 merged.
 
-Q: How can I tell what patches are queued up for backporting to the various stable releases?
---------------------------------------------------------------------------------------------
-A: Normally Greg Kroah-Hartman collects stable commits himself, but for
+How can I tell what patches are queued up for backporting to the various stable releases?
+-----------------------------------------------------------------------------------------
+Normally Greg Kroah-Hartman collects stable commits himself, but for
 networking, Dave collects up patches he deems critical for the
 networking subsystem, and then hands them off to Greg.
 
@@ -169,11 +167,9 @@ simply clone the repo, and then git grep the mainline commit ID, e.g.
   releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
   stable/stable-queue$
 
-Q: I see a network patch and I think it should be backported to stable.
------------------------------------------------------------------------
-Q: Should I request it via stable@vger.kernel.org like the references in
-the kernel's Documentation/process/stable-kernel-rules.rst file say?
-A: No, not for networking.  Check the stable queues as per above first
+I see a network patch and I think it should be backported to stable. Should I request it via stable@vger.kernel.org like the references in the kernel's Documentation/process/stable-kernel-rules.rst file say?
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+No, not for networking.  Check the stable queues as per above first
 to see if it is already queued.  If not, then send a mail to netdev,
 listing the upstream commit ID and why you think it should be a stable
 candidate.
@@ -190,11 +186,9 @@ mainline, the better the odds that it is an OK candidate for stable.  So
 scrambling to request a commit be added the day after it appears should
 be avoided.
 
-Q: I have created a network patch and I think it should be backported to stable.
---------------------------------------------------------------------------------
-Q: Should I add a Cc: stable@vger.kernel.org like the references in the
-kernel's Documentation/ directory say?
-A: No.  See above answer.  In short, if you think it really belongs in
+I have created a network patch and I think it should be backported to stable. Should I add a Cc: stable@vger.kernel.org like the references in the kernel's Documentation/ directory say?
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+No.  See above answer.  In short, if you think it really belongs in
 stable, then ensure you write a decent commit log that describes who
 gets impacted by the bug fix and how it manifests itself, and when the
 bug was introduced.  If you do that properly, then the commit will get
@@ -207,18 +201,18 @@ marker line as described in
 :ref:`Documentation/process/submitting-patches.rst <the_canonical_patch_format>`
 to temporarily embed that information into the patch that you send.
 
-Q: Are all networking bug fixes backported to all stable releases?
-------------------------------------------------------------------
-A: Due to capacity, Dave could only take care of the backports for the
+Are all networking bug fixes backported to all stable releases?
+---------------------------------------------------------------
+Due to capacity, Dave could only take care of the backports for the
 last two stable releases. For earlier stable releases, each stable
 branch maintainer is supposed to take care of them. If you find any
 patch is missing from an earlier stable branch, please notify
 stable@vger.kernel.org with either a commit ID or a formal patch
 backported, and CC Dave and other relevant networking developers.
 
-Q: Is the comment style convention different for the networking content?
-------------------------------------------------------------------------
-A: Yes, in a largely trivial way.  Instead of this::
+Is the comment style convention different for the networking content?
+---------------------------------------------------------------------
+Yes, in a largely trivial way.  Instead of this::
 
   /*
    * foobar blah blah blah
@@ -231,32 +225,30 @@ it is requested that you make it look like this::
    * another line of text
    */
 
-Q: I am working in existing code that has the former comment style and not the latter.
---------------------------------------------------------------------------------------
-Q: Should I submit new code in the former style or the latter?
-A: Make it the latter style, so that eventually all code in the domain
+I am working in existing code that has the former comment style and not the latter. Should I submit new code in the former style or the latter?
+-----------------------------------------------------------------------------------------------------------------------------------------------
+Make it the latter style, so that eventually all code in the domain
 of netdev is of this format.
 
-Q: I found a bug that might have possible security implications or similar.
----------------------------------------------------------------------------
-Q: Should I mail the main netdev maintainer off-list?**
-A: No. The current netdev maintainer has consistently requested that
+I found a bug that might have possible security implications or similar. Should I mail the main netdev maintainer off-list?
+---------------------------------------------------------------------------------------------------------------------------
+No. The current netdev maintainer has consistently requested that
 people use the mailing lists and not reach out directly.  If you aren't
 OK with that, then perhaps consider mailing security@kernel.org or
 reading about http://oss-security.openwall.org/wiki/mailing-lists/distros
 as possible alternative mechanisms.
 
-Q: What level of testing is expected before I submit my change?
----------------------------------------------------------------
-A: If your changes are against ``net-next``, the expectation is that you
+What level of testing is expected before I submit my change?
+------------------------------------------------------------
+If your changes are against ``net-next``, the expectation is that you
 have tested by layering your changes on top of ``net-next``.  Ideally
 you will have done run-time testing specific to your change, but at a
 minimum, your changes should survive an ``allyesconfig`` and an
 ``allmodconfig`` build without new warnings or failures.
 
-Q: How do I post corresponding changes to user space components?
-----------------------------------------------------------------
-A: User space code exercising kernel features should be posted
+How do I post corresponding changes to user space components?
+-------------------------------------------------------------
+User space code exercising kernel features should be posted
 alongside kernel patches. This gives reviewers a chance to see
 how any new interface is used and how well it works.
 
@@ -280,9 +272,9 @@ to the mailing list, e.g.::
 Posting as one thread is discouraged because it confuses patchwork
 (as of patchwork 2.2.2).
 
-Q: Any other tips to help ensure my net/net-next patch gets OK'd?
------------------------------------------------------------------
-A: Attention to detail.  Re-read your own work as if you were the
+Any other tips to help ensure my net/net-next patch gets OK'd?
+--------------------------------------------------------------
+Attention to detail.  Re-read your own work as if you were the
 reviewer.  You can start with using ``checkpatch.pl``, perhaps even with
 the ``--strict`` flag.  But do not be mindlessly robotic in doing so.
 If your change is a bug fix, make sure your commit log indicates the

From 5d5647dad259bb416fd5d3d87012760386d97530 Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc@marvell.com>
Date: Mon, 21 Dec 2020 06:55:30 -0800
Subject: [PATCH 119/547] qede: fix offload for IPIP tunnel packets

IPIP tunnels packets are unknown to device,
hence these packets are incorrectly parsed and
caused the packet corruption, so disable offlods
for such packets at run time.

Signed-off-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: Sudarsana Kalluru <skalluru@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Link: https://lore.kernel.org/r/20201221145530.7771-1-manishc@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/qlogic/qede/qede_fp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index a2494bf85007..ca0ee29a57b5 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1799,6 +1799,11 @@ netdev_features_t qede_features_check(struct sk_buff *skb,
 			      ntohs(udp_hdr(skb)->dest) != gnv_port))
 				return features & ~(NETIF_F_CSUM_MASK |
 						    NETIF_F_GSO_MASK);
+		} else if (l4_proto == IPPROTO_IPIP) {
+			/* IPIP tunnels are unknown to the device or at least unsupported natively,
+			 * offloads for them can't be done trivially, so disable them for such skb.
+			 */
+			return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 		}
 	}
 

From f86de9b1c0663b0a3ca2dcddec9aa910ff0fbf2c Mon Sep 17 00:00:00 2001
From: Kailang Yang <kailang@realtek.com>
Date: Fri, 23 Oct 2020 14:46:47 +0800
Subject: [PATCH 120/547] ALSA: hda/realtek - Fix speaker volume control on
 Lenovo C940

Cannot adjust speaker's volume on Lenovo C940.
Applying the alc298_fixup_speaker_volume function can fix the issue.

[ Additional note: C940 has I2S amp for the speaker and this needs the
  same initialization as Dell machines.
  The patch was slightly modified so that the quirk entry is moved
  next to the corresponding Dell quirk entry. -- tiwai ]

Signed-off-by: Kailang Yang <kailang@realtek.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/ea25b4e5c468491aa2e9d6cb1f2fced3@realtek.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index dde5ba209541..5b03259622a8 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6289,6 +6289,7 @@ enum {
 	ALC221_FIXUP_HP_FRONT_MIC,
 	ALC292_FIXUP_TPT460,
 	ALC298_FIXUP_SPK_VOLUME,
+	ALC298_FIXUP_LENOVO_SPK_VOLUME,
 	ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER,
 	ALC269_FIXUP_ATIV_BOOK_8,
 	ALC221_FIXUP_HP_MIC_NO_PRESENCE,
@@ -7119,6 +7120,10 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
 	},
+	[ALC298_FIXUP_LENOVO_SPK_VOLUME] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc298_fixup_speaker_volume,
+	},
 	[ALC295_FIXUP_DISABLE_DAC3] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc295_fixup_disable_dac3,
@@ -8126,6 +8131,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+	SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI),

From c1e8952395c1f44a6304c71401519d19ed2ac56a Mon Sep 17 00:00:00 2001
From: Kailang Yang <kailang@realtek.com>
Date: Wed, 23 Dec 2020 15:34:57 +0800
Subject: [PATCH 121/547] ALSA: hda/realtek - Modify Dell platform name

Dell platform SSID:0x0a58 change platform name.
Use the generic name instead for avoiding confusion.

Fixes: 150927c3674d ("ALSA: hda/realtek - Supported Dell fixed type headset")
Signed-off-by: Kailang Yang <kailang@realtek.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/efe7c196158241aa817229df7835d645@realtek.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5b03259622a8..f49cf0f227c4 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -7890,7 +7890,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
-	SND_PCI_QUIRK(0x1028, 0x0a58, "Dell Precision 3650 Tower", ALC255_FIXUP_DELL_HEADSET_MIC),
+	SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),

From 3557ae187c32203d1bb8b48ee1e2e7bdb23d98d5 Mon Sep 17 00:00:00 2001
From: Alexandru Elisei <alexandru.elisei@arm.com>
Date: Tue, 1 Dec 2020 15:01:53 +0000
Subject: [PATCH 122/547] KVM: Documentation: Add arm64 KVM_RUN error codes

The API documentation states that general error codes are not detailed, but
errors with specific meanings are. On arm64, KVM_RUN can return error
numbers with a different meaning than what is described by POSIX or the C99
standard (as taken from man 3 errno).

Absent from the newly documented error codes is ERANGE which can be
returned when making a change to the EL2 stage 1 tables if the address is
larger than the largest supported input address. Assuming no bugs in the
implementation, that is not possible because the input addresses which are
mapped are the result of applying the macro kern_hyp_va() on kernel virtual
addresses.

CC: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201201150157.223625-2-alexandru.elisei@arm.com
---
 Documentation/virt/kvm/api.rst | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index e00a66d72372..4e5316ed10e9 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -380,9 +380,14 @@ This ioctl is obsolete and has been removed.
 
 Errors:
 
-  =====      =============================
+  =======    ==============================================================
   EINTR      an unmasked signal is pending
-  =====      =============================
+  ENOEXEC    the vcpu hasn't been initialized or the guest tried to execute
+             instructions from device memory (arm64)
+  ENOSYS     data abort outside memslots with no syndrome info and
+             KVM_CAP_ARM_NISV_TO_USER not enabled (arm64)
+  EPERM      SVE feature set but not finalized (arm64)
+  =======    ==============================================================
 
 This ioctl is used to run a guest virtual cpu.  While there are no
 explicit parameters, there is an implicit parameter block that can be

From f16570ba47ff2b3766ebeaba6f4b80ad48cfd6a1 Mon Sep 17 00:00:00 2001
From: Alexandru Elisei <alexandru.elisei@arm.com>
Date: Tue, 1 Dec 2020 15:01:54 +0000
Subject: [PATCH 123/547] KVM: arm64: arch_timer: Remove VGIC initialization
 check

kvm_timer_enable() is called in kvm_vcpu_first_run_init() after
kvm_vgic_map_resources() if the VGIC wasn't ready. kvm_vgic_map_resources()
is the only place where kvm->arch.vgic.ready is set to true.

For a v2 VGIC, kvm_vgic_map_resources() will attempt to initialize the VGIC
and set the initialized flag.

For a v3 VGIC, kvm_vgic_map_resources() will return an error code if the
VGIC isn't already initialized.

The end result is that if we've reached kvm_timer_enable(), the VGIC is
initialzed and ready and vgic_initialized() will always be true, so remove
this check.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
[maz: added comment about vgic initialisation, as suggested by Eric]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201201150157.223625-3-alexandru.elisei@arm.com
---
 arch/arm64/kvm/arch_timer.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 32ba6fbc3814..74e0699661e9 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -1129,9 +1129,10 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 	if (!irqchip_in_kernel(vcpu->kvm))
 		goto no_vgic;
 
-	if (!vgic_initialized(vcpu->kvm))
-		return -ENODEV;
-
+	/*
+	 * At this stage, we have the guarantee that the vgic is both
+	 * available and initialized.
+	 */
 	if (!timer_irqs_are_valid(vcpu)) {
 		kvm_debug("incorrectly configured timer irqs\n");
 		return -EINVAL;

From 1c91f06d296de4f0c27022f5ec464e047d471215 Mon Sep 17 00:00:00 2001
From: Alexandru Elisei <alexandru.elisei@arm.com>
Date: Tue, 1 Dec 2020 15:01:55 +0000
Subject: [PATCH 124/547] KVM: arm64: Move double-checked lock to
 kvm_vgic_map_resources()

kvm_vgic_map_resources() is called when a VCPU if first run and it maps all
the VGIC MMIO regions. To prevent double-initialization, the VGIC uses the
ready variable to keep track of the state of resources and the global KVM
mutex to protect against concurrent accesses. After the lock is taken, the
variable is checked again in case another VCPU took the lock between the
current VCPU reading ready equals false and taking the lock.

The double-checked lock pattern is spread across four different functions:
in kvm_vcpu_first_run_init(), in kvm_vgic_map_resource() and in
vgic_{v2,v3}_map_resources(), which makes it hard to reason about and
introduces minor code duplication. Consolidate the checks in
kvm_vgic_map_resources(), where the lock is taken.

No functional change intended.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201201150157.223625-4-alexandru.elisei@arm.com
---
 arch/arm64/kvm/arm.c            | 8 +++-----
 arch/arm64/kvm/vgic/vgic-init.c | 6 ++++++
 arch/arm64/kvm/vgic/vgic-v2.c   | 3 ---
 arch/arm64/kvm/vgic/vgic-v3.c   | 3 ---
 4 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e207e4541f55..ab782c480e9a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -580,11 +580,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 		 * Map the VGIC hardware resources before running a vcpu the
 		 * first time on this VM.
 		 */
-		if (unlikely(!vgic_ready(kvm))) {
-			ret = kvm_vgic_map_resources(kvm);
-			if (ret)
-				return ret;
-		}
+		ret = kvm_vgic_map_resources(kvm);
+		if (ret)
+			return ret;
 	} else {
 		/*
 		 * Tell the rest of the code that there are userspace irqchip
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 32e32d67a127..a2f4d1c85f00 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -428,7 +428,13 @@ int kvm_vgic_map_resources(struct kvm *kvm)
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	int ret = 0;
 
+	if (likely(vgic_ready(kvm)))
+		return 0;
+
 	mutex_lock(&kvm->lock);
+	if (vgic_ready(kvm))
+		goto out;
+
 	if (!irqchip_in_kernel(kvm))
 		goto out;
 
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index ebf53a4e1296..7f38c1a93639 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -306,9 +306,6 @@ int vgic_v2_map_resources(struct kvm *kvm)
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	int ret = 0;
 
-	if (vgic_ready(kvm))
-		goto out;
-
 	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
 	    IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
 		kvm_err("Need to set vgic cpu and dist addresses first\n");
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 9cdf39a94a63..35029c5cb0f1 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -500,9 +500,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
 	int ret = 0;
 	int c;
 
-	if (vgic_ready(kvm))
-		goto out;
-
 	kvm_for_each_vcpu(c, vcpu, kvm) {
 		struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 

From de33212f768c5d9e2fe791b008cb26f92f0aa31c Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@akamai.com>
Date: Tue, 22 Dec 2020 21:54:21 -0500
Subject: [PATCH 125/547] virtio_net: Fix recursive call to cpus_read_lock()

virtnet_set_channels can recursively call cpus_read_lock if CONFIG_XPS
and CONFIG_HOTPLUG are enabled.

The path is:
    virtnet_set_channels - calls get_online_cpus(), which is a trivial
wrapper around cpus_read_lock()
    netif_set_real_num_tx_queues
    netif_reset_xps_queues_gt
    netif_reset_xps_queues - calls cpus_read_lock()

This call chain and potential deadlock happens when the number of TX
queues is reduced.

This commit the removes netif_set_real_num_[tr]x_queues calls from
inside the get/put_online_cpus section, as they don't require that it
be held.

Fixes: 47be24796c13 ("virtio-net: fix the set affinity bug when CPU IDs are not consecutive")
Signed-off-by: Jeff Dike <jdike@akamai.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/r/20201223025421.671-1-jdike@akamai.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/virtio_net.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 052975ea0af4..ee611d107294 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2093,14 +2093,16 @@ static int virtnet_set_channels(struct net_device *dev,
 
 	get_online_cpus();
 	err = _virtnet_set_queues(vi, queue_pairs);
-	if (!err) {
-		netif_set_real_num_tx_queues(dev, queue_pairs);
-		netif_set_real_num_rx_queues(dev, queue_pairs);
-
-		virtnet_set_affinity(vi);
+	if (err) {
+		put_online_cpus();
+		goto err;
 	}
+	virtnet_set_affinity(vi);
 	put_online_cpus();
 
+	netif_set_real_num_tx_queues(dev, queue_pairs);
+	netif_set_real_num_rx_queues(dev, queue_pairs);
+ err:
 	return err;
 }
 

From c06ccf3ebb7503706ea49fd248e709287ef385a3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Dec 2020 18:45:57 +0100
Subject: [PATCH 126/547] ALSA: usb-audio: Fix UBSAN warnings for MIDI jacks

The calculation of in_cables and out_cables bitmaps are done with the
bit shift by the value from the descriptor, which is an arbitrary
value, and can lead to UBSAN shift-out-of-bounds warnings.

Fix it by filtering the bad descriptor values with the check of the
upper bound 0x10 (the cable bitmaps are 16 bits).

Reported-by: syzbot+92e45ae45543f89e8c88@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201223174557.10249-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/midi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index c8213652470c..0c23fa6d8525 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1889,6 +1889,8 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi *umidi,
 		ms_ep = find_usb_ms_endpoint_descriptor(hostep);
 		if (!ms_ep)
 			continue;
+		if (ms_ep->bNumEmbMIDIJack > 0x10)
+			continue;
 		if (usb_endpoint_dir_out(ep)) {
 			if (endpoints[epidx].out_ep) {
 				if (++epidx >= MIDI_MAX_ENDPOINTS) {
@@ -2141,6 +2143,8 @@ static int snd_usbmidi_detect_roland(struct snd_usb_midi *umidi,
 		    cs_desc[1] == USB_DT_CS_INTERFACE &&
 		    cs_desc[2] == 0xf1 &&
 		    cs_desc[3] == 0x02) {
+			if (cs_desc[4] > 0x10 || cs_desc[5] > 0x10)
+				continue;
 			endpoint->in_cables  = (1 << cs_desc[4]) - 1;
 			endpoint->out_cables = (1 << cs_desc[5]) - 1;
 			return snd_usbmidi_detect_endpoints(umidi, endpoint, 1);

From b250bf5f924f7b42725fc9e4135aa0b667dfb119 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Tue, 22 Dec 2020 09:16:13 -0600
Subject: [PATCH 127/547] net: ipa: fix interconnect enable bug

When the core clock rate and interconnect bandwidth specifications
were moved into configuration data, a copy/paste bug was introduced,
causing the memory interconnect bandwidth to be set three times
rather than enabling the three different interconnects.

Fix this bug.

Fixes: 91d02f9551501 ("net: ipa: use config data for clocking")
Signed-off-by: Alex Elder <elder@linaro.org>
Reviewed-by: Georgi Djakov <georgi.djakov@linaro.org>
Link: https://lore.kernel.org/r/20201222151613.5730-1-elder@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/ipa_clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c
index 9dcf16f399b7..135c393437f1 100644
--- a/drivers/net/ipa/ipa_clock.c
+++ b/drivers/net/ipa/ipa_clock.c
@@ -115,13 +115,13 @@ static int ipa_interconnect_enable(struct ipa *ipa)
 		return ret;
 
 	data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM];
-	ret = icc_set_bw(clock->memory_path, data->average_rate,
+	ret = icc_set_bw(clock->imem_path, data->average_rate,
 			 data->peak_rate);
 	if (ret)
 		goto err_memory_path_disable;
 
 	data = &clock->interconnect_data[IPA_INTERCONNECT_CONFIG];
-	ret = icc_set_bw(clock->memory_path, data->average_rate,
+	ret = icc_set_bw(clock->config_path, data->average_rate,
 			 data->peak_rate);
 	if (ret)
 		goto err_imem_path_disable;

From 8450e23f142f629e40bd67afc8375c86c7fbf8f1 Mon Sep 17 00:00:00 2001
From: Noor Azura Ahmad Tarmizi <noor.azura.ahmad.tarmizi@intel.com>
Date: Wed, 23 Dec 2020 00:03:37 +0800
Subject: [PATCH 128/547] stmmac: intel: Add PCI IDs for TGL-H platform

Add TGL-H PCI info and PCI IDs for the new TSN Controller to the list
of supported devices.

Signed-off-by: Noor Azura Ahmad Tarmizi <noor.azura.ahmad.tarmizi@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Link: https://lore.kernel.org/r/20201222160337.30870-1-muhammad.husaini.zulkifli@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index a2e80c89de2d..9a6a519426a0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -721,6 +721,8 @@ static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend,
 #define PCI_DEVICE_ID_INTEL_EHL_PSE1_RGMII1G_ID		0x4bb0
 #define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII1G_ID		0x4bb1
 #define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII2G5_ID	0x4bb2
+#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_0_ID		0x43ac
+#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_1_ID		0x43a2
 #define PCI_DEVICE_ID_INTEL_TGL_SGMII1G_ID		0xa0ac
 
 static const struct pci_device_id intel_eth_pci_id_table[] = {
@@ -735,6 +737,8 @@ static const struct pci_device_id intel_eth_pci_id_table[] = {
 	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) },
 	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) },
 	{ PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_info) },
 	{}
 };
 MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table);

From 94ad8f3ac6aff5acde3f6c4719997efc61e0dccf Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Tue, 22 Dec 2020 12:00:10 -0600
Subject: [PATCH 129/547] net: ipa: clear pending interrupts before enabling

We enable the completion interrupt for channel or event ring
commands only when we issue them.  The interrupt is disabled after
the interrupt has fired, or after we have timed out waiting for it.

If we time out, the command could complete after the interrupt has
been disabled, causing a state change in the channel or event ring.
The interrupt associated with that state change would be delivered
the next time the completion interrupt is enabled.

To avoid previous command completions interfering with new commands,
clear all pending completion interrupts before re-enabling them for
a new command.

Fixes: b4175f8731f78 ("net: ipa: only enable GSI event control IRQs when needed")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index c4795249719d..4aee60d62ab0 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -340,7 +340,13 @@ static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
 	 * is issued here.  Only permit *this* event ring to trigger
 	 * an interrupt, and only enable the event control IRQ type
 	 * when we expect it to occur.
+	 *
+	 * There's a small chance that a previous command completed
+	 * after the interrupt was disabled, so make sure we have no
+	 * pending interrupts before we enable them.
 	 */
+	iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET);
+
 	val = BIT(evt_ring_id);
 	iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 	gsi_irq_type_enable(gsi, GSI_EV_CTRL);
@@ -453,7 +459,13 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode)
 	 * issued here.  So we only permit *this* channel to trigger
 	 * an interrupt and only enable the channel control IRQ type
 	 * when we expect it to occur.
+	 *
+	 * There's a small chance that a previous command completed
+	 * after the interrupt was disabled, so make sure we have no
+	 * pending interrupts before we enable them.
 	 */
+	iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET);
+
 	val = BIT(channel_id);
 	iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 	gsi_irq_type_enable(gsi, GSI_CH_CTRL);

From 6ffddf3b3d182d886d754cfafdf909ccb14f464b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Tue, 22 Dec 2020 12:00:11 -0600
Subject: [PATCH 130/547] net: ipa: use state to determine channel command
 success

The result of issuing a channel control command should be that the
channel changes state.  If enabled, a completion interrupt signals
that the channel state has changed.  This interrupt is enabled by
gsi_channel_command() and disabled again after the command has
completed (or we time out).

There is a window of time--after the completion interrupt is disabled
but before the channel state is read--during which the command could
complete successfully without interrupting.  This would cause the
channel to transition to the desired new state.

So whether a channel command ends via completion interrupt or
timeout, we can consider the command successful if the channel
has entered the desired state (and a failure if it has not,
regardless of the cause).

Fixes: d6c9e3f506ae8 ("net: ipa: only enable generic command completion IRQ when needed");
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi.c | 43 ++++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 4aee60d62ab0..4f0e79176423 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -505,15 +505,15 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id)
 
 	ret = gsi_channel_command(channel, GSI_CH_ALLOCATE);
 
-	/* Channel state will normally have been updated */
+	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);
-	if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED) {
-		dev_err(dev, "channel %u bad state %u after alloc\n",
-			channel_id, state);
-		ret = -EIO;
-	}
+	if (state == GSI_CHANNEL_STATE_ALLOCATED)
+		return 0;
 
-	return ret;
+	dev_err(dev, "channel %u bad state %u after alloc\n",
+		channel_id, state);
+
+	return -EIO;
 }
 
 /* Start an ALLOCATED channel */
@@ -533,15 +533,15 @@ static int gsi_channel_start_command(struct gsi_channel *channel)
 
 	ret = gsi_channel_command(channel, GSI_CH_START);
 
-	/* Channel state will normally have been updated */
+	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);
-	if (!ret && state != GSI_CHANNEL_STATE_STARTED) {
-		dev_err(dev, "channel %u bad state %u after start\n",
-			gsi_channel_id(channel), state);
-		ret = -EIO;
-	}
+	if (state == GSI_CHANNEL_STATE_STARTED)
+		return 0;
 
-	return ret;
+	dev_err(dev, "channel %u bad state %u after start\n",
+		gsi_channel_id(channel), state);
+
+	return -EIO;
 }
 
 /* Stop a GSI channel in STARTED state */
@@ -568,10 +568,10 @@ static int gsi_channel_stop_command(struct gsi_channel *channel)
 
 	ret = gsi_channel_command(channel, GSI_CH_STOP);
 
-	/* Channel state will normally have been updated */
+	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);
-	if (ret || state == GSI_CHANNEL_STATE_STOPPED)
-		return ret;
+	if (state == GSI_CHANNEL_STATE_STOPPED)
+		return 0;
 
 	/* We may have to try again if stop is in progress */
 	if (state == GSI_CHANNEL_STATE_STOP_IN_PROC)
@@ -604,9 +604,9 @@ static void gsi_channel_reset_command(struct gsi_channel *channel)
 
 	ret = gsi_channel_command(channel, GSI_CH_RESET);
 
-	/* Channel state will normally have been updated */
+	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);
-	if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED)
+	if (state != GSI_CHANNEL_STATE_ALLOCATED)
 		dev_err(dev, "channel %u bad state %u after reset\n",
 			gsi_channel_id(channel), state);
 }
@@ -628,9 +628,10 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id)
 
 	ret = gsi_channel_command(channel, GSI_CH_DE_ALLOC);
 
-	/* Channel state will normally have been updated */
+	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);
-	if (!ret && state != GSI_CHANNEL_STATE_NOT_ALLOCATED)
+
+	if (state != GSI_CHANNEL_STATE_NOT_ALLOCATED)
 		dev_err(dev, "channel %u bad state %u after dealloc\n",
 			channel_id, state);
 }

From 428b448ee764a264b7a2eeed295b282755114aa7 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Tue, 22 Dec 2020 12:00:12 -0600
Subject: [PATCH 131/547] net: ipa: use state to determine event ring command
 success

This patch implements the same basic fix for event rings as the
previous one does for channels.

The result of issuing an event ring control command should be that
the event ring changes state.  If enabled, a completion interrupt
signals that the event ring state has changed.  This interrupt is
enabled by gsi_evt_ring_command() and disabled again after the
command has completed (or we time out).

There is a window of time during which the command could complete
successfully without interrupting.  This would cause the event ring
to transition to the desired new state.

So whether a event ring command ends via completion interrupt or
timeout, we can consider the command successful if the event ring
has entered the desired state (and a failure if it has not,
regardless of the cause).

Fixes: b4175f8731f78 ("net: ipa: only enable GSI event control IRQs when needed")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 4f0e79176423..579cc3e51677 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -384,13 +384,15 @@ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id)
 	}
 
 	ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE);
-	if (!ret && evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) {
-		dev_err(gsi->dev, "event ring %u bad state %u after alloc\n",
-			evt_ring_id, evt_ring->state);
-		ret = -EIO;
-	}
 
-	return ret;
+	/* If successful the event ring state will have changed */
+	if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
+		return 0;
+
+	dev_err(gsi->dev, "event ring %u bad state %u after alloc\n",
+		evt_ring_id, evt_ring->state);
+
+	return -EIO;
 }
 
 /* Reset a GSI event ring in ALLOCATED or ERROR state. */
@@ -408,9 +410,13 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id)
 	}
 
 	ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET);
-	if (!ret && evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED)
-		dev_err(gsi->dev, "event ring %u bad state %u after reset\n",
-			evt_ring_id, evt_ring->state);
+
+	/* If successful the event ring state will have changed */
+	if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
+		return;
+
+	dev_err(gsi->dev, "event ring %u bad state %u after reset\n",
+		evt_ring_id, evt_ring->state);
 }
 
 /* Issue a hardware de-allocation request for an allocated event ring */
@@ -426,9 +432,13 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id)
 	}
 
 	ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC);
-	if (!ret && evt_ring->state != GSI_EVT_RING_STATE_NOT_ALLOCATED)
-		dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n",
-			evt_ring_id, evt_ring->state);
+
+	/* If successful the event ring state will have changed */
+	if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED)
+		return;
+
+	dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n",
+		evt_ring_id, evt_ring->state);
 }
 
 /* Fetch the current state of a channel from hardware */

From 826f328e2b7e8854dd42ea44e6519cd75018e7b1 Mon Sep 17 00:00:00 2001
From: Petr Machata <me@pmachata.org>
Date: Tue, 22 Dec 2020 22:49:44 +0100
Subject: [PATCH 132/547] net: dcb: Validate netlink message in DCB handler

DCB uses the same handler function for both RTM_GETDCB and RTM_SETDCB
messages. dcb_doit() bounces RTM_SETDCB mesasges if the user does not have
the CAP_NET_ADMIN capability.

However, the operation to be performed is not decided from the DCB message
type, but from the DCB command. Thus DCB_CMD_*_GET commands are used for
reading DCB objects, the corresponding SET and DEL commands are used for
manipulation.

The assumption is that set-like commands will be sent via an RTM_SETDCB
message, and get-like ones via RTM_GETDCB. However, this assumption is not
enforced.

It is therefore possible to manipulate DCB objects without CAP_NET_ADMIN
capability by sending the corresponding command in an RTM_GETDCB message.
That is a bug. Fix it by validating the type of the request message against
the type used for the response.

Fixes: 2f90b8657ec9 ("ixgbe: this patch adds support for DCB to the kernel and ixgbe driver")
Signed-off-by: Petr Machata <me@pmachata.org>
Link: https://lore.kernel.org/r/a2a9b88418f3a58ef211b718f2970128ef9e3793.1608673640.git.me@pmachata.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/dcb/dcbnl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 084e159a12ba..7d49b6fd6cef 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1765,6 +1765,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	fn = &reply_funcs[dcb->cmd];
 	if (!fn->cb)
 		return -EOPNOTSUPP;
+	if (fn->type != nlh->nlmsg_type)
+		return -EPERM;
 
 	if (!tb[DCB_ATTR_IFNAME])
 		return -EINVAL;

From 427c940558560bff2583d07fc119a21094675982 Mon Sep 17 00:00:00 2001
From: John Wang <wangzhiqiang.bj@bytedance.com>
Date: Wed, 23 Dec 2020 13:55:23 +0800
Subject: [PATCH 133/547] net/ncsi: Use real net-device for response handler

When aggregating ncsi interfaces and dedicated interfaces to bond
interfaces, the ncsi response handler will use the wrong net device to
find ncsi_dev, so that the ncsi interface will not work properly.
Here, we use the original net device to fix it.

Fixes: 138635cc27c9 ("net/ncsi: NCSI response packet handler")
Signed-off-by: John Wang <wangzhiqiang.bj@bytedance.com>
Link: https://lore.kernel.org/r/20201223055523.2069-1-wangzhiqiang.bj@bytedance.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ncsi/ncsi-rsp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 5b1f4ec66dd9..888ccc2d4e34 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -1120,7 +1120,7 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
 	int payload, i, ret;
 
 	/* Find the NCSI device */
-	nd = ncsi_find_dev(dev);
+	nd = ncsi_find_dev(orig_dev);
 	ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
 	if (!ndp)
 		return -ENODEV;

From 5d41f9b7ee7a5a5138894f58846a4ffed601498a Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Wed, 23 Dec 2020 19:06:12 +0800
Subject: [PATCH 134/547] net: ethernet: Fix memleak in ethoc_probe

When mdiobus_register() fails, priv->mdio allocated
by mdiobus_alloc() has not been freed, which leads
to memleak.

Fixes: e7f4dc3536a4 ("mdio: Move allocation of interrupts into core")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20201223110615.31389-1-dinghao.liu@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ethoc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 0981fe9652e5..3d9b0b161e24 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1211,7 +1211,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	ret = mdiobus_register(priv->mdio);
 	if (ret) {
 		dev_err(&netdev->dev, "failed to register MDIO bus\n");
-		goto free2;
+		goto free3;
 	}
 
 	ret = ethoc_mdio_probe(netdev);
@@ -1243,6 +1243,7 @@ error2:
 	netif_napi_del(&priv->napi);
 error:
 	mdiobus_unregister(priv->mdio);
+free3:
 	mdiobus_free(priv->mdio);
 free2:
 	clk_disable_unprepare(priv->clk);

From 1f45dc22066797479072978feeada0852502e180 Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Wed, 23 Dec 2020 14:49:04 -0600
Subject: [PATCH 135/547] ibmvnic: continue fatal error reset after passive
 init

Commit f9c6cea0b385 ("ibmvnic: Skip fatal error reset after passive init")
says "If the passive
CRQ initialization occurs before the FATAL reset task is processed,
the FATAL error reset task would try to access a CRQ message queue
that was freed, causing an oops. The problem may be most likely to
occur during DLPAR add vNIC with a non-default MTU, because the DLPAR
process will automatically issue a change MTU request.
Fix this by not processing fatal error reset if CRQ is passively
initialized after client-driven CRQ initialization fails."

The original commit skips a specific reset condition, but that does
not fix the problem it claims to fix, and misses a reset condition.
The effective fix is commit 0e435befaea4 ("ibmvnic: fix NULL pointer
dereference in ibmvic_reset_crq") and commit a0faaa27c716 ("ibmvnic:
fix NULL pointer dereference in reset_sub_crq_queues"). With above
two fixes, there are no more crashes seen as described even without
the original commit, so I would like to revert the original commit.

Fixes: f9c6cea0b385 ("ibmvnic: Skip fatal error reset after passive init")
Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Link: https://lore.kernel.org/r/20201223204904.12677-1-ljp@linux.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index bd0281020d51..91ebcde30292 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2342,8 +2342,7 @@ static void __ibmvnic_reset(struct work_struct *work)
 				set_current_state(TASK_UNINTERRUPTIBLE);
 				schedule_timeout(60 * HZ);
 			}
-		} else if (!(rwi->reset_reason == VNIC_RESET_FATAL &&
-				adapter->from_passive_init)) {
+		} else {
 			rc = do_reset(adapter, rwi, reset_state);
 		}
 		kfree(rwi);

From 808e0d8832cc81738f3e8df12dff0688352baf50 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@dell.com>
Date: Mon, 14 Dec 2020 13:29:32 -0600
Subject: [PATCH 136/547] e1000e: Only run S0ix flows if shutdown succeeded

If the shutdown failed, the part will be thawed and running
S0ix flows will put it into an undefined state.

Reported-by: Alexander Duyck <alexander.duyck@gmail.com>
Reviewed-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Mario Limonciello <mario.limonciello@dell.com>
Tested-by: Yijun Shen <Yijun.shen@dell.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 128ab6898070..6588f5d4a2be 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6970,13 +6970,14 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev)
 	e1000e_pm_freeze(dev);
 
 	rc = __e1000_shutdown(pdev, false);
-	if (rc)
+	if (rc) {
 		e1000e_pm_thaw(dev);
-
-	/* Introduce S0ix implementation */
-	if (hw->mac.type >= e1000_pch_cnp &&
-	    !e1000e_check_me(hw->adapter->pdev->device))
-		e1000e_s0ix_entry_flow(adapter);
+	} else {
+		/* Introduce S0ix implementation */
+		if (hw->mac.type >= e1000_pch_cnp &&
+		    !e1000e_check_me(hw->adapter->pdev->device))
+			e1000e_s0ix_entry_flow(adapter);
+	}
 
 	return rc;
 }

From 3cf31b1a9effd859bb3d6ff9f8b5b0d5e6cac952 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@dell.com>
Date: Mon, 14 Dec 2020 13:29:33 -0600
Subject: [PATCH 137/547] e1000e: bump up timeout to wait when ME un-configures
 ULP mode

Per guidance from Intel ethernet architecture team, it may take
up to 1 second for unconfiguring ULP mode.

However in practice this seems to be taking up to 2 seconds on
some Lenovo machines.  Detect scenarios that take more than 1 second
but less than 2.5 seconds and emit a warning on resume for those
scenarios.

Suggested-by: Aaron Ma <aaron.ma@canonical.com>
Suggested-by: Sasha Netfin <sasha.neftin@intel.com>
Suggested-by: Hans de Goede <hdegoede@redhat.com>
CC: Mark Pearson <markpearson@lenovo.com>
Fixes: f15bb6dde738cc8fa0 ("e1000e: Add support for S0ix")
BugLink: https://bugs.launchpad.net/bugs/1865570
Link: https://patchwork.ozlabs.org/project/intel-wired-lan/patch/20200323191639.48826-1-aaron.ma@canonical.com/
Link: https://lkml.org/lkml/2020/12/13/15
Link: https://lkml.org/lkml/2020/12/14/708
Signed-off-by: Mario Limonciello <mario.limonciello@dell.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Yijun Shen <Yijun.shen@dell.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 9aa6fad8ed47..6fb46682b058 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1240,6 +1240,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
 		return 0;
 
 	if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+		struct e1000_adapter *adapter = hw->adapter;
+		bool firmware_bug = false;
+
 		if (force) {
 			/* Request ME un-configure ULP mode in the PHY */
 			mac_reg = er32(H2ME);
@@ -1248,16 +1251,24 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
 			ew32(H2ME, mac_reg);
 		}
 
-		/* Poll up to 300msec for ME to clear ULP_CFG_DONE. */
+		/* Poll up to 2.5 seconds for ME to clear ULP_CFG_DONE.
+		 * If this takes more than 1 second, show a warning indicating a
+		 * firmware bug
+		 */
 		while (er32(FWSM) & E1000_FWSM_ULP_CFG_DONE) {
-			if (i++ == 30) {
+			if (i++ == 250) {
 				ret_val = -E1000_ERR_PHY;
 				goto out;
 			}
+			if (i > 100 && !firmware_bug)
+				firmware_bug = true;
 
 			usleep_range(10000, 11000);
 		}
-		e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10);
+		if (firmware_bug)
+			e_warn("ULP_CONFIG_DONE took %dmsec.  This is a firmware bug\n", i * 10);
+		else
+			e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10);
 
 		if (force) {
 			mac_reg = er32(H2ME);

From 6cecf02e77ab9bf97e9252f9fcb8f0738a6de12c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@dell.com>
Date: Mon, 14 Dec 2020 13:29:34 -0600
Subject: [PATCH 138/547] Revert "e1000e: disable s0ix entry and exit flows for
 ME systems"

commit e086ba2fccda ("e1000e: disable s0ix entry and exit flows for ME
systems") disabled s0ix flows for systems that have various incarnations of
the i219-LM ethernet controller.  This changed caused power consumption
regressions on the following shipping Dell Comet Lake based laptops:
* Latitude 5310
* Latitude 5410
* Latitude 5410
* Latitude 5510
* Precision 3550
* Latitude 5411
* Latitude 5511
* Precision 3551
* Precision 7550
* Precision 7750

This commit was introduced because of some regressions on certain Thinkpad
laptops.  This comment was potentially caused by an earlier
commit 632fbd5eb5b0e ("e1000e: fix S0ix flows for cable connected case").
or it was possibly caused by a system not meeting platform architectural
requirements for low power consumption.  Other changes made in the driver
with extended timeouts are expected to make the driver more impervious to
platform firmware behavior.

Fixes: e086ba2fccda ("e1000e: disable s0ix entry and exit flows for ME systems")
Reviewed-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Mario Limonciello <mario.limonciello@dell.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Yijun Shen <Yijun.shen@dell.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 45 +---------------------
 1 file changed, 2 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 6588f5d4a2be..b9800ba2006c 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -103,45 +103,6 @@ static const struct e1000_reg_info e1000_reg_info_tbl[] = {
 	{0, NULL}
 };
 
-struct e1000e_me_supported {
-	u16 device_id;		/* supported device ID */
-};
-
-static const struct e1000e_me_supported me_supported[] = {
-	{E1000_DEV_ID_PCH_LPT_I217_LM},
-	{E1000_DEV_ID_PCH_LPTLP_I218_LM},
-	{E1000_DEV_ID_PCH_I218_LM2},
-	{E1000_DEV_ID_PCH_I218_LM3},
-	{E1000_DEV_ID_PCH_SPT_I219_LM},
-	{E1000_DEV_ID_PCH_SPT_I219_LM2},
-	{E1000_DEV_ID_PCH_LBG_I219_LM3},
-	{E1000_DEV_ID_PCH_SPT_I219_LM4},
-	{E1000_DEV_ID_PCH_SPT_I219_LM5},
-	{E1000_DEV_ID_PCH_CNP_I219_LM6},
-	{E1000_DEV_ID_PCH_CNP_I219_LM7},
-	{E1000_DEV_ID_PCH_ICP_I219_LM8},
-	{E1000_DEV_ID_PCH_ICP_I219_LM9},
-	{E1000_DEV_ID_PCH_CMP_I219_LM10},
-	{E1000_DEV_ID_PCH_CMP_I219_LM11},
-	{E1000_DEV_ID_PCH_CMP_I219_LM12},
-	{E1000_DEV_ID_PCH_TGP_I219_LM13},
-	{E1000_DEV_ID_PCH_TGP_I219_LM14},
-	{E1000_DEV_ID_PCH_TGP_I219_LM15},
-	{0}
-};
-
-static bool e1000e_check_me(u16 device_id)
-{
-	struct e1000e_me_supported *id;
-
-	for (id = (struct e1000e_me_supported *)me_supported;
-	     id->device_id; id++)
-		if (device_id == id->device_id)
-			return true;
-
-	return false;
-}
-
 /**
  * __ew32_prepare - prepare to write to MAC CSR register on certain parts
  * @hw: pointer to the HW structure
@@ -6974,8 +6935,7 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev)
 		e1000e_pm_thaw(dev);
 	} else {
 		/* Introduce S0ix implementation */
-		if (hw->mac.type >= e1000_pch_cnp &&
-		    !e1000e_check_me(hw->adapter->pdev->device))
+		if (hw->mac.type >= e1000_pch_cnp)
 			e1000e_s0ix_entry_flow(adapter);
 	}
 
@@ -6991,8 +6951,7 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
 	int rc;
 
 	/* Introduce S0ix implementation */
-	if (hw->mac.type >= e1000_pch_cnp &&
-	    !e1000e_check_me(hw->adapter->pdev->device))
+	if (hw->mac.type >= e1000_pch_cnp)
 		e1000e_s0ix_exit_flow(adapter);
 
 	rc = __e1000_resume(pdev);

From 3c98cbf22a96c1b12f48c1b2a4680dfe5cb280f9 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@dell.com>
Date: Mon, 14 Dec 2020 13:29:35 -0600
Subject: [PATCH 139/547] e1000e: Export S0ix flags to ethtool

This flag can be used by an end user to disable S0ix flows on a
buggy system or by an OEM for development purposes.

If you need this flag to be persisted across reboots, it's suggested
to use a udev rule to call adjust it until the kernel could have your
configuration in a disallow list.

Signed-off-by: Mario Limonciello <mario.limonciello@dell.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Yijun Shen <Yijun.shen@dell.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000e/e1000.h   |  1 +
 drivers/net/ethernet/intel/e1000e/ethtool.c | 46 +++++++++++++++++++++
 drivers/net/ethernet/intel/e1000e/netdev.c  |  9 ++--
 3 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index ba7a0f8f6937..5b2143f4b1f8 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -436,6 +436,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca);
 #define FLAG2_DFLT_CRC_STRIPPING          BIT(12)
 #define FLAG2_CHECK_RX_HWTSTAMP           BIT(13)
 #define FLAG2_CHECK_SYSTIM_OVERFLOW       BIT(14)
+#define FLAG2_ENABLE_S0IX_FLOWS           BIT(15)
 
 #define E1000_RX_DESC_PS(R, i)	    \
 	(&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 03215b0aee4b..06442e6bef73 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -23,6 +23,13 @@ struct e1000_stats {
 	int stat_offset;
 };
 
+static const char e1000e_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define E1000E_PRIV_FLAGS_S0IX_ENABLED	BIT(0)
+	"s0ix-enabled",
+};
+
+#define E1000E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(e1000e_priv_flags_strings)
+
 #define E1000_STAT(str, m) { \
 		.stat_string = str, \
 		.type = E1000_STATS, \
@@ -1776,6 +1783,8 @@ static int e1000e_get_sset_count(struct net_device __always_unused *netdev,
 		return E1000_TEST_LEN;
 	case ETH_SS_STATS:
 		return E1000_STATS_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return E1000E_PRIV_FLAGS_STR_LEN;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -2097,6 +2106,10 @@ static void e1000_get_strings(struct net_device __always_unused *netdev,
 			p += ETH_GSTRING_LEN;
 		}
 		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, e1000e_priv_flags_strings,
+		       E1000E_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+		break;
 	}
 }
 
@@ -2305,6 +2318,37 @@ static int e1000e_get_ts_info(struct net_device *netdev,
 	return 0;
 }
 
+static u32 e1000e_get_priv_flags(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS)
+		priv_flags |= E1000E_PRIV_FLAGS_S0IX_ENABLED;
+
+	return priv_flags;
+}
+
+static int e1000e_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	unsigned int flags2 = adapter->flags2;
+
+	flags2 &= ~FLAG2_ENABLE_S0IX_FLOWS;
+	if (priv_flags & E1000E_PRIV_FLAGS_S0IX_ENABLED) {
+		struct e1000_hw *hw = &adapter->hw;
+
+		if (hw->mac.type < e1000_pch_cnp)
+			return -EINVAL;
+		flags2 |= FLAG2_ENABLE_S0IX_FLOWS;
+	}
+
+	if (flags2 != adapter->flags2)
+		adapter->flags2 = flags2;
+
+	return 0;
+}
+
 static const struct ethtool_ops e1000_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS,
 	.get_drvinfo		= e1000_get_drvinfo,
@@ -2336,6 +2380,8 @@ static const struct ethtool_ops e1000_ethtool_ops = {
 	.set_eee		= e1000e_set_eee,
 	.get_link_ksettings	= e1000_get_link_ksettings,
 	.set_link_ksettings	= e1000_set_link_ksettings,
+	.get_priv_flags		= e1000e_get_priv_flags,
+	.set_priv_flags		= e1000e_set_priv_flags,
 };
 
 void e1000e_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index b9800ba2006c..e9b82c209c2d 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6923,7 +6923,6 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev)
 	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct e1000_hw *hw = &adapter->hw;
 	int rc;
 
 	e1000e_flush_lpic(pdev);
@@ -6935,7 +6934,7 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev)
 		e1000e_pm_thaw(dev);
 	} else {
 		/* Introduce S0ix implementation */
-		if (hw->mac.type >= e1000_pch_cnp)
+		if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS)
 			e1000e_s0ix_entry_flow(adapter);
 	}
 
@@ -6947,11 +6946,10 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev)
 	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct e1000_hw *hw = &adapter->hw;
 	int rc;
 
 	/* Introduce S0ix implementation */
-	if (hw->mac.type >= e1000_pch_cnp)
+	if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS)
 		e1000e_s0ix_exit_flow(adapter);
 
 	rc = __e1000_resume(pdev);
@@ -7615,6 +7613,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!(adapter->flags & FLAG_HAS_AMT))
 		e1000e_get_hw_control(adapter);
 
+	if (hw->mac.type >= e1000_pch_cnp)
+		adapter->flags2 |= FLAG2_ENABLE_S0IX_FLOWS;
+
 	strlcpy(netdev->name, "eth%d", sizeof(netdev->name));
 	err = register_netdev(netdev);
 	if (err)

From 11b844b0b7c7c3dc8e8f4d0bbaad5e798351862c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 23 Dec 2020 12:06:52 -0800
Subject: [PATCH 140/547] selftests/bpf: Work-around EBUSY errors from hashmap
 update/delete

20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked") introduced
a possibility of getting EBUSY error on lock contention, which seems to happen
very deterministically in test_maps when running 1024 threads on low-CPU
machine. In libbpf CI case, it's a 2 CPU VM and it's hitting this 100% of the
time. Work around by retrying on EBUSY (and EAGAIN, while we are at it) after
a small sleep. sched_yield() is too agressive and fails even after 20 retries,
so I went with usleep(1) for backoff.

Also log actual error returned to make it easier to see what's going on.

Fixes: 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201223200652.3417075-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/test_maps.c | 48 +++++++++++++++++++++----
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 0ad3e6305ff0..51adc42b2b40 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1312,22 +1312,58 @@ static void test_map_stress(void)
 #define DO_UPDATE 1
 #define DO_DELETE 0
 
+#define MAP_RETRIES 20
+
+static int map_update_retriable(int map_fd, const void *key, const void *value,
+				int flags, int attempts)
+{
+	while (bpf_map_update_elem(map_fd, key, value, flags)) {
+		if (!attempts || (errno != EAGAIN && errno != EBUSY))
+			return -errno;
+
+		usleep(1);
+		attempts--;
+	}
+
+	return 0;
+}
+
+static int map_delete_retriable(int map_fd, const void *key, int attempts)
+{
+	while (bpf_map_delete_elem(map_fd, key)) {
+		if (!attempts || (errno != EAGAIN && errno != EBUSY))
+			return -errno;
+
+		usleep(1);
+		attempts--;
+	}
+
+	return 0;
+}
+
 static void test_update_delete(unsigned int fn, void *data)
 {
 	int do_update = ((int *)data)[1];
 	int fd = ((int *)data)[0];
-	int i, key, value;
+	int i, key, value, err;
 
 	for (i = fn; i < MAP_SIZE; i += TASKS) {
 		key = value = i;
 
 		if (do_update) {
-			assert(bpf_map_update_elem(fd, &key, &value,
-						   BPF_NOEXIST) == 0);
-			assert(bpf_map_update_elem(fd, &key, &value,
-						   BPF_EXIST) == 0);
+			err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
+			if (err)
+				printf("error %d %d\n", err, errno);
+			assert(err == 0);
+			err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
+			if (err)
+				printf("error %d %d\n", err, errno);
+			assert(err == 0);
 		} else {
-			assert(bpf_map_delete_elem(fd, &key) == 0);
+			err = map_delete_retriable(fd, &key, MAP_RETRIES);
+			if (err)
+				printf("error %d %d\n", err, errno);
+			assert(err == 0);
 		}
 	}
 }

From 69ca310f34168eae0ada434796bfc22fb4a0fa26 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <bsd@fb.com>
Date: Fri, 18 Dec 2020 10:50:30 -0800
Subject: [PATCH 141/547] bpf: Save correct stopping point in file seq
 iteration

On some systems, some variant of the following splat is
repeatedly seen.  The common factor in all traces seems
to be the entry point to task_file_seq_next().  With the
patch, all warnings go away.

    rcu: INFO: rcu_sched self-detected stall on CPU
    rcu: \x0926-....: (20992 ticks this GP) idle=d7e/1/0x4000000000000002 softirq=81556231/81556231 fqs=4876
    \x09(t=21033 jiffies g=159148529 q=223125)
    NMI backtrace for cpu 26
    CPU: 26 PID: 2015853 Comm: bpftool Kdump: loaded Not tainted 5.6.13-0_fbk4_3876_gd8d1f9bf80bb #1
    Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A12 10/08/2018
    Call Trace:
     <IRQ>
     dump_stack+0x50/0x70
     nmi_cpu_backtrace.cold.6+0x13/0x50
     ? lapic_can_unplug_cpu.cold.30+0x40/0x40
     nmi_trigger_cpumask_backtrace+0xba/0xca
     rcu_dump_cpu_stacks+0x99/0xc7
     rcu_sched_clock_irq.cold.90+0x1b4/0x3aa
     ? tick_sched_do_timer+0x60/0x60
     update_process_times+0x24/0x50
     tick_sched_timer+0x37/0x70
     __hrtimer_run_queues+0xfe/0x270
     hrtimer_interrupt+0xf4/0x210
     smp_apic_timer_interrupt+0x5e/0x120
     apic_timer_interrupt+0xf/0x20
     </IRQ>
    RIP: 0010:get_pid_task+0x38/0x80
    Code: 89 f6 48 8d 44 f7 08 48 8b 00 48 85 c0 74 2b 48 83 c6 55 48 c1 e6 04 48 29 f0 74 19 48 8d 78 20 ba 01 00 00 00 f0 0f c1 50 20 <85> d2 74 27 78 11 83 c2 01 78 0c 48 83 c4 08 c3 31 c0 48 83 c4 08
    RSP: 0018:ffffc9000d293dc8 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13
    RAX: ffff888637c05600 RBX: ffffc9000d293e0c RCX: 0000000000000000
    RDX: 0000000000000001 RSI: 0000000000000550 RDI: ffff888637c05620
    RBP: ffffffff8284eb80 R08: ffff88831341d300 R09: ffff88822ffd8248
    R10: ffff88822ffd82d0 R11: 00000000003a93c0 R12: 0000000000000001
    R13: 00000000ffffffff R14: ffff88831341d300 R15: 0000000000000000
     ? find_ge_pid+0x1b/0x20
     task_seq_get_next+0x52/0xc0
     task_file_seq_get_next+0x159/0x220
     task_file_seq_next+0x4f/0xa0
     bpf_seq_read+0x159/0x390
     vfs_read+0x8a/0x140
     ksys_read+0x59/0xd0
     do_syscall_64+0x42/0x110
     entry_SYSCALL_64_after_hwframe+0x44/0xa9
    RIP: 0033:0x7f95ae73e76e
    Code: Bad RIP value.
    RSP: 002b:00007ffc02c1dbf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
    RAX: ffffffffffffffda RBX: 000000000170faa0 RCX: 00007f95ae73e76e
    RDX: 0000000000001000 RSI: 00007ffc02c1dc30 RDI: 0000000000000007
    RBP: 00007ffc02c1ec70 R08: 0000000000000005 R09: 0000000000000006
    R10: fffffffffffff20b R11: 0000000000000246 R12: 00000000019112a0
    R13: 0000000000000000 R14: 0000000000000007 R15: 00000000004283c0

If unable to obtain the file structure for the current task,
proceed to the next task number after the one returned from
task_seq_get_next(), instead of the next task number from the
original iterator.

Also, save the stopping task number from task_seq_get_next()
on failure in case of restarts.

Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets")
Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201218185032.2464558-2-jonathan.lemon@gmail.com
---
 kernel/bpf/task_iter.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 0458a40edf10..8033ab19138a 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -158,13 +158,14 @@ again:
 		if (!curr_task) {
 			info->task = NULL;
 			info->files = NULL;
+			info->tid = curr_tid;
 			return NULL;
 		}
 
 		curr_files = get_files_struct(curr_task);
 		if (!curr_files) {
 			put_task_struct(curr_task);
-			curr_tid = ++(info->tid);
+			curr_tid = curr_tid + 1;
 			info->fd = 0;
 			goto again;
 		}

From a61daaf351da7c8493f2586437617d60c24350b0 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <bsd@fb.com>
Date: Fri, 18 Dec 2020 10:50:31 -0800
Subject: [PATCH 142/547] bpf: Use thread_group_leader()

Instead of directly comparing task->tgid and task->pid, use the
thread_group_leader() helper.  This helps with readability, and
there should be no functional change.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201218185032.2464558-3-jonathan.lemon@gmail.com
---
 kernel/bpf/task_iter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 8033ab19138a..dc4007f1843b 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -37,7 +37,7 @@ retry:
 		if (!task) {
 			++*tid;
 			goto retry;
-		} else if (skip_if_dup_files && task->tgid != task->pid &&
+		} else if (skip_if_dup_files && !thread_group_leader(task) &&
 			   task->files == task->group_leader->files) {
 			put_task_struct(task);
 			task = NULL;

From e13a6915a03ffc3ce332d28c141a335e25187fa3 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Wed, 23 Dec 2020 15:36:38 +0100
Subject: [PATCH 143/547] vhost/vsock: add IOTLB API support

This patch enables the IOTLB API support for vhost-vsock devices,
allowing the userspace to emulate an IOMMU for the guest.

These changes were made following vhost-net, in details this patch:
- exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb
  device if the feature is acked
- implements VHOST_GET_BACKEND_FEATURES and
  VHOST_SET_BACKEND_FEATURES ioctls
- calls vq_meta_prefetch() before vq processing to prefetch vq
  metadata address in IOTLB
- provides .read_iter, .write_iter, and .poll callbacks for the
  chardev; they are used by the userspace to exchange IOTLB messages

This patch was tested specifying "intel_iommu=strict" in the guest
kernel command line. I used QEMU with a patch applied [1] to fix a
simple issue (that patch was merged in QEMU v5.2.0):
    $ qemu -M q35,accel=kvm,kernel-irqchip=split \
           -drive file=fedora.qcow2,format=qcow2,if=virtio \
           -device intel-iommu,intremap=on,device-iotlb=on \
           -device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on

[1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vsock.c | 68 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index a483cec31d5c..5e78fb719602 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -30,7 +30,12 @@
 #define VHOST_VSOCK_PKT_WEIGHT 256
 
 enum {
-	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
+			       (1ULL << VIRTIO_F_ACCESS_PLATFORM)
+};
+
+enum {
+	VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
 };
 
 /* Used to track all the vhost_vsock instances on the system. */
@@ -94,6 +99,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 	if (!vhost_vq_get_backend(vq))
 		goto out;
 
+	if (!vq_meta_prefetch(vq))
+		goto out;
+
 	/* Avoid further vmexits, we're already processing the virtqueue */
 	vhost_disable_notify(&vsock->dev, vq);
 
@@ -449,6 +457,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
 	if (!vhost_vq_get_backend(vq))
 		goto out;
 
+	if (!vq_meta_prefetch(vq))
+		goto out;
+
 	vhost_disable_notify(&vsock->dev, vq);
 	do {
 		u32 len;
@@ -766,8 +777,12 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
 	mutex_lock(&vsock->dev.mutex);
 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
 	    !vhost_log_access_ok(&vsock->dev)) {
-		mutex_unlock(&vsock->dev.mutex);
-		return -EFAULT;
+		goto err;
+	}
+
+	if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
+		if (vhost_init_device_iotlb(&vsock->dev, true))
+			goto err;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
@@ -778,6 +793,10 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
 	}
 	mutex_unlock(&vsock->dev.mutex);
 	return 0;
+
+err:
+	mutex_unlock(&vsock->dev.mutex);
+	return -EFAULT;
 }
 
 static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
@@ -811,6 +830,18 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
 		if (copy_from_user(&features, argp, sizeof(features)))
 			return -EFAULT;
 		return vhost_vsock_set_features(vsock, features);
+	case VHOST_GET_BACKEND_FEATURES:
+		features = VHOST_VSOCK_BACKEND_FEATURES;
+		if (copy_to_user(argp, &features, sizeof(features)))
+			return -EFAULT;
+		return 0;
+	case VHOST_SET_BACKEND_FEATURES:
+		if (copy_from_user(&features, argp, sizeof(features)))
+			return -EFAULT;
+		if (features & ~VHOST_VSOCK_BACKEND_FEATURES)
+			return -EOPNOTSUPP;
+		vhost_set_backend_features(&vsock->dev, features);
+		return 0;
 	default:
 		mutex_lock(&vsock->dev.mutex);
 		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
@@ -823,6 +854,34 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
 	}
 }
 
+static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	struct vhost_vsock *vsock = file->private_data;
+	struct vhost_dev *dev = &vsock->dev;
+	int noblock = file->f_flags & O_NONBLOCK;
+
+	return vhost_chr_read_iter(dev, to, noblock);
+}
+
+static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb,
+					struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct vhost_vsock *vsock = file->private_data;
+	struct vhost_dev *dev = &vsock->dev;
+
+	return vhost_chr_write_iter(dev, from);
+}
+
+static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait)
+{
+	struct vhost_vsock *vsock = file->private_data;
+	struct vhost_dev *dev = &vsock->dev;
+
+	return vhost_chr_poll(file, dev, wait);
+}
+
 static const struct file_operations vhost_vsock_fops = {
 	.owner          = THIS_MODULE,
 	.open           = vhost_vsock_dev_open,
@@ -830,6 +889,9 @@ static const struct file_operations vhost_vsock_fops = {
 	.llseek		= noop_llseek,
 	.unlocked_ioctl = vhost_vsock_dev_ioctl,
 	.compat_ioctl   = compat_ptr_ioctl,
+	.read_iter      = vhost_vsock_chr_read_iter,
+	.write_iter     = vhost_vsock_chr_write_iter,
+	.poll           = vhost_vsock_chr_poll,
 };
 
 static struct miscdevice vhost_vsock_misc = {

From 6cb56218ad9e580e519dcd23bfb3db08d8692e5a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 22 Dec 2020 23:23:56 +0100
Subject: [PATCH 144/547] netfilter: xt_RATEEST: reject non-null terminated
 string from userspace

syzbot reports:
detected buffer overflow in strlen
[..]
Call Trace:
 strlen include/linux/string.h:325 [inline]
 strlcpy include/linux/string.h:348 [inline]
 xt_rateest_tg_checkentry+0x2a5/0x6b0 net/netfilter/xt_RATEEST.c:143

strlcpy assumes src is a c-string. Check info->name before its used.

Reported-by: syzbot+e86f7c428c8c50db65b4@syzkaller.appspotmail.com
Fixes: 5859034d7eb8793 ("[NETFILTER]: x_tables: add RATEEST target")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_RATEEST.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 37253d399c6b..0d5c422f8745 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -115,6 +115,9 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 	} cfg;
 	int ret;
 
+	if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name))
+		return -ENAMETOOLONG;
+
 	net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
 
 	mutex_lock(&xn->hash_lock);

From 9e5c23b9bd71d00b07720b2a8037b019d356e9df Mon Sep 17 00:00:00 2001
From: Alexandru Elisei <alexandru.elisei@arm.com>
Date: Tue, 1 Dec 2020 15:01:56 +0000
Subject: [PATCH 145/547] KVM: arm64: Update comment in
 kvm_vgic_map_resources()

vgic_v3_map_resources() returns -EBUSY if the VGIC isn't initialized,
update the comment to kvm_vgic_map_resources() to match what the function
does.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201201150157.223625-5-alexandru.elisei@arm.com
---
 arch/arm64/kvm/vgic/vgic-init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index a2f4d1c85f00..5b54787a9ad5 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -419,7 +419,8 @@ int vgic_lazy_init(struct kvm *kvm)
  * Map the MMIO regions depending on the VGIC model exposed to the guest
  * called on the first VCPU run.
  * Also map the virtual CPU interface into the VM.
- * v2/v3 derivatives call vgic_init if not already done.
+ * v2 calls vgic_init() if not already done.
+ * v3 and derivatives return an error if the VGIC is not initialized.
  * vgic_ready() returns true if this function has succeeded.
  * @kvm: kvm struct pointer
  */

From 282ff80135717cc43f1e33ddd4b0cd9e760d060b Mon Sep 17 00:00:00 2001
From: Alexandru Elisei <alexandru.elisei@arm.com>
Date: Tue, 1 Dec 2020 15:01:57 +0000
Subject: [PATCH 146/547] KVM: arm64: Remove redundant call to
 kvm_pmu_vcpu_reset()

KVM_ARM_VCPU_INIT ioctl calls kvm_reset_vcpu(), which in turn resets the
PMU with a call to kvm_pmu_vcpu_reset(). The function zeroes the PMU
chained counters bitmap and stops all the counters with a perf event
attached. Because it is called before the VCPU has had the chance to run,
no perf events are in use and none are released.

kvm_arm_pmu_v3_enable(), called by kvm_vcpu_first_run_init() only if the
VCPU has been initialized, also resets the PMU. kvm_pmu_vcpu_reset() in
this case does the exact same thing as the previous call, so remove it.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201201150157.223625-6-alexandru.elisei@arm.com
---
 arch/arm64/kvm/pmu-emul.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 398f6df1bbe4..4ad66a532e38 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -850,8 +850,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 		   return -EINVAL;
 	}
 
-	kvm_pmu_vcpu_reset(vcpu);
-
 	return 0;
 }
 

From 101068b566ef227b605d807aad9e72efd8b6bc5b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 27 Dec 2020 14:28:34 +0000
Subject: [PATCH 147/547] KVM: arm64: Consolidate dist->ready setting into
 kvm_vgic_map_resources()

dist->ready setting is pointlessly spread across the two vgic
backends, while it could be consolidated in kvm_vgic_map_resources().

Move it there, and slightly simplify the flows in both backends.

Suggested-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-init.c |  2 ++
 arch/arm64/kvm/vgic/vgic-v2.c   | 17 ++++++-----------
 arch/arm64/kvm/vgic/vgic-v3.c   | 18 ++++++------------
 3 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 5b54787a9ad5..052917deb149 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -446,6 +446,8 @@ int kvm_vgic_map_resources(struct kvm *kvm)
 
 	if (ret)
 		__kvm_vgic_destroy(kvm);
+	else
+		dist->ready = true;
 
 out:
 	mutex_unlock(&kvm->lock);
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 7f38c1a93639..11934c2af2f4 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -309,14 +309,12 @@ int vgic_v2_map_resources(struct kvm *kvm)
 	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
 	    IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
 		kvm_err("Need to set vgic cpu and dist addresses first\n");
-		ret = -ENXIO;
-		goto out;
+		return -ENXIO;
 	}
 
 	if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
 		kvm_err("VGIC CPU and dist frames overlap\n");
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/*
@@ -326,13 +324,13 @@ int vgic_v2_map_resources(struct kvm *kvm)
 	ret = vgic_init(kvm);
 	if (ret) {
 		kvm_err("Unable to initialize VGIC dynamic data structures\n");
-		goto out;
+		return ret;
 	}
 
 	ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2);
 	if (ret) {
 		kvm_err("Unable to register VGIC MMIO regions\n");
-		goto out;
+		return ret;
 	}
 
 	if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
@@ -341,14 +339,11 @@ int vgic_v2_map_resources(struct kvm *kvm)
 					    KVM_VGIC_V2_CPU_SIZE, true);
 		if (ret) {
 			kvm_err("Unable to remap VGIC CPU to VCPU\n");
-			goto out;
+			return ret;
 		}
 	}
 
-	dist->ready = true;
-
-out:
-	return ret;
+	return 0;
 }
 
 DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap);
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 35029c5cb0f1..52915b342351 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -505,21 +505,18 @@ int vgic_v3_map_resources(struct kvm *kvm)
 
 		if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
 			kvm_debug("vcpu %d redistributor base not set\n", c);
-			ret = -ENXIO;
-			goto out;
+			return -ENXIO;
 		}
 	}
 
 	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
 		kvm_err("Need to set vgic distributor addresses first\n");
-		ret = -ENXIO;
-		goto out;
+		return -ENXIO;
 	}
 
 	if (!vgic_v3_check_base(kvm)) {
 		kvm_err("VGIC redist and dist frames overlap\n");
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/*
@@ -527,22 +524,19 @@ int vgic_v3_map_resources(struct kvm *kvm)
 	 * the VGIC before we need to use it.
 	 */
 	if (!vgic_initialized(kvm)) {
-		ret = -EBUSY;
-		goto out;
+		return -EBUSY;
 	}
 
 	ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3);
 	if (ret) {
 		kvm_err("Unable to register VGICv3 dist MMIO regions\n");
-		goto out;
+		return ret;
 	}
 
 	if (kvm_vgic_global_state.has_gicv4_1)
 		vgic_v4_configure_vsgis(kvm);
-	dist->ready = true;
 
-out:
-	return ret;
+	return 0;
 }
 
 DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap);

From 976509bb310b913d30577f15b58bdd30effb0542 Mon Sep 17 00:00:00 2001
From: Quanyang Wang <quanyang.wang@windriver.com>
Date: Thu, 24 Dec 2020 18:49:27 +0800
Subject: [PATCH 148/547] opp: fix memory leak in _allocate_opp_table

In function _allocate_opp_table, opp_dev is allocated and referenced
by opp_table via _add_opp_dev. But in the case that the subsequent calls
return -EPROBE_DEFER, it will jump to err label and opp_table will be
freed. Then opp_dev becomes an unreferenced object to cause memory leak.
So let's call _remove_opp_dev to do the cleanup.

This fixes the following kmemleak report:

unreferenced object 0xffff000801524a00 (size 128):
  comm "swapper/0", pid 1, jiffies 4294892465 (age 84.616s)
  hex dump (first 32 bytes):
    40 00 56 01 08 00 ff ff 40 00 56 01 08 00 ff ff  @.V.....@.V.....
    b8 52 77 7f 08 00 ff ff 00 3c 4c 00 08 00 ff ff  .Rw......<L.....
  backtrace:
    [<00000000b1289fb1>] kmemleak_alloc+0x30/0x40
    [<0000000056da48f0>] kmem_cache_alloc+0x3d4/0x588
    [<00000000a84b3b0e>] _add_opp_dev+0x2c/0x88
    [<0000000062a380cd>] _add_opp_table_indexed+0x124/0x268
    [<000000008b4c8f1f>] dev_pm_opp_of_add_table+0x20/0x1d8
    [<00000000e5316798>] dev_pm_opp_of_cpumask_add_table+0x48/0xf0
    [<00000000db0a8ec2>] dt_cpufreq_probe+0x20c/0x448
    [<0000000030a3a26c>] platform_probe+0x68/0xd8
    [<00000000c618e78d>] really_probe+0xd0/0x3a0
    [<00000000642e856f>] driver_probe_device+0x58/0xb8
    [<00000000f10f5307>] device_driver_attach+0x74/0x80
    [<0000000004f254b8>] __driver_attach+0x58/0xe0
    [<0000000009d5d19e>] bus_for_each_dev+0x70/0xc8
    [<0000000000d22e1c>] driver_attach+0x24/0x30
    [<0000000001d4e952>] bus_add_driver+0x14c/0x1f0
    [<0000000089928aaa>] driver_register+0x64/0x120

Cc: v5.10 <stable@vger.kernel.org> # v5.10
Fixes: dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER")
Signed-off-by: Quanyang Wang <quanyang.wang@windriver.com>
[ Viresh: Added the stable tag ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 4268eb359915..c9e50836b4c2 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1092,7 +1092,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index)
 	if (IS_ERR(opp_table->clk)) {
 		ret = PTR_ERR(opp_table->clk);
 		if (ret == -EPROBE_DEFER)
-			goto err;
+			goto remove_opp_dev;
 
 		dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret);
 	}
@@ -1101,7 +1101,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index)
 	ret = dev_pm_opp_of_find_icc_paths(dev, opp_table);
 	if (ret) {
 		if (ret == -EPROBE_DEFER)
-			goto err;
+			goto remove_opp_dev;
 
 		dev_warn(dev, "%s: Error finding interconnect paths: %d\n",
 			 __func__, ret);
@@ -1113,6 +1113,8 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index)
 
 	return opp_table;
 
+remove_opp_dev:
+	_remove_opp_dev(opp_dev, opp_table);
 err:
 	kfree(opp_table);
 	return ERR_PTR(ret);

From 0e1d9ca1766f5d95fb881f57b6c4a1ffa63d4648 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 28 Dec 2020 10:51:04 +0530
Subject: [PATCH 149/547] opp: Call the missing clk_put() on error

Fix the clock reference counting by calling the missing clk_put() in the
error path.

Cc: v5.10 <stable@vger.kernel.org> # v5.10
Fixes: dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER")
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index c9e50836b4c2..8c905aabacc0 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1101,7 +1101,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index)
 	ret = dev_pm_opp_of_find_icc_paths(dev, opp_table);
 	if (ret) {
 		if (ret == -EPROBE_DEFER)
-			goto remove_opp_dev;
+			goto put_clk;
 
 		dev_warn(dev, "%s: Error finding interconnect paths: %d\n",
 			 __func__, ret);
@@ -1113,6 +1113,9 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index)
 
 	return opp_table;
 
+put_clk:
+	if (!IS_ERR(opp_table->clk))
+		clk_put(opp_table->clk);
 remove_opp_dev:
 	_remove_opp_dev(opp_dev, opp_table);
 err:

From 105b5ca9b1e38a8db8446a493ca062eea98171eb Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 30 Nov 2020 16:36:30 +0200
Subject: [PATCH 150/547] habanalabs: Fix a missing-braces warning

Fix a compilation "missing braces around initializer" warning.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 32e6af1db4e3..a9927a06c5ea 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -406,7 +406,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
 static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
-	struct hl_pll_frequency_info freq_info = {0};
+	struct hl_pll_frequency_info freq_info = {};
 	u32 max_size = args->return_size;
 	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
 	int rc;

From 429f1571e8f0b14ec42b8fb14efcfc0576b2788f Mon Sep 17 00:00:00 2001
From: Alon Mizrahi <amizrahi@habana.ai>
Date: Tue, 1 Dec 2020 18:44:11 +0200
Subject: [PATCH 151/547] habanalabs: add comment for pll frequency ioctl
 opcode

Forgot to add the comment for the opcode when it was added.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 8c15a7d336a0..dc8bcec195cc 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -279,6 +279,7 @@ enum hl_device_status {
  * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
  * HL_INFO_SYNC_MANAGER  - Retrieve sync manager info per dcore
  * HL_INFO_TOTAL_ENERGY  - Retrieve total energy consumption
+ * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1

From 4783489951b78525a6e61b43936cbbd88b7938af Mon Sep 17 00:00:00 2001
From: Alon Mizrahi <amizrahi@habana.ai>
Date: Thu, 26 Nov 2020 13:05:20 +0200
Subject: [PATCH 152/547] habanalabs: fetch PSOC PLL frequency from F/W in goya

When the F/W security is enabled, goya needs to fetch the PSOC pll
frequency through a dedicated interface

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/goya/goya.c | 59 ++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 3e5eb9e3d7bd..b66fd55accb5 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -694,32 +694,47 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
 static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u32 trace_freq = 0;
-	u32 pll_clk = 0;
-	u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
-	u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
-	u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
-	u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
-	u32 od = RREG32(mmPSOC_PCI_PLL_OD);
+	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
+	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
+	int rc;
 
-	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
-		if (div_sel == DIV_SEL_REF_CLK)
-			trace_freq = PLL_REF_CLK;
-		else
-			trace_freq = PLL_REF_CLK / (div_fctr + 1);
-	} else if (div_sel == DIV_SEL_PLL_CLK ||
-					div_sel == DIV_SEL_DIVIDED_PLL) {
-		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
-		if (div_sel == DIV_SEL_PLL_CLK)
-			trace_freq = pll_clk;
-		else
-			trace_freq = pll_clk / (div_fctr + 1);
+	if (hdev->asic_prop.fw_security_disabled) {
+		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
+		nr = RREG32(mmPSOC_PCI_PLL_NR);
+		nf = RREG32(mmPSOC_PCI_PLL_NF);
+		od = RREG32(mmPSOC_PCI_PLL_OD);
+
+		if (div_sel == DIV_SEL_REF_CLK ||
+				div_sel == DIV_SEL_DIVIDED_REF) {
+			if (div_sel == DIV_SEL_REF_CLK)
+				freq = PLL_REF_CLK;
+			else
+				freq = PLL_REF_CLK / (div_fctr + 1);
+		} else if (div_sel == DIV_SEL_PLL_CLK ||
+				div_sel == DIV_SEL_DIVIDED_PLL) {
+			pll_clk = PLL_REF_CLK * (nf + 1) /
+					((nr + 1) * (od + 1));
+			if (div_sel == DIV_SEL_PLL_CLK)
+				freq = pll_clk;
+			else
+				freq = pll_clk / (div_fctr + 1);
+		} else {
+			dev_warn(hdev->dev,
+				"Received invalid div select value: %d",
+				div_sel);
+			freq = 0;
+		}
 	} else {
-		dev_warn(hdev->dev,
-			"Received invalid div select value: %d", div_sel);
+		rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr);
+
+		if (rc)
+			return;
+
+		freq = pll_freq_arr[1];
 	}
 
-	prop->psoc_timestamp_frequency = trace_freq;
+	prop->psoc_timestamp_frequency = freq;
 	prop->psoc_pci_pll_nr = nr;
 	prop->psoc_pci_pll_nf = nf;
 	prop->psoc_pci_pll_od = od;

From 6585489e808d9964dbde9dad89ac8e792e1185fc Mon Sep 17 00:00:00 2001
From: Alon Mizrahi <amizrahi@habana.ai>
Date: Thu, 19 Nov 2020 16:34:19 +0200
Subject: [PATCH 153/547] habanalabs: remove generic gaudi get_pll_freq
 function

As we only fetch the CPU_PLL frequency in gaudi, we don't need a
generic get_pll_frequency function which takes a pll index as input

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/habanalabs_ioctl.c |   2 +-
 drivers/misc/habanalabs/gaudi/gaudi.c         | 132 ++++++------------
 drivers/misc/habanalabs/gaudi/gaudiP.h        |   7 -
 3 files changed, 44 insertions(+), 97 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index a9927a06c5ea..a0c0d20f6f8f 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -406,7 +406,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
 static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
-	struct hl_pll_frequency_info freq_info = {};
+	struct hl_pll_frequency_info freq_info = { {0} };
 	u32 max_size = args->return_size;
 	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
 	int rc;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 1f1926607c5e..278c4de98e22 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -151,19 +151,6 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
 };
 
-static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = {
-	[CPU_PLL] = mmPSOC_CPU_PLL_NR,
-	[PCI_PLL] = mmPSOC_PCI_PLL_NR,
-	[SRAM_PLL] = mmSRAM_W_PLL_NR,
-	[HBM_PLL] = mmPSOC_HBM_PLL_NR,
-	[NIC_PLL] = mmNIC0_PLL_NR,
-	[DMA_PLL] = mmDMA_W_PLL_NR,
-	[MESH_PLL] = mmMESH_W_PLL_NR,
-	[MME_PLL] = mmPSOC_MME_PLL_NR,
-	[TPC_PLL] = mmPSOC_TPC_PLL_NR,
-	[IF_PLL] = mmIF_W_PLL_NR
-};
-
 static inline bool validate_packet_id(enum packet_id id)
 {
 	switch (id) {
@@ -702,73 +689,6 @@ static int gaudi_early_fini(struct hl_device *hdev)
 	return 0;
 }
 
-/**
- * gaudi_fetch_pll_frequency - Fetch PLL frequency values
- *
- * @hdev: pointer to hl_device structure
- * @pll_index: index of the pll to fetch frequency from
- * @pll_freq: pointer to store the pll frequency in MHz in each of the available
- *            outputs. if a certain output is not available a 0 will be set
- *
- */
-static int gaudi_fetch_pll_frequency(struct hl_device *hdev,
-				enum gaudi_pll_index pll_index,
-				u16 *pll_freq_arr)
-{
-	u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel,
-			pll_base_addr = gaudi_pll_base_addresses[pll_index];
-	u16 freq = 0;
-	int i, rc;
-
-	if (hdev->asic_prop.fw_security_status_valid &&
-			(hdev->asic_prop.fw_app_security_map &
-					CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
-		rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr);
-
-		if (rc)
-			return rc;
-	} else if (hdev->asic_prop.fw_security_disabled) {
-		/* Backward compatibility */
-		nr = RREG32(pll_base_addr + PLL_NR_OFFSET);
-		nf = RREG32(pll_base_addr + PLL_NF_OFFSET);
-		od = RREG32(pll_base_addr + PLL_OD_OFFSET);
-
-		for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) {
-			div_fctr = RREG32(pll_base_addr +
-					PLL_DIV_FACTOR_0_OFFSET + i * 4);
-			div_sel = RREG32(pll_base_addr +
-					PLL_DIV_SEL_0_OFFSET + i * 4);
-
-			if (div_sel == DIV_SEL_REF_CLK ||
-				div_sel == DIV_SEL_DIVIDED_REF) {
-				if (div_sel == DIV_SEL_REF_CLK)
-					freq = PLL_REF_CLK;
-				else
-					freq = PLL_REF_CLK / (div_fctr + 1);
-			} else if (div_sel == DIV_SEL_PLL_CLK ||
-					div_sel == DIV_SEL_DIVIDED_PLL) {
-				pll_clk = PLL_REF_CLK * (nf + 1) /
-						((nr + 1) * (od + 1));
-				if (div_sel == DIV_SEL_PLL_CLK)
-					freq = pll_clk;
-				else
-					freq = pll_clk / (div_fctr + 1);
-			} else {
-				dev_warn(hdev->dev,
-					"Received invalid div select value: %d",
-					div_sel);
-			}
-
-			pll_freq_arr[i] = freq;
-		}
-	} else {
-		dev_err(hdev->dev, "Failed to fetch PLL frequency values\n");
-		return -EIO;
-	}
-
-	return 0;
-}
-
 /**
  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
  *
@@ -778,18 +698,52 @@ static int gaudi_fetch_pll_frequency(struct hl_device *hdev,
 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u16 pll_freq[HL_PLL_NUM_OUTPUTS];
+	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
+	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
 	int rc;
 
-	rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq);
-	if (rc)
-		return rc;
+	if (hdev->asic_prop.fw_security_disabled) {
+		/* Backward compatibility */
+		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
+		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
+		nr = RREG32(mmPSOC_CPU_PLL_NR);
+		nf = RREG32(mmPSOC_CPU_PLL_NF);
+		od = RREG32(mmPSOC_CPU_PLL_OD);
 
-	prop->psoc_timestamp_frequency = pll_freq[2];
-	prop->psoc_pci_pll_nr = 0;
-	prop->psoc_pci_pll_nf = 0;
-	prop->psoc_pci_pll_od = 0;
-	prop->psoc_pci_pll_div_factor = 0;
+		if (div_sel == DIV_SEL_REF_CLK ||
+				div_sel == DIV_SEL_DIVIDED_REF) {
+			if (div_sel == DIV_SEL_REF_CLK)
+				freq = PLL_REF_CLK;
+			else
+				freq = PLL_REF_CLK / (div_fctr + 1);
+		} else if (div_sel == DIV_SEL_PLL_CLK ||
+			div_sel == DIV_SEL_DIVIDED_PLL) {
+			pll_clk = PLL_REF_CLK * (nf + 1) /
+					((nr + 1) * (od + 1));
+			if (div_sel == DIV_SEL_PLL_CLK)
+				freq = pll_clk;
+			else
+				freq = pll_clk / (div_fctr + 1);
+		} else {
+			dev_warn(hdev->dev,
+				"Received invalid div select value: %d",
+				div_sel);
+			freq = 0;
+		}
+	} else {
+		rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
+
+		if (rc)
+			return rc;
+
+		freq = pll_freq_arr[2];
+	}
+
+	prop->psoc_timestamp_frequency = freq;
+	prop->psoc_pci_pll_nr = nr;
+	prop->psoc_pci_pll_nf = nf;
+	prop->psoc_pci_pll_od = od;
+	prop->psoc_pci_pll_div_factor = div_fctr;
 
 	return 0;
 }
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index f2d91f4fcffe..a7ab2d7e57d4 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -105,13 +105,6 @@
 #define MME_ACC_OFFSET		(mmMME1_ACC_BASE - mmMME0_ACC_BASE)
 #define SRAM_BANK_OFFSET	(mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE)
 
-#define PLL_NR_OFFSET		0
-#define PLL_NF_OFFSET		(mmPSOC_CPU_PLL_NF - mmPSOC_CPU_PLL_NR)
-#define PLL_OD_OFFSET		(mmPSOC_CPU_PLL_OD - mmPSOC_CPU_PLL_NR)
-#define PLL_DIV_FACTOR_0_OFFSET	(mmPSOC_CPU_PLL_DIV_FACTOR_0 - \
-				mmPSOC_CPU_PLL_NR)
-#define PLL_DIV_SEL_0_OFFSET	(mmPSOC_CPU_PLL_DIV_SEL_0 - mmPSOC_CPU_PLL_NR)
-
 #define NUM_OF_SOB_IN_BLOCK		\
 	(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \
 	mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)

From 9c9013cbd8338ff8eac732d115c9005bc512cbc5 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 1 Dec 2020 10:39:54 +0200
Subject: [PATCH 154/547] habanalabs: preboot hard reset support

FW hard reset capability indication is now moved to preboot stage.
Driver will check if HW is dirty only after it validated preboot
is up. If HW is dirty, driver will perform a hard reset according
to the FW capability.
In addition, FW defines a new message which driver need to send in
order to initiate a hard reset.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c  | 25 ++++++++++---------
 drivers/misc/habanalabs/gaudi/gaudi.c         | 17 +++++++------
 drivers/misc/habanalabs/goya/goya.c           | 12 ++++-----
 .../habanalabs/include/common/hl_boot_if.h    |  2 ++
 4 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 0e1c629e9800..c970bfc6db66 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -627,7 +627,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	security_status = RREG32(cpu_security_boot_status_reg);
 
 	/* We read security status multiple times during boot:
-	 * 1. preboot - we check if fw security feature is supported
+	 * 1. preboot - a. Check whether the security status bits are valid
+	 *              b. Check whether fw security is enabled
+	 *              c. Check whether hard reset is done by fw
 	 * 2. boot cpu - we get boot cpu security status
 	 * 3. FW application - we get FW application security status
 	 *
@@ -637,13 +639,20 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	 */
 	if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
 		hdev->asic_prop.fw_security_status_valid = 1;
-		prop->fw_security_disabled =
-			!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN);
+
+		if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN))
+			prop->fw_security_disabled = true;
+
+		if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+			hdev->asic_prop.hard_reset_done_by_fw = true;
 	} else {
 		hdev->asic_prop.fw_security_status_valid = 0;
 		prop->fw_security_disabled = true;
 	}
 
+	dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
+		hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
+
 	dev_info(hdev->dev, "firmware-level security is %s\n",
 		prop->fw_security_disabled ? "disabled" : "enabled");
 
@@ -797,18 +806,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	}
 
 	/* Read FW application security bits */
-	if (hdev->asic_prop.fw_security_status_valid) {
+	if (hdev->asic_prop.fw_security_status_valid)
 		hdev->asic_prop.fw_app_security_map =
 				RREG32(cpu_security_boot_status_reg);
 
-		if (hdev->asic_prop.fw_app_security_map &
-				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
-			hdev->asic_prop.hard_reset_done_by_fw = true;
-	}
-
-	dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
-		hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
-
 	dev_info(hdev->dev, "Successfully loaded firmware to device\n");
 
 out:
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 278c4de98e22..e465c158eaeb 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -654,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev)
 	if (rc)
 		goto free_queue_props;
 
-	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-		dev_info(hdev->dev,
-			"H/W state is dirty, must reset before initializing\n");
-		hdev->asic_funcs->hw_fini(hdev, true);
-	}
-
 	/* Before continuing in the initialization, we need to read the preboot
 	 * version to determine whether we run with a security-enabled firmware
 	 */
@@ -672,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev)
 		goto pci_fini;
 	}
 
+	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+		dev_info(hdev->dev,
+			"H/W state is dirty, must reset before initializing\n");
+		hdev->asic_funcs->hw_fini(hdev, true);
+	}
+
 	return 0;
 
 pci_fini:
@@ -3881,7 +3881,10 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 	/* I don't know what is the state of the CPU so make sure it is
 	 * stopped in any means necessary
 	 */
-	WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
+	if (hdev->asic_prop.hard_reset_done_by_fw)
+		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
+	else
+		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
 
 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index b66fd55accb5..d61177bf36a5 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev)
 	if (rc)
 		goto free_queue_props;
 
-	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
-		dev_info(hdev->dev,
-			"H/W state is dirty, must reset before initializing\n");
-		hdev->asic_funcs->hw_fini(hdev, true);
-	}
-
 	/* Before continuing in the initialization, we need to read the preboot
 	 * version to determine whether we run with a security-enabled firmware
 	 */
@@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev)
 		goto pci_fini;
 	}
 
+	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+		dev_info(hdev->dev,
+			"H/W state is dirty, must reset before initializing\n");
+		hdev->asic_funcs->hw_fini(hdev, true);
+	}
+
 	if (!hdev->pldm) {
 		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
 		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index e5801ecf0cb2..755c4800f002 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -204,6 +204,8 @@ enum kmd_msg {
 	KMD_MSG_GOTO_WFE,
 	KMD_MSG_FIT_RDY,
 	KMD_MSG_SKIP_BMC,
+	RESERVED,
+	KMD_MSG_RST_DEV,
 };
 
 enum cpu_msg_status {

From 72ab9ca52de6856380c26b2045aa826ae4308b76 Mon Sep 17 00:00:00 2001
From: Alon Mizrahi <amizrahi@habana.ai>
Date: Wed, 2 Dec 2020 19:55:30 +0200
Subject: [PATCH 155/547] habanalabs/gaudi: do not set EB in collective slave
 queues

We don't need to set EB on signal packets from collective slave
queues as it degrades performance. Because the slaves are the network
queues, the engine barrier doesn't actually guarantee that the
packet has been sent.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h | 2 +-
 drivers/misc/habanalabs/common/hw_queue.c   | 5 ++++-
 drivers/misc/habanalabs/gaudi/gaudi.c       | 8 ++++----
 drivers/misc/habanalabs/goya/goya.c         | 2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 571eda6ef5ab..70b778a0d60e 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -944,7 +944,7 @@ struct hl_asic_funcs {
 	u32 (*get_signal_cb_size)(struct hl_device *hdev);
 	u32 (*get_wait_cb_size)(struct hl_device *hdev);
 	u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
-			u32 size);
+			u32 size, bool eb);
 	u32 (*gen_wait_cb)(struct hl_device *hdev,
 			struct hl_gen_wait_properties *prop);
 	void (*reset_sob)(struct hl_device *hdev, void *data);
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 7caf868d1585..76217258780a 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -418,8 +418,11 @@ static void init_signal_cs(struct hl_device *hdev,
 		"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
 
+	/* we set an EB since we must make sure all oeprations are done
+	 * when sending the signal
+	 */
 	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
-				cs_cmpl->hw_sob->sob_id, 0);
+				cs_cmpl->hw_sob->sob_id, 0, true);
 
 	kref_get(&hw_sob->kref);
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index e465c158eaeb..65895ba075fe 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -361,7 +361,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev);
 static void gaudi_disable_clock_gating(struct hl_device *hdev);
 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
-				u32 size);
+				u32 size, bool eb);
 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
 				struct hl_gen_wait_properties *prop);
 
@@ -1064,7 +1064,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
 		prop->collective_sob_id, queue_id);
 
 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
-			prop->collective_sob_id, cb_size);
+			prop->collective_sob_id, cb_size, false);
 }
 
 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
@@ -7893,7 +7893,7 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
 }
 
 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
-				u32 size)
+				u32 size, bool eb)
 {
 	struct hl_cb *cb = (struct hl_cb *) data;
 	struct packet_msg_short *pkt;
@@ -7910,7 +7910,7 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb);
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index d61177bf36a5..b8b4aa636b7c 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5339,7 +5339,7 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev)
 }
 
 static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
-		u32 size)
+				u32 size, bool eb)
 {
 	return 0;
 }

From 7a585dfc32110a106f70474c6fa822d912a92c7e Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 30 Nov 2020 14:56:06 +0200
Subject: [PATCH 156/547] habanalabs: Revise comment to align with mirror list
 name

hw_queues_mirror was renamed to cs_mirror, so revise accordingly a
comment that refers to this list.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index beb482310a58..92c1c516b65f 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -562,7 +562,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 		flush_workqueue(hdev->cq_wq[i]);
 
-	/* Make sure we don't have leftovers in the H/W queues mirror list */
+	/* Make sure we don't have leftovers in the CS mirror list */
 	list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
 		cs_get(cs);
 		cs->aborted = true;

From 0024c094851f718ccb0b797255292bdce850a01f Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sat, 5 Dec 2020 22:55:09 +0200
Subject: [PATCH 157/547] habanalabs/gaudi: disable CGM at HW initialization

In case the clock gating was enabled in preboot we need to disable it
at the H/W initialization stage before touching the MME/TPC registers.
Otherwise, the ASIC can get stuck. If the security is enabled in
the firmware level, the CGM is always disabled and the driver can't
enable it.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c              | 14 +++++++++++---
 .../misc/habanalabs/include/common/hl_boot_if.h    |  5 +++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 65895ba075fe..f316b898e8e0 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -2403,8 +2403,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
 	gaudi_init_e2e(hdev);
 	gaudi_init_hbm_cred(hdev);
 
-	hdev->asic_funcs->disable_clock_gating(hdev);
-
 	for (tpc_id = 0, tpc_offset = 0;
 				tpc_id < TPC_NUMBER_OF_ENGINES;
 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
@@ -3416,6 +3414,9 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
 	if (hdev->in_debug)
 		return;
 
+	if (!hdev->asic_prop.fw_security_disabled)
+		return;
+
 	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
 		enable = !!(hdev->clock_gating_mask &
 				(BIT_ULL(gaudi_dma_assignment[i])));
@@ -3467,7 +3468,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev)
 	u32 qman_offset;
 	int i;
 
-	if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
+	if (!hdev->asic_prop.fw_security_disabled)
 		return;
 
 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
@@ -3801,6 +3802,13 @@ static int gaudi_hw_init(struct hl_device *hdev)
 		return rc;
 	}
 
+	/* In case the clock gating was enabled in preboot we need to disable
+	 * it here before touching the MME/TPC registers.
+	 * There is no need to take clk gating mutex because when this function
+	 * runs, no other relevant code can run
+	 */
+	hdev->asic_funcs->disable_clock_gating(hdev);
+
 	/* SRAM scrambler must be initialized after CPU is running from HBM */
 	gaudi_init_scrambler_sram(hdev);
 
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 755c4800f002..7cb5f2d3e565 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -150,6 +150,10 @@
  * CPU_BOOT_DEV_STS0_PLL_INFO_EN	FW retrieval of PLL info is enabled.
  *					Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_CLK_GATE_EN	Clock Gating enabled.
+ *					FW initialized Clock Gating.
+ *					Initialized in: preboot
+ *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
  *					running FW populates the device status
@@ -171,6 +175,7 @@
 #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN			(1 << 9)
 #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN		(1 << 10)
 #define CPU_BOOT_DEV_STS0_PLL_INFO_EN			(1 << 11)
+#define CPU_BOOT_DEV_STS0_CLK_GATE_EN			(1 << 13)
 #define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
 
 enum cpu_boot_status {

From 6bbb77b9e6f0bd5595724b7c0cb1189afdd133d3 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Sun, 6 Dec 2020 14:00:35 +0200
Subject: [PATCH 158/547] habanalabs: full FW hard reset support

Driver must fetch FW hard reset capability at every FW boot stage:
preboot, CPU boot, CPU application.
If hard reset is triggered, driver will take into consideration
only the last capability received.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 55 +++++++++++++++-----
 1 file changed, 41 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index c970bfc6db66..20f77f58edef 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -629,32 +629,36 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	/* We read security status multiple times during boot:
 	 * 1. preboot - a. Check whether the security status bits are valid
 	 *              b. Check whether fw security is enabled
-	 *              c. Check whether hard reset is done by fw
-	 * 2. boot cpu - we get boot cpu security status
-	 * 3. FW application - we get FW application security status
+	 *              c. Check whether hard reset is done by preboot
+	 * 2. boot cpu - a. Fetch boot cpu security status
+	 *               b. Check whether hard reset is done by boot cpu
+	 * 3. FW application - a. Fetch fw application security status
+	 *                     b. Check whether hard reset is done by fw app
 	 *
 	 * Preboot:
 	 * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
 	 * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
 	 */
 	if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
-		hdev->asic_prop.fw_security_status_valid = 1;
+		prop->fw_security_status_valid = 1;
 
-		if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN))
+		if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
+			prop->fw_security_disabled = false;
+		else
 			prop->fw_security_disabled = true;
 
 		if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
-			hdev->asic_prop.hard_reset_done_by_fw = true;
+			prop->hard_reset_done_by_fw = true;
 	} else {
-		hdev->asic_prop.fw_security_status_valid = 0;
+		prop->fw_security_status_valid = 0;
 		prop->fw_security_disabled = true;
 	}
 
-	dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
-		hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
+	dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
+			prop->hard_reset_done_by_fw ? "enabled" : "disabled");
 
 	dev_info(hdev->dev, "firmware-level security is %s\n",
-		prop->fw_security_disabled ? "disabled" : "enabled");
+			prop->fw_security_disabled ? "disabled" : "enabled");
 
 	return 0;
 }
@@ -664,6 +668,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
 			bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
 {
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u32 status;
 	int rc;
 
@@ -732,11 +737,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	/* Read U-Boot version now in case we will later fail */
 	hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
 
+	/* Clear reset status since we need to read it again from boot CPU */
+	prop->hard_reset_done_by_fw = false;
+
 	/* Read boot_cpu security bits */
-	if (hdev->asic_prop.fw_security_status_valid)
-		hdev->asic_prop.fw_boot_cpu_security_map =
+	if (prop->fw_security_status_valid) {
+		prop->fw_boot_cpu_security_map =
 				RREG32(cpu_security_boot_status_reg);
 
+		if (prop->fw_boot_cpu_security_map &
+				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+			prop->hard_reset_done_by_fw = true;
+	}
+
+	dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
+			prop->hard_reset_done_by_fw ? "enabled" : "disabled");
+
 	if (rc) {
 		detect_cpu_boot_status(hdev, status);
 		rc = -EIO;
@@ -805,11 +821,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 		goto out;
 	}
 
+	/* Clear reset status since we need to read again from app */
+	prop->hard_reset_done_by_fw = false;
+
 	/* Read FW application security bits */
-	if (hdev->asic_prop.fw_security_status_valid)
-		hdev->asic_prop.fw_app_security_map =
+	if (prop->fw_security_status_valid) {
+		prop->fw_app_security_map =
 				RREG32(cpu_security_boot_status_reg);
 
+		if (prop->fw_app_security_map &
+				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+			prop->hard_reset_done_by_fw = true;
+	}
+
+	dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
+			prop->hard_reset_done_by_fw ? "enabled" : "disabled");
+
 	dev_info(hdev->dev, "Successfully loaded firmware to device\n");
 
 out:

From 13d0ee10b55ecec01fd3c91e086e4f3ba75a7911 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sun, 6 Dec 2020 23:48:45 +0200
Subject: [PATCH 159/547] habanalabs/gaudi: enhance reset message

Print the initiator who performs the hard-reset for easier debugging.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index f316b898e8e0..b0528883a995 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -3936,11 +3936,15 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
 
 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
-	}
 
-	dev_info(hdev->dev,
-		"Issued HARD reset command, going to wait %dms\n",
-		reset_timeout_ms);
+		dev_info(hdev->dev,
+			"Issued HARD reset command, going to wait %dms\n",
+			reset_timeout_ms);
+	} else {
+		dev_info(hdev->dev,
+			"Firmware performs HARD reset, going to wait %dms\n",
+			reset_timeout_ms);
+	}
 
 	/*
 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC

From 90ffe170a390d5a620f8fe66758514e369e85d24 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Tue, 8 Dec 2020 16:52:48 +0200
Subject: [PATCH 160/547] habanalabs: update comment in hl_boot_if.h

Hard-reset flag is updated in many stages of the boot sequence of the
firmware.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/include/common/hl_boot_if.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 7cb5f2d3e565..b637dfd69f6e 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -145,7 +145,7 @@
  *					implemented. This means that FW will
  *					perform hard reset procedure on
  *					receiving the halt-machine event.
- *					Initialized in: linux
+ *					Initialized in: preboot, u-boot, linux
  *
  * CPU_BOOT_DEV_STS0_PLL_INFO_EN	FW retrieval of PLL info is enabled.
  *					Initialized in: linux

From 377182a3cc5ae6cc17fb04d06864c975f9f71c18 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Wed, 9 Dec 2020 19:50:46 +0200
Subject: [PATCH 161/547] habanalabs: adjust pci controller init to new
 firmware

When the firmware security is enabled, the pcie_aux_dbi_reg_addr
register in the PCI controller is blocked. Therefore, ignore
the result of writing to this register and assume it worked. Also
remove the prints on errors in the internal ELBI write function.

If the security is enabled, the firmware is responsible for setting
this register correctly so we won't have any problem.

If the security is disabled, the write will work (unless something
is totally broken at the PCI level and then the whole sequence
will fail).

In addition, remove a write to register pcie_aux_dbi_reg_addr+4,
which was never actually needed.

Moreover, PCIE_DBI registers are blocked to access from host when
firmware security is enabled. Use a different register to flush the
writes.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/pci.c          | 28 +++++++++++--------
 drivers/misc/habanalabs/gaudi/gaudi.c         |  4 +--
 .../misc/habanalabs/gaudi/gaudi_coresight.c   |  3 +-
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c
index 923b2606e29f..b4725e6101f6 100644
--- a/drivers/misc/habanalabs/common/pci.c
+++ b/drivers/misc/habanalabs/common/pci.c
@@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
 	if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
 		return 0;
 
-	if (val & PCI_CONFIG_ELBI_STS_ERR) {
-		dev_err(hdev->dev, "Error writing to ELBI\n");
+	if (val & PCI_CONFIG_ELBI_STS_ERR)
 		return -EIO;
-	}
 
 	if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
 		dev_err(hdev->dev, "ELBI write didn't finish in time\n");
@@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
 
 	dbi_offset = addr & 0xFFF;
 
-	rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
-	rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
+	/* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+	 * in case the firmware security is enabled
+	 */
+	hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
+
+	rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
 				data);
 
 	if (rc)
@@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
 
 	rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
 
-	/* Return the DBI window to the default location */
-	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
-	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+	/* Return the DBI window to the default location
+	 * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+	 * in case the firmware security is enabled
+	 */
+	hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
 
 	if (rc)
 		dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
@@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
 	/* Enable */
 	rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
 
-	/* Return the DBI window to the default location */
-	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
-	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+	/* Return the DBI window to the default location
+	 * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+	 * in case the firmware security is enabled
+	 */
+	hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
 
 	return rc;
 }
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index b0528883a995..88d0e4356d59 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -3761,7 +3761,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 static void gaudi_pre_hw_init(struct hl_device *hdev)
 {
 	/* Perform read from the device to make sure device is up */
-	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+	RREG32(mmHW_STATE);
 
 	if (hdev->asic_prop.fw_security_disabled) {
 		/* Set the access through PCI bars (Linux driver only) as
@@ -3847,7 +3847,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
 	}
 
 	/* Perform read from the device to flush all configuration */
-	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+	RREG32(mmHW_STATE);
 
 	return 0;
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
index 2e3612e1ee28..88a09d42e111 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
@@ -9,6 +9,7 @@
 #include "../include/gaudi/gaudi_coresight.h"
 #include "../include/gaudi/asic_reg/gaudi_regs.h"
 #include "../include/gaudi/gaudi_masks.h"
+#include "../include/gaudi/gaudi_reg_map.h"
 
 #include <uapi/misc/habanalabs.h>
 #define SPMU_SECTION_SIZE		MME0_ACC_SPMU_MAX_OFFSET
@@ -874,7 +875,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data)
 	}
 
 	/* Perform read from the device to flush all configuration */
-	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+	RREG32(mmHW_STATE);
 
 	return rc;
 }

From 98e8781f008372057bd5cb059ca6b507371e473d Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Wed, 9 Dec 2020 23:07:58 +0200
Subject: [PATCH 162/547] habanalabs/gaudi: retry loading TPC f/w on -EINTR

If loading the firmware file for the TPC f/w was interrupted, try
to do it again, up to 5 times.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 88d0e4356d59..8c09e4466af8 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -838,11 +838,17 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev)
 	size_t fw_size;
 	void *cpu_addr;
 	dma_addr_t dma_handle;
-	int rc;
+	int rc, count = 5;
 
+again:
 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
+	if (rc == -EINTR && count-- > 0) {
+		msleep(50);
+		goto again;
+	}
+
 	if (rc) {
-		dev_err(hdev->dev, "Firmware file %s is not found!\n",
+		dev_err(hdev->dev, "Failed to load firmware file %s\n",
 				GAUDI_TPC_FW_FILE);
 		goto out;
 	}

From a3fd28306329e8e82efab973aafe81e9001dcf6f Mon Sep 17 00:00:00 2001
From: Alon Mizrahi <amizrahi@habana.ai>
Date: Tue, 8 Dec 2020 16:14:01 +0200
Subject: [PATCH 163/547] habanalabs: add validation cs counter, fix misplaced
 counters

Up until now validation errors were counted in the parsing field
of the cs_counters struct, so we added a new counter and increased
it when needed.

In addition, there were some locations where only one of the counters
was updated (ctx or aggregate) so add the second one to be updated
as well.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 75 ++++++++++++++-----
 drivers/misc/habanalabs/common/habanalabs.h   |  2 +
 .../misc/habanalabs/common/habanalabs_ioctl.c |  5 ++
 include/uapi/misc/habanalabs.h                |  4 +
 4 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 92c1c516b65f..b2b3d2b0f808 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cntr = &hdev->aggregated_cs_counters;
 
 	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
-	if (!cs)
+	if (!cs) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		return -ENOMEM;
+	}
 
 	cs->ctx = ctx;
 	cs->submitted = false;
@@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 
 	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
 	if (!cs_cmpl) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		rc = -ENOMEM;
 		goto free_cs;
 	}
@@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
 			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
 	if (!cs->jobs_in_queue_cnt) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
 		rc = -ENOMEM;
 		goto free_fence;
 	}
@@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 
 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 					struct hl_cs_chunk **cs_chunk_array,
-					void __user *chunks, u32 num_chunks)
+					void __user *chunks, u32 num_chunks,
+					struct hl_ctx *ctx)
 {
 	u32 size_to_copy;
 
 	if (num_chunks > HL_MAX_JOBS_PER_CS) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Number of chunks can NOT be larger than %d\n",
 			HL_MAX_JOBS_PER_CS);
@@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 
 	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
 					GFP_ATOMIC);
-	if (!*cs_chunk_array)
+	if (!*cs_chunk_array) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
 		return -ENOMEM;
+	}
 
 	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
 	if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
 		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
 		kfree(*cs_chunk_array);
 		return -EFAULT;
@@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_cs_chunk *cs_chunk_array;
 	struct hl_cs_counters_atomic *cntr;
+	struct hl_ctx *ctx = hpriv->ctx;
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
@@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	cntr = &hdev->aggregated_cs_counters;
 	*cs_seq = ULLONG_MAX;
 
-	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+			hpriv->ctx);
 	if (rc)
 		goto out;
 
@@ -832,8 +849,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		rc = validate_queue_index(hdev, chunk, &queue_type,
 						&is_kernel_allocated_cb);
 		if (rc) {
-			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
-			atomic64_inc(&cntr->parsing_drop_cnt);
+			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+			atomic64_inc(&cntr->validation_drop_cnt);
 			goto free_cs_object;
 		}
 
@@ -841,8 +858,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
 			if (!cb) {
 				atomic64_inc(
-				&hpriv->ctx->cs_counters.parsing_drop_cnt);
-				atomic64_inc(&cntr->parsing_drop_cnt);
+					&ctx->cs_counters.validation_drop_cnt);
+				atomic64_inc(&cntr->validation_drop_cnt);
 				rc = -EINVAL;
 				goto free_cs_object;
 			}
@@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		job = hl_cs_allocate_job(hdev, queue_type,
 						is_kernel_allocated_cb);
 		if (!job) {
-			atomic64_inc(
-			&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
+			atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
 			atomic64_inc(&cntr->out_of_mem_drop_cnt);
 			dev_err(hdev->dev, "Failed to allocate a new job\n");
 			rc = -ENOMEM;
@@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = cs_parser(hpriv, job);
 		if (rc) {
-			atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
+			atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
 			atomic64_inc(&cntr->parsing_drop_cnt);
 			dev_err(hdev->dev,
 				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	}
 
 	if (int_queues_only) {
-		atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
-		atomic64_inc(&cntr->parsing_drop_cnt);
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&cntr->validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Reject CS %d.%llu because only internal queues jobs are present\n",
 			cs->ctx->asid, cs->sequence);
@@ -1042,7 +1058,7 @@ out:
 }
 
 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
-		struct hl_cs_chunk *chunk, u64 *signal_seq)
+		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
 {
 	u64 *signal_seq_arr = NULL;
 	u32 size_to_copy, signal_seq_arr_len;
@@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
 
 	/* currently only one signal seq is supported */
 	if (signal_seq_arr_len != 1) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Wait for signal CS supports only one signal CS seq\n");
 		return -EINVAL;
@@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
 	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
 					sizeof(*signal_seq_arr),
 					GFP_ATOMIC);
-	if (!signal_seq_arr)
+	if (!signal_seq_arr) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
 		return -ENOMEM;
+	}
 
 	size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
 	if (copy_from_user(signal_seq_arr,
 				u64_to_user_ptr(chunk->signal_seq_arr),
 				size_to_copy)) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Failed to copy signal seq array from user\n");
 		rc = -EFAULT;
@@ -1153,6 +1176,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_cs_compl *sig_waitcs_cmpl;
 	u32 q_idx, collective_engine_id = 0;
+	struct hl_cs_counters_atomic *cntr;
 	struct hl_fence *sig_fence = NULL;
 	struct hl_ctx *ctx = hpriv->ctx;
 	enum hl_queue_type q_type;
@@ -1160,9 +1184,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	u64 signal_seq;
 	int rc;
 
+	cntr = &hdev->aggregated_cs_counters;
 	*cs_seq = ULLONG_MAX;
 
-	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+			ctx);
 	if (rc)
 		goto out;
 
@@ -1170,6 +1196,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	chunk = &cs_chunk_array[0];
 
 	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&cntr->validation_drop_cnt);
 		dev_err(hdev->dev, "Queue index %d is invalid\n",
 			chunk->queue_index);
 		rc = -EINVAL;
@@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	q_type = hw_queue_prop->type;
 
 	if (!hw_queue_prop->supports_sync_stream) {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&cntr->validation_drop_cnt);
 		dev_err(hdev->dev,
 			"Queue index %d does not support sync stream operations\n",
 			q_idx);
@@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
 		if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
+			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+			atomic64_inc(&cntr->validation_drop_cnt);
 			dev_err(hdev->dev,
 				"Queue index %d is invalid\n", q_idx);
 			rc = -EINVAL;
@@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	}
 
 	if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
-		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
+		rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
 		if (rc)
 			goto free_cs_chunk_array;
 
 		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
 		if (IS_ERR(sig_fence)) {
+			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+			atomic64_inc(&cntr->validation_drop_cnt);
 			dev_err(hdev->dev,
 				"Failed to get signal CS with seq 0x%llx\n",
 				signal_seq);
@@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 			container_of(sig_fence, struct hl_cs_compl, base_fence);
 
 		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
+			atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+			atomic64_inc(&cntr->validation_drop_cnt);
 			dev_err(hdev->dev,
 				"CS seq 0x%llx is not of a signal CS\n",
 				signal_seq);
@@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
 		rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
 				cs, q_idx, collective_engine_id);
-	else
+	else {
+		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+		atomic64_inc(&cntr->validation_drop_cnt);
 		rc = -EINVAL;
+	}
 
 	if (rc)
 		goto free_cs_object;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 70b778a0d60e..e0d7f5fbaa5c 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1000,6 +1000,7 @@ struct hl_va_range {
  * @queue_full_drop_cnt: dropped due to queue full
  * @device_in_reset_drop_cnt: dropped due to device in reset
  * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ * @validation_drop_cnt: dropped due to error in validation
  */
 struct hl_cs_counters_atomic {
 	atomic64_t out_of_mem_drop_cnt;
@@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic {
 	atomic64_t queue_full_drop_cnt;
 	atomic64_t device_in_reset_drop_cnt;
 	atomic64_t max_cs_in_flight_drop_cnt;
+	atomic64_t validation_drop_cnt;
 };
 
 /**
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index a0c0d20f6f8f..12efbd9d2e3a 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -335,6 +335,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 			atomic64_read(&cntr->device_in_reset_drop_cnt);
 	cs_counters.total_max_cs_in_flight_drop_cnt =
 			atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
+	cs_counters.total_validation_drop_cnt =
+			atomic64_read(&cntr->validation_drop_cnt);
 
 	if (hpriv->ctx) {
 		cs_counters.ctx_out_of_mem_drop_cnt =
@@ -352,6 +354,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 		cs_counters.ctx_max_cs_in_flight_drop_cnt =
 				atomic64_read(
 			&hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
+		cs_counters.ctx_validation_drop_cnt =
+				atomic64_read(
+				&hpriv->ctx->cs_counters.validation_drop_cnt);
 	}
 
 	return copy_to_user(out, &cs_counters,
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index dc8bcec195cc..dba3827c43ca 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -426,6 +426,8 @@ struct hl_info_sync_manager {
  * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset
  * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight
  * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight
+ * @total_validation_drop_cnt: total dropped due to validation error
+ * @ctx_validation_drop_cnt: context dropped due to validation error
  */
 struct hl_info_cs_counters {
 	__u64 total_out_of_mem_drop_cnt;
@@ -438,6 +440,8 @@ struct hl_info_cs_counters {
 	__u64 ctx_device_in_reset_drop_cnt;
 	__u64 total_max_cs_in_flight_drop_cnt;
 	__u64 ctx_max_cs_in_flight_drop_cnt;
+	__u64 total_validation_drop_cnt;
+	__u64 ctx_validation_drop_cnt;
 };
 
 enum gaudi_dcores {

From fcaebc7354188b0d708c79df4390fbabd4d9799d Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 14 Dec 2020 12:52:06 +0200
Subject: [PATCH 164/547] habanalabs: register to pci shutdown callback

We need to make sure our device is idle when rebooting a virtual
machine. This is done in the driver level.

The firmware will later handle FLR but we want to be extra safe and
stop the devices until the FLR is handled.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 6bbb6bca6860..032d114f01ea 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = {
 	.id_table = ids,
 	.probe = hl_pci_probe,
 	.remove = hl_pci_remove,
+	.shutdown = hl_pci_remove,
 	.driver.pm = &hl_pm_ops,
 	.err_handler = &hl_pci_err_handler,
 };

From 097c62b6f0ec2bdadf86afbe80df03856338724d Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Tue, 22 Dec 2020 15:21:07 +0200
Subject: [PATCH 165/547] habanalabs: fix order of status check

When the device is in reset or needs to be reset, the disabled property
is don't-care.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 5871162a8442..0749c92cbcf6 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
 	enum hl_device_status status;
 
-	if (hdev->disabled)
-		status = HL_DEVICE_STATUS_MALFUNCTION;
-	else if (atomic_read(&hdev->in_reset))
+	if (atomic_read(&hdev->in_reset))
 		status = HL_DEVICE_STATUS_IN_RESET;
 	else if (hdev->needs_reset)
 		status = HL_DEVICE_STATUS_NEEDS_RESET;
+	else if (hdev->disabled)
+		status = HL_DEVICE_STATUS_MALFUNCTION;
 	else
 		status = HL_DEVICE_STATUS_OPERATIONAL;
 

From 95cd4bca7b1f4a25810f3ddfc5e767fb46931789 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 27 Dec 2020 12:33:44 +0100
Subject: [PATCH 166/547] netfilter: nft_dynset: report EOPNOTSUPP on missing
 set feature

If userspace requests a feature which is not available the original set
definition, then bail out with EOPNOTSUPP. If userspace sends
unsupported dynset flags (new feature not supported by this kernel),
then report EOPNOTSUPP to userspace. EINVAL should be only used to
report malformed netlink messages from userspace.

Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_dynset.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 983a1d5ca3ab..f35df221a633 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -177,7 +177,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 		u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
 
 		if (flags & ~NFT_DYNSET_F_INV)
-			return -EINVAL;
+			return -EOPNOTSUPP;
 		if (flags & NFT_DYNSET_F_INV)
 			priv->invert = true;
 	}
@@ -210,7 +210,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 	timeout = 0;
 	if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
 		if (!(set->flags & NFT_SET_TIMEOUT))
-			return -EINVAL;
+			return -EOPNOTSUPP;
 
 		err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout);
 		if (err)
@@ -224,7 +224,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 
 	if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
 		if (!(set->flags & NFT_SET_MAP))
-			return -EINVAL;
+			return -EOPNOTSUPP;
 		if (set->dtype == NFT_DATA_VERDICT)
 			return -EOPNOTSUPP;
 

From b4e70d8dd9ea6bd5d5fb3122586f652326ca09cd Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 27 Dec 2020 12:35:43 +0100
Subject: [PATCH 167/547] netfilter: nftables: add set expression flags

The set flag NFT_SET_EXPR provides a hint to the kernel that userspace
supports for multiple expressions per set element. In the same
direction, NFT_DYNSET_F_EXPR specifies that dynset expression defines
multiple expressions per set element.

This allows new userspace software with old kernels to bail out with
EOPNOTSUPP. This update is similar to ef516e8625dd ("netfilter:
nf_tables: reintroduce the NFT_SET_CONCAT flag"). The NFT_SET_EXPR flag
needs to be set on when the NFTA_SET_EXPRESSIONS attribute is specified.
The NFT_SET_EXPR flag is not set on with NFTA_SET_EXPR to retain
backward compatibility in old userspace binaries.

Fixes: 48b0ae046ee9 ("netfilter: nftables: netlink support for several set element expressions")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 3 +++
 net/netfilter/nf_tables_api.c            | 6 +++++-
 net/netfilter/nft_dynset.c               | 9 +++++++--
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 28b6ee53305f..b1633e7ba529 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -293,6 +293,7 @@ enum nft_rule_compat_attributes {
  * @NFT_SET_EVAL: set can be updated from the evaluation path
  * @NFT_SET_OBJECT: set contains stateful objects
  * @NFT_SET_CONCAT: set contains a concatenation
+ * @NFT_SET_EXPR: set contains expressions
  */
 enum nft_set_flags {
 	NFT_SET_ANONYMOUS		= 0x1,
@@ -303,6 +304,7 @@ enum nft_set_flags {
 	NFT_SET_EVAL			= 0x20,
 	NFT_SET_OBJECT			= 0x40,
 	NFT_SET_CONCAT			= 0x80,
+	NFT_SET_EXPR			= 0x100,
 };
 
 /**
@@ -706,6 +708,7 @@ enum nft_dynset_ops {
 
 enum nft_dynset_flags {
 	NFT_DYNSET_F_INV	= (1 << 0),
+	NFT_DYNSET_F_EXPR	= (1 << 1),
 };
 
 /**
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4186b1e52d58..15c467f1a9dd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4162,7 +4162,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 		if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
 			      NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
 			      NFT_SET_MAP | NFT_SET_EVAL |
-			      NFT_SET_OBJECT | NFT_SET_CONCAT))
+			      NFT_SET_OBJECT | NFT_SET_CONCAT | NFT_SET_EXPR))
 			return -EOPNOTSUPP;
 		/* Only one of these operations is supported */
 		if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
@@ -4304,6 +4304,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 		struct nlattr *tmp;
 		int left;
 
+		if (!(flags & NFT_SET_EXPR)) {
+			err = -EINVAL;
+			goto err_set_alloc_name;
+		}
 		i = 0;
 		nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
 			if (i == NFT_SET_EXPR_MAX) {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index f35df221a633..0b053f75cd60 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -19,6 +19,7 @@ struct nft_dynset {
 	enum nft_registers		sreg_key:8;
 	enum nft_registers		sreg_data:8;
 	bool				invert;
+	bool				expr;
 	u8				num_exprs;
 	u64				timeout;
 	struct nft_expr			*expr_array[NFT_SET_EXPR_MAX];
@@ -175,11 +176,12 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 
 	if (tb[NFTA_DYNSET_FLAGS]) {
 		u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
-
-		if (flags & ~NFT_DYNSET_F_INV)
+		if (flags & ~(NFT_DYNSET_F_INV | NFT_DYNSET_F_EXPR))
 			return -EOPNOTSUPP;
 		if (flags & NFT_DYNSET_F_INV)
 			priv->invert = true;
+		if (flags & NFT_DYNSET_F_EXPR)
+			priv->expr = true;
 	}
 
 	set = nft_set_lookup_global(ctx->net, ctx->table,
@@ -261,6 +263,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 		struct nlattr *tmp;
 		int left;
 
+		if (!priv->expr)
+			return -EINVAL;
+
 		i = 0;
 		nla_for_each_nested(tmp, tb[NFTA_DYNSET_EXPRESSIONS], left) {
 			if (i == NFT_SET_EXPR_MAX) {

From 2ca408d9c749c32288bc28725f9f12ba30299e8f Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 30 Nov 2020 17:30:59 -0500
Subject: [PATCH 168/547] fanotify: Fix sys_fanotify_mark() on native x86-32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit

  121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments")

converted native x86-32 which take 64-bit arguments to use the
compat handlers to allow conversion to passing args via pt_regs.
sys_fanotify_mark() was however missed, as it has a general compat
handler. Add a config option that will use the syscall wrapper that
takes the split args for native 32-bit.

 [ bp: Fix typo in Kconfig help text. ]

Fixes: 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments")
Reported-by: Paweł Jasiak <pawel@jasiak.xyz>
Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lkml.kernel.org/r/20201130223059.101286-1-brgerst@gmail.com
---
 arch/Kconfig                       |  6 ++++++
 arch/x86/Kconfig                   |  1 +
 fs/notify/fanotify/fanotify_user.c | 17 +++++++----------
 include/linux/syscalls.h           | 24 ++++++++++++++++++++++++
 4 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 78c6f05b10f9..24862d15f3a3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	bool
 
+config ARCH_SPLIT_ARG64
+	bool
+	help
+	   If a 32-bit architecture requires 64-bit arguments to be split into
+	   pairs of 32-bit arguments, select this option.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7b6dd10b162a..21f851179ff0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,6 +19,7 @@ config X86_32
 	select KMAP_LOCAL
 	select MODULES_USE_ELF_REL
 	select OLD_SIGACTION
+	select ARCH_SPLIT_ARG64
 
 config X86_64
 	def_bool y
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3e01d8f2ab90..dcab112e1f00 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1285,26 +1285,23 @@ fput_and_out:
 	return ret;
 }
 
+#ifndef CONFIG_ARCH_SPLIT_ARG64
 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
 			      __u64, mask, int, dfd,
 			      const char  __user *, pathname)
 {
 	return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
 }
+#endif
 
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(fanotify_mark,
+#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
+SYSCALL32_DEFINE6(fanotify_mark,
 				int, fanotify_fd, unsigned int, flags,
-				__u32, mask0, __u32, mask1, int, dfd,
+				SC_ARG64(mask), int, dfd,
 				const char  __user *, pathname)
 {
-	return do_fanotify_mark(fanotify_fd, flags,
-#ifdef __BIG_ENDIAN
-				((__u64)mask0 << 32) | mask1,
-#else
-				((__u64)mask1 << 32) | mask0,
-#endif
-				 dfd, pathname);
+	return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
+				dfd, pathname);
 }
 #endif
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f3929aff39cf..7688bc983de5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
 	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 #endif /* __SYSCALL_DEFINEx */
 
+/* For split 64-bit arguments on 32-bit architectures */
+#ifdef __LITTLE_ENDIAN
+#define SC_ARG64(name) u32, name##_lo, u32, name##_hi
+#else
+#define SC_ARG64(name) u32, name##_hi, u32, name##_lo
+#endif
+#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)
+
+#ifdef CONFIG_COMPAT
+#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1
+#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2
+#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3
+#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4
+#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5
+#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6
+#else
+#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1
+#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2
+#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3
+#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4
+#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5
+#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6
+#endif
+
 /*
  * Called before coming back to user-mode. Returning to user-mode with an
  * address limit different than USER_DS can allow to overwrite kernel memory.

From 512d4a26abdbd11c6ffa03032740e5ab3c62c55b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 28 Dec 2020 14:03:02 +0200
Subject: [PATCH 169/547] interconnect: qcom: fix rpmh link failures

When CONFIG_COMPILE_TEST is set, it is possible to build some
of the interconnect drivers into the kernel while their dependencies
are loadable modules, which is bad:

arm-linux-gnueabi-ld: drivers/interconnect/qcom/bcm-voter.o: in function `qcom_icc_bcm_voter_commit':
(.text+0x1f8): undefined reference to `rpmh_invalidate'
arm-linux-gnueabi-ld: (.text+0x20c): undefined reference to `rpmh_write_batch'
arm-linux-gnueabi-ld: (.text+0x2b0): undefined reference to `rpmh_write_batch'
arm-linux-gnueabi-ld: (.text+0x2e8): undefined reference to `rpmh_write_batch'
arm-linux-gnueabi-ld: drivers/interconnect/qcom/icc-rpmh.o: in function `qcom_icc_bcm_init':
(.text+0x2ac): undefined reference to `cmd_db_read_addr'
arm-linux-gnueabi-ld: (.text+0x2c8): undefined reference to `cmd_db_read_aux_data'

The exact dependencies are a bit complicated, so split them out into a
hidden Kconfig symbol that all drivers can in turn depend on to get it
right.

Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20201204165030.3747484-1-arnd@kernel.org
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 drivers/interconnect/qcom/Kconfig | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig
index a8f93ba265f8..b3fb5b02bcf1 100644
--- a/drivers/interconnect/qcom/Kconfig
+++ b/drivers/interconnect/qcom/Kconfig
@@ -42,13 +42,23 @@ config INTERCONNECT_QCOM_QCS404
 	  This is a driver for the Qualcomm Network-on-Chip on qcs404-based
 	  platforms.
 
+config INTERCONNECT_QCOM_RPMH_POSSIBLE
+	tristate
+	default INTERCONNECT_QCOM
+	depends on QCOM_RPMH || (COMPILE_TEST && !QCOM_RPMH)
+	depends on QCOM_COMMAND_DB || (COMPILE_TEST && !QCOM_COMMAND_DB)
+	depends on OF || COMPILE_TEST
+	help
+	  Compile-testing RPMH drivers is possible on other platforms,
+	  but in order to avoid link failures, drivers must not be built-in
+	  when QCOM_RPMH or QCOM_COMMAND_DB are loadable modules
+
 config INTERCONNECT_QCOM_RPMH
 	tristate
 
 config INTERCONNECT_QCOM_SC7180
 	tristate "Qualcomm SC7180 interconnect driver"
-	depends on INTERCONNECT_QCOM
-	depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST
+	depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
 	select INTERCONNECT_QCOM_RPMH
 	select INTERCONNECT_QCOM_BCM_VOTER
 	help
@@ -57,8 +67,7 @@ config INTERCONNECT_QCOM_SC7180
 
 config INTERCONNECT_QCOM_SDM845
 	tristate "Qualcomm SDM845 interconnect driver"
-	depends on INTERCONNECT_QCOM
-	depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST
+	depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
 	select INTERCONNECT_QCOM_RPMH
 	select INTERCONNECT_QCOM_BCM_VOTER
 	help
@@ -67,8 +76,7 @@ config INTERCONNECT_QCOM_SDM845
 
 config INTERCONNECT_QCOM_SM8150
 	tristate "Qualcomm SM8150 interconnect driver"
-	depends on INTERCONNECT_QCOM
-	depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST
+	depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
 	select INTERCONNECT_QCOM_RPMH
 	select INTERCONNECT_QCOM_BCM_VOTER
 	help
@@ -77,8 +85,7 @@ config INTERCONNECT_QCOM_SM8150
 
 config INTERCONNECT_QCOM_SM8250
 	tristate "Qualcomm SM8250 interconnect driver"
-	depends on INTERCONNECT_QCOM
-	depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST
+	depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
 	select INTERCONNECT_QCOM_RPMH
 	select INTERCONNECT_QCOM_BCM_VOTER
 	help

From c6174c0e058fc0a54e0b9787c44cb24b0a8d0217 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 28 Dec 2020 14:03:02 +0200
Subject: [PATCH 170/547] interconnect: imx: Add a missing of_node_put after
 of_device_is_available

Add an 'of_node_put()' call when a tested device node is not available.

Fixes: f0d8048525d7 ("interconnect: Add imx core driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/20201206121304.29381-1-christophe.jaillet@wanadoo.fr
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 drivers/interconnect/imx/imx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c
index 41dba7090c2a..e398ebf1dbba 100644
--- a/drivers/interconnect/imx/imx.c
+++ b/drivers/interconnect/imx/imx.c
@@ -99,6 +99,7 @@ static int imx_icc_node_init_qos(struct icc_provider *provider,
 		if (!dn || !of_device_is_available(dn)) {
 			dev_warn(dev, "Missing property %s, skip scaling %s\n",
 				 adj->phandle_name, node->name);
+			of_node_put(dn);
 			return 0;
 		}
 

From 6414b79d02c426b7dd7d942fc19fb38220ea44ec Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 28 Dec 2020 14:03:02 +0200
Subject: [PATCH 171/547] interconnect: imx: Remove a useless test

'dn' can't be NULL here, it is tested just the line above.
Remove this useless test.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/20201206121322.29434-1-christophe.jaillet@wanadoo.fr
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 drivers/interconnect/imx/imx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c
index e398ebf1dbba..c770951a909c 100644
--- a/drivers/interconnect/imx/imx.c
+++ b/drivers/interconnect/imx/imx.c
@@ -96,7 +96,7 @@ static int imx_icc_node_init_qos(struct icc_provider *provider,
 			return -ENODEV;
 		}
 		/* Allow scaling to be disabled on a per-node basis */
-		if (!dn || !of_device_is_available(dn)) {
+		if (!of_device_is_available(dn)) {
 			dev_warn(dev, "Missing property %s, skip scaling %s\n",
 				 adj->phandle_name, node->name);
 			of_node_put(dn);

From 67288f74d4837b82ef937170da3389b0779c17be Mon Sep 17 00:00:00 2001
From: Martin Kepplinger <martin.kepplinger@puri.sm>
Date: Mon, 28 Dec 2020 14:03:02 +0200
Subject: [PATCH 172/547] interconnect: imx8mq: Use icc_sync_state

Add the icc_sync_state callback to notify the framework when consumers
are probed and the bandwidth doesn't have to be kept at maximum anymore.

Signed-off-by: Martin Kepplinger <martin.kepplinger@puri.sm>
Suggested-by: Georgi Djakov <georgi.djakov@linaro.org>
Fixes: 7d3b0b0d8184 ("interconnect: qcom: Use icc_sync_state")
Link: https://lore.kernel.org/r/20201210100906.18205-6-martin.kepplinger@puri.sm
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 drivers/interconnect/imx/imx8mq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/interconnect/imx/imx8mq.c b/drivers/interconnect/imx/imx8mq.c
index ba43a15aefec..d7768d3c6d8a 100644
--- a/drivers/interconnect/imx/imx8mq.c
+++ b/drivers/interconnect/imx/imx8mq.c
@@ -7,6 +7,7 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/interconnect-provider.h>
 #include <dt-bindings/interconnect/imx8mq.h>
 
 #include "imx.h"
@@ -94,6 +95,7 @@ static struct platform_driver imx8mq_icc_driver = {
 	.remove = imx8mq_icc_remove,
 	.driver = {
 		.name = "imx8mq-interconnect",
+		.sync_state = icc_sync_state,
 	},
 };
 

From 12b38ea040b3bb2a30eb9cd488376df5be7ea81f Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 13 Dec 2020 16:11:05 +0100
Subject: [PATCH 173/547] staging: spmi: hisi-spmi-controller: Fix some error
 handling paths

IN the probe function, if an error occurs after calling
'spmi_controller_alloc()', it must be undone by a corresponding
'spmi_controller_put() call.

In the remove function, use 'spmi_controller_put(ctrl)' instead of
'kfree(ctrl)'.

While a it fix an error message
(s/spmi_add_controller/spmi_controller_add/)

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/20201213151105.137731-1-christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/hikey9xx/hisi-spmi-controller.c   | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/hikey9xx/hisi-spmi-controller.c b/drivers/staging/hikey9xx/hisi-spmi-controller.c
index 861aedd5de48..0d42bc65f39b 100644
--- a/drivers/staging/hikey9xx/hisi-spmi-controller.c
+++ b/drivers/staging/hikey9xx/hisi-spmi-controller.c
@@ -278,21 +278,24 @@ static int spmi_controller_probe(struct platform_device *pdev)
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!iores) {
 		dev_err(&pdev->dev, "can not get resource!\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_put_controller;
 	}
 
 	spmi_controller->base = devm_ioremap(&pdev->dev, iores->start,
 					     resource_size(iores));
 	if (!spmi_controller->base) {
 		dev_err(&pdev->dev, "can not remap base addr!\n");
-		return -EADDRNOTAVAIL;
+		ret = -EADDRNOTAVAIL;
+		goto err_put_controller;
 	}
 
 	ret = of_property_read_u32(pdev->dev.of_node, "spmi-channel",
 				   &spmi_controller->channel);
 	if (ret) {
 		dev_err(&pdev->dev, "can not get channel\n");
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_put_controller;
 	}
 
 	platform_set_drvdata(pdev, spmi_controller);
@@ -309,9 +312,15 @@ static int spmi_controller_probe(struct platform_device *pdev)
 	ctrl->write_cmd = spmi_write_cmd;
 
 	ret = spmi_controller_add(ctrl);
-	if (ret)
-		dev_err(&pdev->dev, "spmi_add_controller failed with error %d!\n", ret);
+	if (ret) {
+		dev_err(&pdev->dev, "spmi_controller_add failed with error %d!\n", ret);
+		goto err_put_controller;
+	}
 
+	return 0;
+
+err_put_controller:
+	spmi_controller_put(ctrl);
 	return ret;
 }
 
@@ -320,7 +329,7 @@ static int spmi_del_controller(struct platform_device *pdev)
 	struct spmi_controller *ctrl = platform_get_drvdata(pdev);
 
 	spmi_controller_remove(ctrl);
-	kfree(ctrl);
+	spmi_controller_put(ctrl);
 	return 0;
 }
 

From cab36da4bf1a35739b091b73714a39a1bbd02b05 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 2 Dec 2020 09:43:49 +0300
Subject: [PATCH 174/547] Staging: comedi: Return -EFAULT if copy_to_user()
 fails

Return -EFAULT on error instead of the number of bytes remaining to be
copied.

Fixes: bac42fb21259 ("comedi: get rid of compat_alloc_user_space() mess in COMEDI_CMD{,TEST} compat")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/X8c3pfwFy2jpy4BP@mwanda
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/comedi_fops.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index d99231c737fb..80d74cce2a01 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -2987,7 +2987,9 @@ static int put_compat_cmd(struct comedi32_cmd_struct __user *cmd32,
 	v32.chanlist_len = cmd->chanlist_len;
 	v32.data = ptr_to_compat(cmd->data);
 	v32.data_len = cmd->data_len;
-	return copy_to_user(cmd32, &v32, sizeof(v32));
+	if (copy_to_user(cmd32, &v32, sizeof(v32)))
+		return -EFAULT;
+	return 0;
 }
 
 /* Handle 32-bit COMEDI_CMD ioctl. */

From d887d6104adeb94d1b926936ea21f07367f0ff9f Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 13 Dec 2020 16:35:13 +0100
Subject: [PATCH 175/547] staging: mt7621-dma: Fix a resource leak in an error
 handling path

If an error occurs after calling 'mtk_hsdma_init()', it must be undone by
a corresponding call to 'mtk_hsdma_uninit()' as already done in the
remove function.

Fixes: 0853c7a53eb3 ("staging: mt7621-dma: ralink: add rt2880 dma engine")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201213153513.138723-1-christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/mt7621-dma/mtk-hsdma.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/mt7621-dma/mtk-hsdma.c b/drivers/staging/mt7621-dma/mtk-hsdma.c
index d241349214e7..bc4bb4374313 100644
--- a/drivers/staging/mt7621-dma/mtk-hsdma.c
+++ b/drivers/staging/mt7621-dma/mtk-hsdma.c
@@ -712,7 +712,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev)
 	ret = dma_async_device_register(dd);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register dma device\n");
-		return ret;
+		goto err_uninit_hsdma;
 	}
 
 	ret = of_dma_controller_register(pdev->dev.of_node,
@@ -728,6 +728,8 @@ static int mtk_hsdma_probe(struct platform_device *pdev)
 
 err_unregister:
 	dma_async_device_unregister(dd);
+err_uninit_hsdma:
+	mtk_hsdma_uninit(hsdma);
 	return ret;
 }
 

From 0ffc76539e6e8d28114f95ac25c167c37b5191b3 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Sun, 27 Dec 2020 13:45:02 +0000
Subject: [PATCH 176/547] USB: cdc-acm: blacklist another IR Droid device

This device is supported by the IR Toy driver.

Reported-by: Georgi Bakalski <georgi.bakalski@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Acked-by: Oliver Neukum <oneukum@suse.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201227134502.4548-2-sean@mess.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index f52f1bc0559f..781905745812 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1895,6 +1895,10 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x04d8, 0xfd08),
 	.driver_info = IGNORE_DEVICE,
 	},
+
+	{ USB_DEVICE(0x04d8, 0xf58b),
+	.driver_info = IGNORE_DEVICE,
+	},
 #endif
 
 	/*Samsung phone in firmware update mode */

From 421da9413a6a5ec4334cade5092370cf2c8c8add Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Fri, 18 Dec 2020 12:57:36 +0200
Subject: [PATCH 177/547] MAINTAINERS: Update address for Cadence USB3 driver

Updates my email address for Cadence USB3 driver.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Link: https://lore.kernel.org/r/20201218105736.17667-1-rogerq@ti.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 546aa66428c9..afe3a5c66bc9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3883,7 +3883,7 @@ F:	drivers/mtd/nand/raw/cadence-nand-controller.c
 CADENCE USB3 DRD IP DRIVER
 M:	Peter Chen <peter.chen@nxp.com>
 M:	Pawel Laszczak <pawell@cadence.com>
-M:	Roger Quadros <rogerq@ti.com>
+R:	Roger Quadros <rogerq@kernel.org>
 R:	Aswath Govindraju <a-govindraju@ti.com>
 L:	linux-usb@vger.kernel.org
 S:	Maintained

From 88ebce92806e5dff3549e1a8cacb53978104d3b4 Mon Sep 17 00:00:00 2001
From: Aswath Govindraju <a-govindraju@ti.com>
Date: Tue, 15 Dec 2020 09:55:49 +0530
Subject: [PATCH 178/547] dt-bindings: usb: Add new compatible string for AM64
 SoC

Add compatible string in j721e-usb binding file as the same USB subsystem
is present in AM64.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Aswath Govindraju <a-govindraju@ti.com>
Link: https://lore.kernel.org/r/20201215042549.7956-1-a-govindraju@ti.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
index 388245b91a55..148b3fb4ceaf 100644
--- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
+++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
@@ -11,8 +11,12 @@ maintainers:
 
 properties:
   compatible:
-    items:
+    oneOf:
       - const: ti,j721e-usb
+      - const: ti,am64-usb
+      - items:
+          - const: ti,j721e-usb
+          - const: ti,am64-usb
 
   reg:
     description: module registers

From a390bef7db1f192cc5b588dbcf8ed113406ec130 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Thu, 10 Dec 2020 18:04:13 -0300
Subject: [PATCH 179/547] usb: gadget: fsl_mxc_udc: Remove the driver

Since 5.10-rc1 i.MX is a devicetree-only platform, and this driver
was only used by the old non-DT i.MX devices.

Remove the driver as it has no users left.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Acked-by: Peter Chen <peter.chen@nxp.com>
Link: https://lore.kernel.org/r/20201210210413.15262-1-festevam@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/Kconfig       |   2 +-
 drivers/usb/gadget/udc/Makefile      |   1 -
 drivers/usb/gadget/udc/fsl_mxc_udc.c | 122 ---------------------------
 3 files changed, 1 insertion(+), 124 deletions(-)
 delete mode 100644 drivers/usb/gadget/udc/fsl_mxc_udc.c

diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index 1a12aab208b4..8c614bb86c66 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -90,7 +90,7 @@ config USB_BCM63XX_UDC
 
 config USB_FSL_USB2
 	tristate "Freescale Highspeed USB DR Peripheral Controller"
-	depends on FSL_SOC || ARCH_MXC
+	depends on FSL_SOC
 	help
 	   Some of Freescale PowerPC and i.MX processors have a High Speed
 	   Dual-Role(DR) USB controller, which supports device mode.
diff --git a/drivers/usb/gadget/udc/Makefile b/drivers/usb/gadget/udc/Makefile
index f5a7ce28aecd..a21f2224e7eb 100644
--- a/drivers/usb/gadget/udc/Makefile
+++ b/drivers/usb/gadget/udc/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_USB_ATMEL_USBA)	+= atmel_usba_udc.o
 obj-$(CONFIG_USB_BCM63XX_UDC)	+= bcm63xx_udc.o
 obj-$(CONFIG_USB_FSL_USB2)	+= fsl_usb2_udc.o
 fsl_usb2_udc-y			:= fsl_udc_core.o
-fsl_usb2_udc-$(CONFIG_ARCH_MXC)	+= fsl_mxc_udc.o
 obj-$(CONFIG_USB_TEGRA_XUDC)	+= tegra-xudc.o
 obj-$(CONFIG_USB_M66592)	+= m66592-udc.o
 obj-$(CONFIG_USB_R8A66597)	+= r8a66597-udc.o
diff --git a/drivers/usb/gadget/udc/fsl_mxc_udc.c b/drivers/usb/gadget/udc/fsl_mxc_udc.c
deleted file mode 100644
index 5a321992decc..000000000000
--- a/drivers/usb/gadget/udc/fsl_mxc_udc.c
+++ /dev/null
@@ -1,122 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2009
- * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
- *
- * Description:
- * Helper routines for i.MX3x SoCs from Freescale, needed by the fsl_usb2_udc.c
- * driver to function correctly on these systems.
- */
-#include <linux/clk.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/fsl_devices.h>
-#include <linux/mod_devicetable.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-
-#include "fsl_usb2_udc.h"
-
-static struct clk *mxc_ahb_clk;
-static struct clk *mxc_per_clk;
-static struct clk *mxc_ipg_clk;
-
-/* workaround ENGcm09152 for i.MX35 */
-#define MX35_USBPHYCTRL_OFFSET		0x600
-#define USBPHYCTRL_OTGBASE_OFFSET	0x8
-#define USBPHYCTRL_EVDO			(1 << 23)
-
-int fsl_udc_clk_init(struct platform_device *pdev)
-{
-	struct fsl_usb2_platform_data *pdata;
-	unsigned long freq;
-	int ret;
-
-	pdata = dev_get_platdata(&pdev->dev);
-
-	mxc_ipg_clk = devm_clk_get(&pdev->dev, "ipg");
-	if (IS_ERR(mxc_ipg_clk)) {
-		dev_err(&pdev->dev, "clk_get(\"ipg\") failed\n");
-		return PTR_ERR(mxc_ipg_clk);
-	}
-
-	mxc_ahb_clk = devm_clk_get(&pdev->dev, "ahb");
-	if (IS_ERR(mxc_ahb_clk)) {
-		dev_err(&pdev->dev, "clk_get(\"ahb\") failed\n");
-		return PTR_ERR(mxc_ahb_clk);
-	}
-
-	mxc_per_clk = devm_clk_get(&pdev->dev, "per");
-	if (IS_ERR(mxc_per_clk)) {
-		dev_err(&pdev->dev, "clk_get(\"per\") failed\n");
-		return PTR_ERR(mxc_per_clk);
-	}
-
-	clk_prepare_enable(mxc_ipg_clk);
-	clk_prepare_enable(mxc_ahb_clk);
-	clk_prepare_enable(mxc_per_clk);
-
-	/* make sure USB_CLK is running at 60 MHz +/- 1000 Hz */
-	if (!strcmp(pdev->id_entry->name, "imx-udc-mx27")) {
-		freq = clk_get_rate(mxc_per_clk);
-		if (pdata->phy_mode != FSL_USB2_PHY_ULPI &&
-		    (freq < 59999000 || freq > 60001000)) {
-			dev_err(&pdev->dev, "USB_CLK=%lu, should be 60MHz\n", freq);
-			ret = -EINVAL;
-			goto eclkrate;
-		}
-	}
-
-	return 0;
-
-eclkrate:
-	clk_disable_unprepare(mxc_ipg_clk);
-	clk_disable_unprepare(mxc_ahb_clk);
-	clk_disable_unprepare(mxc_per_clk);
-	mxc_per_clk = NULL;
-	return ret;
-}
-
-int fsl_udc_clk_finalize(struct platform_device *pdev)
-{
-	struct fsl_usb2_platform_data *pdata = dev_get_platdata(&pdev->dev);
-	int ret = 0;
-
-	/* workaround ENGcm09152 for i.MX35 */
-	if (pdata->workaround & FLS_USB2_WORKAROUND_ENGCM09152) {
-		unsigned int v;
-		struct resource *res = platform_get_resource
-			(pdev, IORESOURCE_MEM, 0);
-		void __iomem *phy_regs = ioremap(res->start +
-						MX35_USBPHYCTRL_OFFSET, 512);
-		if (!phy_regs) {
-			dev_err(&pdev->dev, "ioremap for phy address fails\n");
-			ret = -EINVAL;
-			goto ioremap_err;
-		}
-
-		v = readl(phy_regs + USBPHYCTRL_OTGBASE_OFFSET);
-		writel(v | USBPHYCTRL_EVDO,
-			phy_regs + USBPHYCTRL_OTGBASE_OFFSET);
-
-		iounmap(phy_regs);
-	}
-
-
-ioremap_err:
-	/* ULPI transceivers don't need usbpll */
-	if (pdata->phy_mode == FSL_USB2_PHY_ULPI) {
-		clk_disable_unprepare(mxc_per_clk);
-		mxc_per_clk = NULL;
-	}
-
-	return ret;
-}
-
-void fsl_udc_clk_release(void)
-{
-	if (mxc_per_clk)
-		clk_disable_unprepare(mxc_per_clk);
-	clk_disable_unprepare(mxc_ahb_clk);
-	clk_disable_unprepare(mxc_ipg_clk);
-}

From 5e5ff0b4b6bcb4d17b7a26ec8bcfc7dd4651684f Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Sun, 20 Dec 2020 00:25:53 +0900
Subject: [PATCH 180/547] USB: cdc-wdm: Fix use after free in
 service_outstanding_interrupt().

syzbot is reporting UAF at usb_submit_urb() [1], for
service_outstanding_interrupt() is not checking WDM_DISCONNECTING
before calling usb_submit_urb(). Close the race by doing same checks
wdm_read() does upon retry.

Also, while wdm_read() checks WDM_DISCONNECTING with desc->rlock held,
service_interrupt_work() does not hold desc->rlock. Thus, it is possible
that usb_submit_urb() is called from service_outstanding_interrupt() from
service_interrupt_work() after WDM_DISCONNECTING was set and kill_urbs()
 from wdm_disconnect() completed. Thus, move kill_urbs() in
wdm_disconnect() to after cancel_work_sync() (which makes sure that
service_interrupt_work() is no longer running) completed.

Although it seems to be safe to dereference desc->intf->dev in
service_outstanding_interrupt() even if WDM_DISCONNECTING was already set
because desc->rlock or cancel_work_sync() prevents wdm_disconnect() from
reaching list_del() before service_outstanding_interrupt() completes,
let's not emit error message if WDM_DISCONNECTING is set by
wdm_disconnect() while usb_submit_urb() is in progress.

[1] https://syzkaller.appspot.com/bug?extid=9e04e2df4a32fb661daf

Reported-by: syzbot <syzbot+9e04e2df4a32fb661daf@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/620e2ee0-b9a3-dbda-a25b-a93e0ed03ec5@i-love.sakura.ne.jp
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-wdm.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 02d0cfd23bb2..508b1c3f8b73 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -465,13 +465,23 @@ static int service_outstanding_interrupt(struct wdm_device *desc)
 	if (!desc->resp_count || !--desc->resp_count)
 		goto out;
 
+	if (test_bit(WDM_DISCONNECTING, &desc->flags)) {
+		rv = -ENODEV;
+		goto out;
+	}
+	if (test_bit(WDM_RESETTING, &desc->flags)) {
+		rv = -EIO;
+		goto out;
+	}
+
 	set_bit(WDM_RESPONDING, &desc->flags);
 	spin_unlock_irq(&desc->iuspin);
 	rv = usb_submit_urb(desc->response, GFP_KERNEL);
 	spin_lock_irq(&desc->iuspin);
 	if (rv) {
-		dev_err(&desc->intf->dev,
-			"usb_submit_urb failed with result %d\n", rv);
+		if (!test_bit(WDM_DISCONNECTING, &desc->flags))
+			dev_err(&desc->intf->dev,
+				"usb_submit_urb failed with result %d\n", rv);
 
 		/* make sure the next notification trigger a submit */
 		clear_bit(WDM_RESPONDING, &desc->flags);
@@ -1027,9 +1037,9 @@ static void wdm_disconnect(struct usb_interface *intf)
 	wake_up_all(&desc->wait);
 	mutex_lock(&desc->rlock);
 	mutex_lock(&desc->wlock);
-	kill_urbs(desc);
 	cancel_work_sync(&desc->rxwork);
 	cancel_work_sync(&desc->service_outs_intr);
+	kill_urbs(desc);
 	mutex_unlock(&desc->wlock);
 	mutex_unlock(&desc->rlock);
 

From 0f041b8592daaaea46e91a8ebb3b47e6e0171fd8 Mon Sep 17 00:00:00 2001
From: Madhusudanarao Amara <madhusudanarao.amara@intel.com>
Date: Wed, 16 Dec 2020 19:39:18 +0530
Subject: [PATCH 181/547] usb: typec: intel_pmc_mux: Configure HPD first for
 HPD+IRQ request

Warm reboot scenarios some times type C Mux driver gets Mux configuration
request as HPD=1,IRQ=1. In that scenario typeC Mux driver need to configure
Mux as follows as per IOM requirement:
 (1). Confgiure Mux HPD = 1, IRQ = 0
 (2). Configure Mux with HPD = 1, IRQ = 1

IOM expects TypeC Mux configuration as follows:
 (1). HPD=1, IRQ=0
 (2). HPD=1, IRQ=1
if IOM gets mux config request (2) without configuring (1), it will ignore
the request. The impact of this is there is no DP_alt mode display.

Fixes: 43d596e32276 ("usb: typec: intel_pmc_mux: Check the port status before connect")
Cc: stable@vger.kernel.org
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Madhusudanarao Amara <madhusudanarao.amara@intel.com>
Link: https://lore.kernel.org/r/20201216140918.49197-1-madhusudanarao.amara@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/mux/intel_pmc_mux.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
index cf37a59ce130..46a25b8db72e 100644
--- a/drivers/usb/typec/mux/intel_pmc_mux.c
+++ b/drivers/usb/typec/mux/intel_pmc_mux.c
@@ -207,10 +207,21 @@ static int
 pmc_usb_mux_dp_hpd(struct pmc_usb_port *port, struct typec_displayport_data *dp)
 {
 	u8 msg[2] = { };
+	int ret;
 
 	msg[0] = PMC_USB_DP_HPD;
 	msg[0] |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT;
 
+	/* Configure HPD first if HPD,IRQ comes together */
+	if (!IOM_PORT_HPD_ASSERTED(port->iom_status) &&
+	    dp->status & DP_STATUS_IRQ_HPD &&
+	    dp->status & DP_STATUS_HPD_STATE) {
+		msg[1] = PMC_USB_DP_HPD_LVL;
+		ret = pmc_usb_command(port, msg, sizeof(msg));
+		if (ret)
+			return ret;
+	}
+
 	if (dp->status & DP_STATUS_IRQ_HPD)
 		msg[1] = PMC_USB_DP_HPD_IRQ;
 

From 5d5323a6f3625f101dbfa94ba3ef7706cce38760 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 15 Dec 2020 20:31:47 +0100
Subject: [PATCH 182/547] USB: xhci: fix U1/U2 handling for hardware with
 XHCI_INTEL_HOST quirk set

The commit 0472bf06c6fd ("xhci: Prevent U1/U2 link pm states if exit
latency is too long") was constraining the xhci code not to allow U1/U2
sleep states if the latency to wake up from the U-states reached the
service interval of an periodic endpoint. This fix was not taking into
account that in case the quirk XHCI_INTEL_HOST is set, the wakeup time
will be calculated and configured differently.

It checks for u1_params.mel/u2_params.mel as a limit. But the code could
decide to write another MEL into the hardware. This leads to broken
cases where not enough bandwidth is available for other devices:

usb 1-2: can't set config #1, error -28

This patch is fixing that case by checking for timeout_ns after the
wakeup time was calculated depending on the quirks.

Fixes: 0472bf06c6fd ("xhci: Prevent U1/U2 link pm states if exit latency is too long")
Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201215193147.11738-1-m.grzeschik@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 91ab81c3fc79..e86940571b4c 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4770,19 +4770,19 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci,
 {
 	unsigned long long timeout_ns;
 
-	/* Prevent U1 if service interval is shorter than U1 exit latency */
-	if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) {
-		if (xhci_service_interval_to_ns(desc) <= udev->u1_params.mel) {
-			dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n");
-			return USB3_LPM_DISABLED;
-		}
-	}
-
 	if (xhci->quirks & XHCI_INTEL_HOST)
 		timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc);
 	else
 		timeout_ns = udev->u1_params.sel;
 
+	/* Prevent U1 if service interval is shorter than U1 exit latency */
+	if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) {
+		if (xhci_service_interval_to_ns(desc) <= timeout_ns) {
+			dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n");
+			return USB3_LPM_DISABLED;
+		}
+	}
+
 	/* The U1 timeout is encoded in 1us intervals.
 	 * Don't return a timeout of zero, because that's USB3_LPM_DISABLED.
 	 */
@@ -4834,19 +4834,19 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci,
 {
 	unsigned long long timeout_ns;
 
-	/* Prevent U2 if service interval is shorter than U2 exit latency */
-	if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) {
-		if (xhci_service_interval_to_ns(desc) <= udev->u2_params.mel) {
-			dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n");
-			return USB3_LPM_DISABLED;
-		}
-	}
-
 	if (xhci->quirks & XHCI_INTEL_HOST)
 		timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc);
 	else
 		timeout_ns = udev->u2_params.sel;
 
+	/* Prevent U2 if service interval is shorter than U2 exit latency */
+	if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) {
+		if (xhci_service_interval_to_ns(desc) <= timeout_ns) {
+			dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n");
+			return USB3_LPM_DISABLED;
+		}
+	}
+
 	/* The U2 timeout is encoded in 256us intervals */
 	timeout_ns = DIV_ROUND_UP_ULL(timeout_ns, 256 * 1000);
 	/* If the necessary timeout value is bigger than what we can set in the

From a5ada3dfe6a20f41f91448b9034a1ef8da3dc87d Mon Sep 17 00:00:00 2001
From: Zheng Zengkai <zhengzengkai@huawei.com>
Date: Tue, 15 Dec 2020 10:54:59 +0800
Subject: [PATCH 183/547] usb: dwc3: meson-g12a: disable clk on error handling
 path in probe

dwc3_meson_g12a_probe() does not invoke clk_bulk_disable_unprepare()
on one error handling path. This patch fixes that.

Fixes: 347052e3bf1b ("usb: dwc3: meson-g12a: fix USB2 PHY initialization on G12A and A1 SoCs")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20201215025459.91794-1-zhengzengkai@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-meson-g12a.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c
index 417e05381b5d..bdf1f98dfad8 100644
--- a/drivers/usb/dwc3/dwc3-meson-g12a.c
+++ b/drivers/usb/dwc3/dwc3-meson-g12a.c
@@ -754,7 +754,7 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev)
 
 	ret = priv->drvdata->setup_regmaps(priv, base);
 	if (ret)
-		return ret;
+		goto err_disable_clks;
 
 	if (priv->vbus) {
 		ret = regulator_enable(priv->vbus);

From 2cc332e4ee4febcbb685e2962ad323fe4b3b750a Mon Sep 17 00:00:00 2001
From: Zqiang <qiang.zhang@windriver.com>
Date: Thu, 10 Dec 2020 10:01:48 +0800
Subject: [PATCH 184/547] usb: gadget: function: printer: Fix a memory leak for
 interface descriptor

When printer driver is loaded, the printer_func_bind function is called, in
this function, the interface descriptor be allocated memory, if after that,
the error occurred, the interface descriptor memory need to be free.

Reviewed-by: Peter Chen <peter.chen@nxp.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Zqiang <qiang.zhang@windriver.com>
Link: https://lore.kernel.org/r/20201210020148.6691-1-qiang.zhang@windriver.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_printer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index 64a4112068fc..2f1eb2e81d30 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -1162,6 +1162,7 @@ fail_tx_reqs:
 		printer_req_free(dev->in_ep, req);
 	}
 
+	usb_free_all_descriptors(f);
 	return ret;
 
 }

From 5cc35c224a80aa5a5a539510ef049faf0d6ed181 Mon Sep 17 00:00:00 2001
From: Sriharsha Allenki <sallenki@codeaurora.org>
Date: Wed, 2 Dec 2020 18:32:20 +0530
Subject: [PATCH 185/547] usb: gadget: Fix spinlock lockup on
 usb_function_deactivate

There is a spinlock lockup as part of composite_disconnect
when it tries to acquire cdev->lock as part of usb_gadget_deactivate.
This is because the usb_gadget_deactivate is called from
usb_function_deactivate with the same spinlock held.

This would result in the below call stack and leads to stall.

rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
rcu:     3-...0: (1 GPs behind) idle=162/1/0x4000000000000000
softirq=10819/10819 fqs=2356
 (detected by 2, t=5252 jiffies, g=20129, q=3770)
 Task dump for CPU 3:
 task:uvc-gadget_wlhe state:R  running task     stack:    0 pid:  674 ppid:
 636 flags:0x00000202
 Call trace:
  __switch_to+0xc0/0x170
  _raw_spin_lock_irqsave+0x84/0xb0
  composite_disconnect+0x28/0x78
  configfs_composite_disconnect+0x68/0x70
  usb_gadget_disconnect+0x10c/0x128
  usb_gadget_deactivate+0xd4/0x108
  usb_function_deactivate+0x6c/0x80
  uvc_function_disconnect+0x20/0x58
  uvc_v4l2_release+0x30/0x88
  v4l2_release+0xbc/0xf0
  __fput+0x7c/0x230
  ____fput+0x14/0x20
  task_work_run+0x88/0x140
  do_notify_resume+0x240/0x6f0
  work_pending+0x8/0x200

Fix this by doing an unlock on cdev->lock before the usb_gadget_deactivate
call from usb_function_deactivate.

The same lockup can happen in the usb_gadget_activate path. Fix that path
as well.

Reported-by: Peter Chen <peter.chen@nxp.com>
Link: https://lore.kernel.org/linux-usb/20201102094936.GA29581@b29397-desktop/
Tested-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Sriharsha Allenki <sallenki@codeaurora.org>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201202130220.24926-1-sallenki@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/composite.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index c6d455f2bb92..1a556a628971 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -392,8 +392,11 @@ int usb_function_deactivate(struct usb_function *function)
 
 	spin_lock_irqsave(&cdev->lock, flags);
 
-	if (cdev->deactivations == 0)
+	if (cdev->deactivations == 0) {
+		spin_unlock_irqrestore(&cdev->lock, flags);
 		status = usb_gadget_deactivate(cdev->gadget);
+		spin_lock_irqsave(&cdev->lock, flags);
+	}
 	if (status == 0)
 		cdev->deactivations++;
 
@@ -424,8 +427,11 @@ int usb_function_activate(struct usb_function *function)
 		status = -EINVAL;
 	else {
 		cdev->deactivations--;
-		if (cdev->deactivations == 0)
+		if (cdev->deactivations == 0) {
+			spin_unlock_irqrestore(&cdev->lock, flags);
 			status = usb_gadget_activate(cdev->gadget);
+			spin_lock_irqsave(&cdev->lock, flags);
+		}
 	}
 
 	spin_unlock_irqrestore(&cdev->lock, flags);

From c91d3a6bcaa031f551ba29a496a8027b31289464 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 17 Nov 2020 17:29:55 +0800
Subject: [PATCH 186/547] USB: gadget: legacy: fix return error code in
 acm_ms_bind()

If usb_otg_descriptor_alloc() failed, it need return ENOMEM.

Fixes: 578aa8a2b12c ("usb: gadget: acm_ms: allocate and init otg descriptor by otg capabilities")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201117092955.4102785-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/legacy/acm_ms.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/legacy/acm_ms.c b/drivers/usb/gadget/legacy/acm_ms.c
index 59be2d8417c9..e8033e5f0c18 100644
--- a/drivers/usb/gadget/legacy/acm_ms.c
+++ b/drivers/usb/gadget/legacy/acm_ms.c
@@ -200,8 +200,10 @@ static int acm_ms_bind(struct usb_composite_dev *cdev)
 		struct usb_descriptor_header *usb_desc;
 
 		usb_desc = usb_otg_descriptor_alloc(gadget);
-		if (!usb_desc)
+		if (!usb_desc) {
+			status = -ENOMEM;
 			goto fail_string_ids;
+		}
 		usb_otg_descriptor_init(gadget, usb_desc);
 		otg_desc[0] = usb_desc;
 		otg_desc[1] = NULL;

From 0a88fa221ce911c331bf700d2214c5b2f77414d3 Mon Sep 17 00:00:00 2001
From: Manish Narani <manish.narani@xilinx.com>
Date: Tue, 17 Nov 2020 12:43:35 +0530
Subject: [PATCH 187/547] usb: gadget: u_ether: Fix MTU size mismatch with RX
 packet size

Fix the MTU size issue with RX packet size as the host sends the packet
with extra bytes containing ethernet header. This causes failure when
user sets the MTU size to the maximum i.e. 15412. In this case the
ethernet packet received will be of length 15412 plus the ethernet header
length. This patch fixes the issue where there is a check that RX packet
length must not be more than max packet length.

Fixes: bba787a860fa ("usb: gadget: ether: Allow jumbo frames")
Signed-off-by: Manish Narani <manish.narani@xilinx.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/1605597215-122027-1-git-send-email-manish.narani@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/u_ether.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 31ea76adcc0d..c019f2b0c0af 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -45,9 +45,10 @@
 #define UETH__VERSION	"29-May-2008"
 
 /* Experiments show that both Linux and Windows hosts allow up to 16k
- * frame sizes. Set the max size to 15k+52 to prevent allocating 32k
+ * frame sizes. Set the max MTU size to 15k+52 to prevent allocating 32k
  * blocks and still have efficient handling. */
-#define GETHER_MAX_ETH_FRAME_LEN 15412
+#define GETHER_MAX_MTU_SIZE 15412
+#define GETHER_MAX_ETH_FRAME_LEN (GETHER_MAX_MTU_SIZE + ETH_HLEN)
 
 struct eth_dev {
 	/* lock is held while accessing port_usb
@@ -786,7 +787,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
 
 	/* MTU range: 14 - 15412 */
 	net->min_mtu = ETH_HLEN;
-	net->max_mtu = GETHER_MAX_ETH_FRAME_LEN;
+	net->max_mtu = GETHER_MAX_MTU_SIZE;
 
 	dev->gadget = g;
 	SET_NETDEV_DEV(net, &g->dev);
@@ -848,7 +849,7 @@ struct net_device *gether_setup_name_default(const char *netname)
 
 	/* MTU range: 14 - 15412 */
 	net->min_mtu = ETH_HLEN;
-	net->max_mtu = GETHER_MAX_ETH_FRAME_LEN;
+	net->max_mtu = GETHER_MAX_MTU_SIZE;
 
 	return net;
 }

From 83a43ff80a566de8718dfc6565545a0080ec1fb5 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Tue, 17 Nov 2020 09:14:30 +0800
Subject: [PATCH 188/547] usb: chipidea: ci_hdrc_imx: add missing put_device()
 call in usbmisc_get_init_data()

if of_find_device_by_node() succeed, usbmisc_get_init_data() doesn't have
a corresponding put_device(). Thus add put_device() to fix the exception
handling for this function implementation.

Fixes: ef12da914ed6 ("usb: chipidea: imx: properly check for usbmisc")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201117011430.642589-1-yukuai3@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/ci_hdrc_imx.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index 9e12152ea46b..8b7bc10b6e8b 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -139,9 +139,13 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev)
 	misc_pdev = of_find_device_by_node(args.np);
 	of_node_put(args.np);
 
-	if (!misc_pdev || !platform_get_drvdata(misc_pdev))
+	if (!misc_pdev)
 		return ERR_PTR(-EPROBE_DEFER);
 
+	if (!platform_get_drvdata(misc_pdev)) {
+		put_device(&misc_pdev->dev);
+		return ERR_PTR(-EPROBE_DEFER);
+	}
 	data->dev = &misc_pdev->dev;
 
 	/*

From 372c93131998c0622304bed118322d2a04489e63 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 14 Dec 2020 11:30:53 +0100
Subject: [PATCH 189/547] USB: yurex: fix control-URB timeout handling

Make sure to always cancel the control URB in write() so that it can be
reused after a timeout or spurious CMD_ACK.

Currently any further write requests after a timeout would fail after
triggering a WARN() in usb_submit_urb() when attempting to submit the
already active URB.

Reported-by: syzbot+e87ebe0f7913f71f2ea5@syzkaller.appspotmail.com
Fixes: 6bc235a2e24a ("USB: add driver for Meywa-Denki & Kayac YUREX")
Cc: stable <stable@vger.kernel.org>     # 2.6.37
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/yurex.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c
index 73ebfa6e9715..c640f98d20c5 100644
--- a/drivers/usb/misc/yurex.c
+++ b/drivers/usb/misc/yurex.c
@@ -496,6 +496,9 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer,
 		timeout = schedule_timeout(YUREX_WRITE_TIMEOUT);
 	finish_wait(&dev->waitq, &wait);
 
+	/* make sure URB is idle after timeout or (spurious) CMD_ACK */
+	usb_kill_urb(dev->cntl_urb);
+
 	mutex_unlock(&dev->io_mutex);
 
 	if (retval < 0) {

From ce722da66d3e9384aa2de9d33d584ee154e5e157 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Thu, 10 Dec 2020 11:50:06 +0300
Subject: [PATCH 190/547] usb: dwc3: ulpi: Use VStsDone to detect PHY regs
 access completion

In accordance with [1] the DWC_usb3 core sets the GUSB2PHYACCn.VStsDone
bit when the PHY vendor control access is done and clears it when the
application initiates a new transaction. The doc doesn't say anything
about the GUSB2PHYACCn.VStsBsy flag serving for the same purpose. Moreover
we've discovered that the VStsBsy flag can be cleared before the VStsDone
bit. So using the former as a signal of the PHY control registers
completion might be dangerous. Let's have the VStsDone flag utilized
instead then.

[1] Synopsys DesignWare Cores SuperSpeed USB 3.0 xHCI Host Controller
    Databook, 2.70a, December 2013, p.388

Fixes: 88bc9d194ff6 ("usb: dwc3: add ULPI interface support")
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Link: https://lore.kernel.org/r/20201210085008.13264-2-Sergey.Semin@baikalelectronics.ru
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.h | 1 +
 drivers/usb/dwc3/ulpi.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 2f95f08ca511..1b241f937d8f 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -285,6 +285,7 @@
 
 /* Global USB2 PHY Vendor Control Register */
 #define DWC3_GUSB2PHYACC_NEWREGREQ	BIT(25)
+#define DWC3_GUSB2PHYACC_DONE		BIT(24)
 #define DWC3_GUSB2PHYACC_BUSY		BIT(23)
 #define DWC3_GUSB2PHYACC_WRITE		BIT(22)
 #define DWC3_GUSB2PHYACC_ADDR(n)	(n << 16)
diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c
index aa213c9815f6..3cc4f4970c05 100644
--- a/drivers/usb/dwc3/ulpi.c
+++ b/drivers/usb/dwc3/ulpi.c
@@ -24,7 +24,7 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc)
 
 	while (count--) {
 		reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0));
-		if (!(reg & DWC3_GUSB2PHYACC_BUSY))
+		if (reg & DWC3_GUSB2PHYACC_DONE)
 			return 0;
 		cpu_relax();
 	}

From fca3f138105727c3a22edda32d02f91ce1bf11c9 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Thu, 10 Dec 2020 11:50:07 +0300
Subject: [PATCH 191/547] usb: dwc3: ulpi: Replace CPU-based busyloop with
 Protocol-based one

Originally the procedure of the ULPI transaction finish detection has been
developed as a simple busy-loop with just decrementing counter and no
delays. It's wrong since on different systems the loop will take a
different time to complete. So if the system bus and CPU are fast enough
to overtake the ULPI bus and the companion PHY reaction, then we'll get to
take a false timeout error. Fix this by converting the busy-loop procedure
to take the standard bus speed, address value and the registers access
mode into account for the busy-loop delay calculation.

Here is the way the fix works. It's known that the ULPI bus is clocked
with 60MHz signal. In accordance with [1] the ULPI bus protocol is created
so to spend 5 and 6 clock periods for immediate register write and read
operations respectively, and 6 and 7 clock periods - for the extended
register writes and reads. Based on that we can easily pre-calculate the
time which will be needed for the controller to perform a requested IO
operation. Note we'll still preserve the attempts counter in case if the
DWC USB3 controller has got some internals delays.

[1] UTMI+ Low Pin Interface (ULPI) Specification, Revision 1.1,
    October 20, 2004, pp. 30 - 36.

Fixes: 88bc9d194ff6 ("usb: dwc3: add ULPI interface support")
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Link: https://lore.kernel.org/r/20201210085008.13264-3-Sergey.Semin@baikalelectronics.ru
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/ulpi.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c
index 3cc4f4970c05..54c877f7b51d 100644
--- a/drivers/usb/dwc3/ulpi.c
+++ b/drivers/usb/dwc3/ulpi.c
@@ -7,6 +7,8 @@
  * Author: Heikki Krogerus <heikki.krogerus@linux.intel.com>
  */
 
+#include <linux/delay.h>
+#include <linux/time64.h>
 #include <linux/ulpi/regs.h>
 
 #include "core.h"
@@ -17,12 +19,22 @@
 		DWC3_GUSB2PHYACC_ADDR(ULPI_ACCESS_EXTENDED) | \
 		DWC3_GUSB2PHYACC_EXTEND_ADDR(a) : DWC3_GUSB2PHYACC_ADDR(a))
 
-static int dwc3_ulpi_busyloop(struct dwc3 *dwc)
+#define DWC3_ULPI_BASE_DELAY	DIV_ROUND_UP(NSEC_PER_SEC, 60000000L)
+
+static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read)
 {
+	unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY;
 	unsigned int count = 1000;
 	u32 reg;
 
+	if (addr >= ULPI_EXT_VENDOR_SPECIFIC)
+		ns += DWC3_ULPI_BASE_DELAY;
+
+	if (read)
+		ns += DWC3_ULPI_BASE_DELAY;
+
 	while (count--) {
+		ndelay(ns);
 		reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0));
 		if (reg & DWC3_GUSB2PHYACC_DONE)
 			return 0;
@@ -47,7 +59,7 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr)
 	reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr);
 	dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg);
 
-	ret = dwc3_ulpi_busyloop(dwc);
+	ret = dwc3_ulpi_busyloop(dwc, addr, true);
 	if (ret)
 		return ret;
 
@@ -71,7 +83,7 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val)
 	reg |= DWC3_GUSB2PHYACC_WRITE | val;
 	dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg);
 
-	return dwc3_ulpi_busyloop(dwc);
+	return dwc3_ulpi_busyloop(dwc, addr, false);
 }
 
 static const struct ulpi_ops dwc3_ulpi_ops = {

From e5f4ca3fce90a37b23a77bfcc86800d484a80514 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Thu, 10 Dec 2020 11:50:08 +0300
Subject: [PATCH 192/547] usb: dwc3: ulpi: Fix USB2.0 HS/FS/LS PHY suspend
 regression

First of all the commit e0082698b689 ("usb: dwc3: ulpi: conditionally
resume ULPI PHY") introduced the Suspend USB2.0 HS/FS/LS PHY regression,
as by design of the fix any attempt to read/write from/to the PHY control
registers will completely disable the PHY suspension, which consequently
will increase the USB bus power consumption. Secondly the fix won't work
well for the very first attempt of the ULPI PHY control registers IO,
because after disabling the USB2.0 PHY suspension functionality it will
still take some time for the bus to resume from the sleep state if one has
been reached before it. So the very first PHY register read/write
operation will take more time than the busy-loop provides and the IO
timeout error might be returned anyway.

Here we suggest to fix the denoted problems in the following way. First of
all let's not disable the Suspend USB2.0 HS/FS/LS PHY functionality so to
make the controller and the USB2.0 bus more power efficient. Secondly
instead of that we'll extend the PHY IO op wait procedure with 1 - 1.2 ms
sleep if the PHY suspension is enabled (1ms should be enough as by LPM
specification it is at most how long it takes for the USB2.0 bus to resume
from L1 (Sleep) state). Finally in case if the USB2.0 PHY suspension
functionality has been disabled on the DWC USB3 controller setup procedure
we'll compensate the USB bus resume process latency by extending the
busy-loop attempts counter.

Fixes: e0082698b689 ("usb: dwc3: ulpi: conditionally resume ULPI PHY")
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Link: https://lore.kernel.org/r/20201210085008.13264-4-Sergey.Semin@baikalelectronics.ru
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/ulpi.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c
index 54c877f7b51d..f23f4c9a557e 100644
--- a/drivers/usb/dwc3/ulpi.c
+++ b/drivers/usb/dwc3/ulpi.c
@@ -24,7 +24,7 @@
 static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read)
 {
 	unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY;
-	unsigned int count = 1000;
+	unsigned int count = 10000;
 	u32 reg;
 
 	if (addr >= ULPI_EXT_VENDOR_SPECIFIC)
@@ -33,6 +33,10 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read)
 	if (read)
 		ns += DWC3_ULPI_BASE_DELAY;
 
+	reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+	if (reg & DWC3_GUSB2PHYCFG_SUSPHY)
+		usleep_range(1000, 1200);
+
 	while (count--) {
 		ndelay(ns);
 		reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0));
@@ -50,12 +54,6 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr)
 	u32 reg;
 	int ret;
 
-	reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
-	if (reg & DWC3_GUSB2PHYCFG_SUSPHY) {
-		reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
-		dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
-	}
-
 	reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr);
 	dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg);
 
@@ -73,12 +71,6 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val)
 	struct dwc3 *dwc = dev_get_drvdata(dev);
 	u32 reg;
 
-	reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
-	if (reg & DWC3_GUSB2PHYCFG_SUSPHY) {
-		reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
-		dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
-	}
-
 	reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr);
 	reg |= DWC3_GUSB2PHYACC_WRITE | val;
 	dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg);

From 9389044f27081d6ec77730c36d5bf9a1288bcda2 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 21 Dec 2020 18:35:28 +0100
Subject: [PATCH 193/547] usb: gadget: f_uac2: reset wMaxPacketSize

With commit 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth")
wMaxPacketSize is computed dynamically but the value is never reset.

Because of this, the actual maximum packet size can only decrease each time
the audio gadget is instantiated.

Reset the endpoint maximum packet size and mark wMaxPacketSize as dynamic
to solve the problem.

Fixes: 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201221173531.215169-2-jbrunet@baylibre.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_uac2.c | 69 ++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 3633df6d7610..5d960b6603b6 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -271,7 +271,7 @@ static struct usb_endpoint_descriptor fs_epout_desc = {
 
 	.bEndpointAddress = USB_DIR_OUT,
 	.bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC,
-	.wMaxPacketSize = cpu_to_le16(1023),
+	/* .wMaxPacketSize = DYNAMIC */
 	.bInterval = 1,
 };
 
@@ -280,7 +280,7 @@ static struct usb_endpoint_descriptor hs_epout_desc = {
 	.bDescriptorType = USB_DT_ENDPOINT,
 
 	.bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC,
-	.wMaxPacketSize = cpu_to_le16(1024),
+	/* .wMaxPacketSize = DYNAMIC */
 	.bInterval = 4,
 };
 
@@ -348,7 +348,7 @@ static struct usb_endpoint_descriptor fs_epin_desc = {
 
 	.bEndpointAddress = USB_DIR_IN,
 	.bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC,
-	.wMaxPacketSize = cpu_to_le16(1023),
+	/* .wMaxPacketSize = DYNAMIC */
 	.bInterval = 1,
 };
 
@@ -357,7 +357,7 @@ static struct usb_endpoint_descriptor hs_epin_desc = {
 	.bDescriptorType = USB_DT_ENDPOINT,
 
 	.bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC,
-	.wMaxPacketSize = cpu_to_le16(1024),
+	/* .wMaxPacketSize = DYNAMIC */
 	.bInterval = 4,
 };
 
@@ -444,12 +444,28 @@ struct cntrl_range_lay3 {
 	__le32	dRES;
 } __packed;
 
-static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts,
+static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts,
 	struct usb_endpoint_descriptor *ep_desc,
-	unsigned int factor, bool is_playback)
+	enum usb_device_speed speed, bool is_playback)
 {
 	int chmask, srate, ssize;
-	u16 max_packet_size;
+	u16 max_size_bw, max_size_ep;
+	unsigned int factor;
+
+	switch (speed) {
+	case USB_SPEED_FULL:
+		max_size_ep = 1023;
+		factor = 1000;
+		break;
+
+	case USB_SPEED_HIGH:
+		max_size_ep = 1024;
+		factor = 8000;
+		break;
+
+	default:
+		return -EINVAL;
+	}
 
 	if (is_playback) {
 		chmask = uac2_opts->p_chmask;
@@ -461,10 +477,12 @@ static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts,
 		ssize = uac2_opts->c_ssize;
 	}
 
-	max_packet_size = num_channels(chmask) * ssize *
+	max_size_bw = num_channels(chmask) * ssize *
 		DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1)));
-	ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_packet_size,
-				le16_to_cpu(ep_desc->wMaxPacketSize)));
+	ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw,
+						    max_size_ep));
+
+	return 0;
 }
 
 /* Use macro to overcome line length limitation */
@@ -670,10 +688,33 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn)
 	}
 
 	/* Calculate wMaxPacketSize according to audio bandwidth */
-	set_ep_max_packet_size(uac2_opts, &fs_epin_desc, 1000, true);
-	set_ep_max_packet_size(uac2_opts, &fs_epout_desc, 1000, false);
-	set_ep_max_packet_size(uac2_opts, &hs_epin_desc, 8000, true);
-	set_ep_max_packet_size(uac2_opts, &hs_epout_desc, 8000, false);
+	ret = set_ep_max_packet_size(uac2_opts, &fs_epin_desc, USB_SPEED_FULL,
+				     true);
+	if (ret < 0) {
+		dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
+		return ret;
+	}
+
+	ret = set_ep_max_packet_size(uac2_opts, &fs_epout_desc, USB_SPEED_FULL,
+				     false);
+	if (ret < 0) {
+		dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
+		return ret;
+	}
+
+	ret = set_ep_max_packet_size(uac2_opts, &hs_epin_desc, USB_SPEED_HIGH,
+				     true);
+	if (ret < 0) {
+		dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
+		return ret;
+	}
+
+	ret = set_ep_max_packet_size(uac2_opts, &hs_epout_desc, USB_SPEED_HIGH,
+				     false);
+	if (ret < 0) {
+		dev_err(dev, "%s:%d Error!\n", __func__, __LINE__);
+		return ret;
+	}
 
 	if (EPOUT_EN(uac2_opts)) {
 		agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc);

From 60267ba35c744d851dcd2d22ebaa240ca6aaa15f Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 16 Dec 2020 17:19:58 +0100
Subject: [PATCH 194/547] ceph: reencode gid_list when reconnecting

On reconnect, cap and dentry releases are dropped and the fields
that follow must be reencoded into the freed space.  Currently these
are timestamp and gid_list, but gid_list isn't reencoded.  This
results in

  failed to decode message of type 24 v4: End of buffer

errors on the MDS.

While at it, make a change to encode gid_list unconditionally,
without regard to what head/which version was used as a result
of checking whether CEPH_FEATURE_FS_BTIME is supported or not.

URL: https://tracker.ceph.com/issues/48618
Fixes: 4f1ddb1ea874 ("ceph: implement updated ceph_mds_request_head structure")
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/mds_client.c | 53 ++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 98c15ff2e599..840587037b59 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2475,6 +2475,22 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
 	return r;
 }
 
+static void encode_timestamp_and_gids(void **p,
+				      const struct ceph_mds_request *req)
+{
+	struct ceph_timespec ts;
+	int i;
+
+	ceph_encode_timespec64(&ts, &req->r_stamp);
+	ceph_encode_copy(p, &ts, sizeof(ts));
+
+	/* gid_list */
+	ceph_encode_32(p, req->r_cred->group_info->ngroups);
+	for (i = 0; i < req->r_cred->group_info->ngroups; i++)
+		ceph_encode_64(p, from_kgid(&init_user_ns,
+					    req->r_cred->group_info->gid[i]));
+}
+
 /*
  * called under mdsc->mutex
  */
@@ -2491,7 +2507,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 	u64 ino1 = 0, ino2 = 0;
 	int pathlen1 = 0, pathlen2 = 0;
 	bool freepath1 = false, freepath2 = false;
-	int len, i;
+	int len;
 	u16 releases;
 	void *p, *end;
 	int ret;
@@ -2517,17 +2533,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 		goto out_free1;
 	}
 
-	if (legacy) {
-		/* Old style */
-		len = sizeof(*head);
-	} else {
-		/* New style: add gid_list and any later fields */
-		len = sizeof(struct ceph_mds_request_head) + sizeof(u32) +
-		      (sizeof(u64) * req->r_cred->group_info->ngroups);
-	}
-
+	len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head);
 	len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
 		sizeof(struct ceph_timespec);
+	len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups);
 
 	/* calculate (max) length for cap releases */
 	len += sizeof(struct ceph_mds_request_release) *
@@ -2548,7 +2557,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 	msg->hdr.tid = cpu_to_le64(req->r_tid);
 
 	/*
-	 * The old ceph_mds_request_header didn't contain a version field, and
+	 * The old ceph_mds_request_head didn't contain a version field, and
 	 * one was added when we moved the message version from 3->4.
 	 */
 	if (legacy) {
@@ -2609,20 +2618,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 
 	head->num_releases = cpu_to_le16(releases);
 
-	/* time stamp */
-	{
-		struct ceph_timespec ts;
-		ceph_encode_timespec64(&ts, &req->r_stamp);
-		ceph_encode_copy(&p, &ts, sizeof(ts));
-	}
-
-	/* gid list */
-	if (!legacy) {
-		ceph_encode_32(&p, req->r_cred->group_info->ngroups);
-		for (i = 0; i < req->r_cred->group_info->ngroups; i++)
-			ceph_encode_64(&p, from_kgid(&init_user_ns,
-				       req->r_cred->group_info->gid[i]));
-	}
+	encode_timestamp_and_gids(&p, req);
 
 	if (WARN_ON_ONCE(p > end)) {
 		ceph_msg_put(msg);
@@ -2730,13 +2726,8 @@ static int __prepare_send_request(struct ceph_mds_session *session,
 		/* remove cap/dentry releases from message */
 		rhead->num_releases = 0;
 
-		/* time stamp */
 		p = msg->front.iov_base + req->r_request_release_offset;
-		{
-			struct ceph_timespec ts;
-			ceph_encode_timespec64(&ts, &req->r_stamp);
-			ceph_encode_copy(&p, &ts, sizeof(ts));
-		}
+		encode_timestamp_and_gids(&p, req);
 
 		msg->front.iov_len = p - msg->front.iov_base;
 		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);

From ad32fe8801c38f7b1a8b3814bd1f006cb2b5e781 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Tue, 15 Dec 2020 16:40:59 +0100
Subject: [PATCH 195/547] libceph: fix auth_signature buffer allocation in
 secure mode

auth_signature frame is 68 bytes in plain mode and 96 bytes in
secure mode but we are requesting 68 bytes in both modes.  By luck,
this doesn't actually result in any invalid memory accesses because
the allocation is satisfied out of kmalloc-96 slab and so exactly
96 bytes are allocated, but KASAN rightfully complains.

Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)")
Reported-by: Luis Henriques <lhenriques@suse.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/messenger_v2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index c1ebb2aa08b5..4f938fc8deaf 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -1333,7 +1333,8 @@ static int prepare_auth_signature(struct ceph_connection *con)
 	void *buf;
 	int ret;
 
-	buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, false));
+	buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE,
+						  con_secure(con)));
 	if (!buf)
 		return -ENOMEM;
 

From f5f2c9a0e3073debc6bc0ecc855ced0158526ee8 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Tue, 15 Dec 2020 16:49:07 +0100
Subject: [PATCH 196/547] libceph: align session_key and con_secret to 16 bytes

crypto_shash_setkey() and crypto_aead_setkey() will do a (small)
GFP_ATOMIC allocation to align the key if it isn't suitably aligned.
It's not a big deal, but at the same time easy to avoid.

The actual alignment requirement is dynamic, queryable with
crypto_shash_alignmask() and crypto_aead_alignmask(), but shouldn't
be stricter than 16 bytes for our algorithms.

Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)")
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/messenger_v2.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 4f938fc8deaf..c38d8de93836 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -2033,10 +2033,18 @@ bad:
 	return -EINVAL;
 }
 
+/*
+ * Align session_key and con_secret to avoid GFP_ATOMIC allocation
+ * inside crypto_shash_setkey() and crypto_aead_setkey() called from
+ * setup_crypto().  __aligned(16) isn't guaranteed to work for stack
+ * objects, so do it by hand.
+ */
 static int process_auth_done(struct ceph_connection *con, void *p, void *end)
 {
-	u8 session_key[CEPH_KEY_LEN];
-	u8 con_secret[CEPH_MAX_CON_SECRET_LEN];
+	u8 session_key_buf[CEPH_KEY_LEN + 16];
+	u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16];
+	u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16);
+	u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16);
 	int session_key_len, con_secret_len;
 	int payload_len;
 	u64 global_id;

From 664f1e259a982bf213f0cd8eea7616c89546585c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 16 Dec 2020 18:40:05 +0100
Subject: [PATCH 197/547] libceph: add __maybe_unused to DEFINE_MSGR2_FEATURE

Avoid -Wunused-const-variable warnings for "make W=1".

Reported-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/msgr.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index f5e02f6c0655..3989dcb94d3d 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -33,8 +33,8 @@
 #define CEPH_MSGR2_INCARNATION_1 (0ull)
 
 #define DEFINE_MSGR2_FEATURE(bit, incarnation, name)               \
-	static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \
-	static const uint64_t CEPH_MSGR2_FEATUREMASK_##name =            \
+	static const uint64_t __maybe_unused CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \
+	static const uint64_t __maybe_unused CEPH_MSGR2_FEATUREMASK_##name =            \
 			(1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation);
 
 #define HAVE_MSGR2_FEATURE(x, name) \

From 48b0777cd93dbd800d3966b6f5c34714aad5c203 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 28 Dec 2020 16:13:52 -0500
Subject: [PATCH 198/547] Revert "dm crypt: export sysfs of kcryptd workqueue"

This reverts commit a2b8b2d975673b1a50ab0bcce5d146b9335edfad.

WQ_SYSFS breaks the ability to reload a DM table due to sysfs kobject
collision (due to active and inactive table). Given lack of
demonstrated need for exposing this workqueue via sysfs: revert
exposing it.

Reported-by: Ignat Korchagin <ignat@cloudflare.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-crypt.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 5f9f9b3a226d..53791138d78b 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -3166,12 +3166,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
-		cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+		cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
 						  1, devname);
 	else
-		cc->crypt_queue = alloc_workqueue("kcryptd-%s",
-						  WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
-						  WQ_UNBOUND | WQ_SYSFS,
+		cc->crypt_queue = alloc_workqueue("kcryptd/%s",
+						  WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
 						  num_online_cpus(), devname);
 	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";

From 59b4a8fa27f5a895582ada1ae5034af7c94a57b5 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@kernel.org>
Date: Wed, 23 Dec 2020 19:21:16 -0800
Subject: [PATCH 199/547] CDC-NCM: remove "connected" log message

The cdc_ncm driver passes network connection notifications up to
usbnet_link_change(), which is the right place for any logging.
Remove the netdev_info() duplicating this from the driver itself.

This stops devices such as my "TRENDnet USB 10/100/1G/2.5G LAN"
(ID 20f4:e02b) adapter from spamming the kernel log with

    cdc_ncm 2-2:2.0 enp0s2u2c2: network connection: connected

messages every 60 msec or so.

Signed-off-by: Roland Dreier <roland@kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20201224032116.2453938-1-roland@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/cdc_ncm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2bac57d5e8d5..3b816a4731f2 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1863,9 +1863,6 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 		 * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be
 		 * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE.
 		 */
-		netif_info(dev, link, dev->net,
-			   "network connection: %sconnected\n",
-			   !!event->wValue ? "" : "dis");
 		usbnet_link_change(dev, !!event->wValue, 0);
 		break;
 

From 1ad58225dba3f2f598d2c6daed4323f24547168f Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Wed, 23 Dec 2020 22:23:20 +0100
Subject: [PATCH 200/547] net-sysfs: take the rtnl lock when storing xps_cpus

Two race conditions can be triggered when storing xps cpus, resulting in
various oops and invalid memory accesses:

1. Calling netdev_set_num_tc while netif_set_xps_queue:

   - netif_set_xps_queue uses dev->tc_num as one of the parameters to
     compute the size of new_dev_maps when allocating it. dev->tc_num is
     also used to access the map, and the compiler may generate code to
     retrieve this field multiple times in the function.

   - netdev_set_num_tc sets dev->tc_num.

   If new_dev_maps is allocated using dev->tc_num and then dev->tc_num
   is set to a higher value through netdev_set_num_tc, later accesses to
   new_dev_maps in netif_set_xps_queue could lead to accessing memory
   outside of new_dev_maps; triggering an oops.

2. Calling netif_set_xps_queue while netdev_set_num_tc is running:

   2.1. netdev_set_num_tc starts by resetting the xps queues,
        dev->tc_num isn't updated yet.

   2.2. netif_set_xps_queue is called, setting up the map with the
        *old* dev->num_tc.

   2.3. netdev_set_num_tc updates dev->tc_num.

   2.4. Later accesses to the map lead to out of bound accesses and
        oops.

   A similar issue can be found with netdev_reset_tc.

One way of triggering this is to set an iface up (for which the driver
uses netdev_set_num_tc in the open path, such as bnx2x) and writing to
xps_cpus in a concurrent thread. With the right timing an oops is
triggered.

Both issues have the same fix: netif_set_xps_queue, netdev_set_num_tc
and netdev_reset_tc should be mutually exclusive. We do that by taking
the rtnl lock in xps_cpus_store.

Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes")
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/net-sysfs.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 999b70c59761..7cc15dec1717 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1396,7 +1396,13 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
 		return err;
 	}
 
+	if (!rtnl_trylock()) {
+		free_cpumask_var(mask);
+		return restart_syscall();
+	}
+
 	err = netif_set_xps_queue(dev, mask, index);
+	rtnl_unlock();
 
 	free_cpumask_var(mask);
 

From fb25038586d0064123e393cadf1fadd70a9df97a Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Wed, 23 Dec 2020 22:23:21 +0100
Subject: [PATCH 201/547] net-sysfs: take the rtnl lock when accessing
 xps_cpus_map and num_tc

Accesses to dev->xps_cpus_map (when using dev->num_tc) should be
protected by the rtnl lock, like we do for netif_set_xps_queue. I didn't
see an actual bug being triggered, but let's be safe here and take the
rtnl lock while accessing the map in sysfs.

Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes")
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/net-sysfs.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7cc15dec1717..65886bfbf822 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1317,8 +1317,8 @@ static const struct attribute_group dql_group = {
 static ssize_t xps_cpus_show(struct netdev_queue *queue,
 			     char *buf)
 {
+	int cpu, len, ret, num_tc = 1, tc = 0;
 	struct net_device *dev = queue->dev;
-	int cpu, len, num_tc = 1, tc = 0;
 	struct xps_dev_maps *dev_maps;
 	cpumask_var_t mask;
 	unsigned long index;
@@ -1328,22 +1328,31 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 
 	index = get_netdev_queue_index(queue);
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	if (dev->num_tc) {
 		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
-		if (num_tc < 0)
-			return -EINVAL;
+		if (num_tc < 0) {
+			ret = -EINVAL;
+			goto err_rtnl_unlock;
+		}
 
 		/* If queue belongs to subordinate dev use its map */
 		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
 
 		tc = netdev_txq_to_tc(dev, index);
-		if (tc < 0)
-			return -EINVAL;
+		if (tc < 0) {
+			ret = -EINVAL;
+			goto err_rtnl_unlock;
+		}
 	}
 
-	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
-		return -ENOMEM;
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto err_rtnl_unlock;
+	}
 
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_cpus_map);
@@ -1366,9 +1375,15 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	}
 	rcu_read_unlock();
 
+	rtnl_unlock();
+
 	len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
 	free_cpumask_var(mask);
 	return len < PAGE_SIZE ? len : -EINVAL;
+
+err_rtnl_unlock:
+	rtnl_unlock();
+	return ret;
 }
 
 static ssize_t xps_cpus_store(struct netdev_queue *queue,

From 2d57b4f142e0b03e854612b8e28978935414bced Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Wed, 23 Dec 2020 22:23:22 +0100
Subject: [PATCH 202/547] net-sysfs: take the rtnl lock when storing xps_rxqs

Two race conditions can be triggered when storing xps rxqs, resulting in
various oops and invalid memory accesses:

1. Calling netdev_set_num_tc while netif_set_xps_queue:

   - netif_set_xps_queue uses dev->tc_num as one of the parameters to
     compute the size of new_dev_maps when allocating it. dev->tc_num is
     also used to access the map, and the compiler may generate code to
     retrieve this field multiple times in the function.

   - netdev_set_num_tc sets dev->tc_num.

   If new_dev_maps is allocated using dev->tc_num and then dev->tc_num
   is set to a higher value through netdev_set_num_tc, later accesses to
   new_dev_maps in netif_set_xps_queue could lead to accessing memory
   outside of new_dev_maps; triggering an oops.

2. Calling netif_set_xps_queue while netdev_set_num_tc is running:

   2.1. netdev_set_num_tc starts by resetting the xps queues,
        dev->tc_num isn't updated yet.

   2.2. netif_set_xps_queue is called, setting up the map with the
        *old* dev->num_tc.

   2.3. netdev_set_num_tc updates dev->tc_num.

   2.4. Later accesses to the map lead to out of bound accesses and
        oops.

   A similar issue can be found with netdev_reset_tc.

One way of triggering this is to set an iface up (for which the driver
uses netdev_set_num_tc in the open path, such as bnx2x) and writing to
xps_rxqs in a concurrent thread. With the right timing an oops is
triggered.

Both issues have the same fix: netif_set_xps_queue, netdev_set_num_tc
and netdev_reset_tc should be mutually exclusive. We do that by taking
the rtnl lock in xps_rxqs_store.

Fixes: 8af2c06ff4b1 ("net-sysfs: Add interface for Rx queue(s) map per Tx queue")
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/net-sysfs.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 65886bfbf822..62ca2f2c0ee6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1499,10 +1499,17 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
 		return err;
 	}
 
+	if (!rtnl_trylock()) {
+		bitmap_free(mask);
+		return restart_syscall();
+	}
+
 	cpus_read_lock();
 	err = __netif_set_xps_queue(dev, mask, index, true);
 	cpus_read_unlock();
 
+	rtnl_unlock();
+
 	bitmap_free(mask);
 	return err ? : len;
 }

From 4ae2bb81649dc03dfc95875f02126b14b773f7ab Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Wed, 23 Dec 2020 22:23:23 +0100
Subject: [PATCH 203/547] net-sysfs: take the rtnl lock when accessing
 xps_rxqs_map and num_tc

Accesses to dev->xps_rxqs_map (when using dev->num_tc) should be
protected by the rtnl lock, like we do for netif_set_xps_queue. I didn't
see an actual bug being triggered, but let's be safe here and take the
rtnl lock while accessing the map in sysfs.

Fixes: 8af2c06ff4b1 ("net-sysfs: Add interface for Rx queue(s) map per Tx queue")
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/net-sysfs.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 62ca2f2c0ee6..daf502c13d6d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1429,22 +1429,29 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
 
 static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 {
+	int j, len, ret, num_tc = 1, tc = 0;
 	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
 	unsigned long *mask, index;
-	int j, len, num_tc = 1, tc = 0;
 
 	index = get_netdev_queue_index(queue);
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	if (dev->num_tc) {
 		num_tc = dev->num_tc;
 		tc = netdev_txq_to_tc(dev, index);
-		if (tc < 0)
-			return -EINVAL;
+		if (tc < 0) {
+			ret = -EINVAL;
+			goto err_rtnl_unlock;
+		}
 	}
 	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
-	if (!mask)
-		return -ENOMEM;
+	if (!mask) {
+		ret = -ENOMEM;
+		goto err_rtnl_unlock;
+	}
 
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_rxqs_map);
@@ -1470,10 +1477,16 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 out_no_maps:
 	rcu_read_unlock();
 
+	rtnl_unlock();
+
 	len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
 	bitmap_free(mask);
 
 	return len < PAGE_SIZE ? len : -EINVAL;
+
+err_rtnl_unlock:
+	rtnl_unlock();
+	return ret;
 }
 
 static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,

From 4614792eebcbf81c60ad3604c1aeeb2b0899cea4 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 24 Dec 2020 18:24:05 +0200
Subject: [PATCH 204/547] net: ethernet: ti: cpts: fix ethtool output when no
 ptp_clock registered

The CPTS driver registers PTP PHC clock when first netif is going up and
unregister it when all netif are down. Now ethtool will show:
 - PTP PHC clock index 0 after boot until first netif is up;
 - the last assigned PTP PHC clock index even if PTP PHC clock is not
registered any more after all netifs are down.

This patch ensures that -1 is returned by ethtool when PTP PHC clock is not
registered any more.

Fixes: 8a2c9a5ab4b9 ("net: ethernet: ti: cpts: rework initialization/deinitialization")
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20201224162405.28032-1-grygorii.strashko@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ti/cpts.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index d1fc7955d422..43222a34cba0 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -599,6 +599,7 @@ void cpts_unregister(struct cpts *cpts)
 
 	ptp_clock_unregister(cpts->clock);
 	cpts->clock = NULL;
+	cpts->phc_index = -1;
 
 	cpts_write32(cpts, 0, int_enable);
 	cpts_write32(cpts, 0, control);
@@ -784,6 +785,7 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs,
 	cpts->cc.read = cpts_systim_read;
 	cpts->cc.mask = CLOCKSOURCE_MASK(32);
 	cpts->info = cpts_info;
+	cpts->phc_index = -1;
 
 	if (n_ext_ts)
 		cpts->info.n_ext_ts = n_ext_ts;

From 950271d7cc0b4546af3549d8143c4132d6e1f138 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Fri, 25 Dec 2020 10:52:16 +0800
Subject: [PATCH 205/547] tun: fix return value when the number of iovs exceeds
 MAX_SKB_FRAGS

Currently the tun_napi_alloc_frags() function returns -ENOMEM when the
number of iovs exceeds MAX_SKB_FRAGS + 1. However this is inappropriate,
we should use -EMSGSIZE instead of -ENOMEM.

The following distinctions are matters:
1. the caller need to drop the bad packet when -EMSGSIZE is returned,
   which means meeting a persistent failure.
2. the caller can try again when -ENOMEM is returned, which means
   meeting a transient failure.

Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver")
Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/r/1608864736-24332-1-git-send-email-wangyunjian@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/tun.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fbed05ae7b0f..978ac0981d16 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1365,7 +1365,7 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
 	int i;
 
 	if (it->nr_segs > MAX_SKB_FRAGS + 1)
-		return ERR_PTR(-ENOMEM);
+		return ERR_PTR(-EMSGSIZE);
 
 	local_bh_disable();
 	skb = napi_get_frags(&tfile->napi);

From e7579d5d5b3298f7e888ed07ac16bfb7174c135a Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 21 Dec 2020 22:07:25 +0100
Subject: [PATCH 206/547] net: mptcp: cap forward allocation to 1M

the following syzkaller reproducer:

 r0 = socket$inet_mptcp(0x2, 0x1, 0x106)
 bind$inet(r0, &(0x7f0000000080)={0x2, 0x4e24, @multicast2}, 0x10)
 connect$inet(r0, &(0x7f0000000480)={0x2, 0x4e24, @local}, 0x10)
 sendto$inet(r0, &(0x7f0000000100)="f6", 0xffffffe7, 0xc000, 0x0, 0x0)

systematically triggers the following warning:

 WARNING: CPU: 2 PID: 8618 at net/core/stream.c:208 sk_stream_kill_queues+0x3fa/0x580
 Modules linked in:
 CPU: 2 PID: 8618 Comm: syz-executor Not tainted 5.10.0+ #334
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/04
 RIP: 0010:sk_stream_kill_queues+0x3fa/0x580
 Code: df 48 c1 ea 03 0f b6 04 02 84 c0 74 04 3c 03 7e 40 8b ab 20 02 00 00 e9 64 ff ff ff e8 df f0 81 2
 RSP: 0018:ffffc9000290fcb0 EFLAGS: 00010293
 RAX: ffff888011cb8000 RBX: 0000000000000000 RCX: ffffffff86eecf0e
 RDX: 0000000000000000 RSI: ffffffff86eecf6a RDI: 0000000000000005
 RBP: 0000000000000e28 R08: ffff888011cb8000 R09: fffffbfff1f48139
 R10: ffffffff8fa409c7 R11: fffffbfff1f48138 R12: ffff8880215e6220
 R13: ffffffff8fa409c0 R14: ffffc9000290fd30 R15: 1ffff92000521fa2
 FS:  00007f41c78f4800(0000) GS:ffff88802d000000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f95c803d088 CR3: 0000000025ed2000 CR4: 00000000000006f0
 Call Trace:
  __mptcp_destroy_sock+0x4f5/0x8e0
   mptcp_close+0x5e2/0x7f0
  inet_release+0x12b/0x270
  __sock_release+0xc8/0x270
  sock_close+0x18/0x20
  __fput+0x272/0x8e0
  task_work_run+0xe0/0x1a0
  exit_to_user_mode_prepare+0x1df/0x200
  syscall_exit_to_user_mode+0x19/0x50
  entry_SYSCALL_64_after_hwframe+0x44/0xa9

userspace programs provide arbitrarily high values of 'len' in sendmsg():
this is causing integer overflow of 'amount'. Cap forward allocation to 1
megabyte: higher values are not really useful.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Fixes: e93da92896bc ("mptcp: implement wmem reservation")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Link: https://lore.kernel.org/r/3334d00d8b2faecafdfab9aa593efcbf61442756.1608584474.git.dcaratti@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/protocol.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 09b19aa2f205..6628d8d74203 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -877,6 +877,9 @@ static void __mptcp_wmem_reserve(struct sock *sk, int size)
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
 	WARN_ON_ONCE(msk->wmem_reserved);
+	if (WARN_ON_ONCE(amount < 0))
+		amount = 0;
+
 	if (amount <= sk->sk_forward_alloc)
 		goto reserve;
 
@@ -1587,7 +1590,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
 		return -EOPNOTSUPP;
 
-	mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, len));
+	mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len)));
 
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 

From fb1e6e562b37b39adfe251919c9abfdb3e01f921 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Sun, 27 Dec 2020 14:18:17 -0500
Subject: [PATCH 207/547] bnxt_en: Fix AER recovery.

A recent change skips sending firmware messages to the firmware when
pci_channel_offline() is true during fatal AER error.  To make this
complete, we need to move the re-initialization sequence to
bnxt_io_resume(), otherwise the firmware messages to re-initialize
will all be skipped.  In any case, it is more correct to re-initialize
in bnxt_io_resume().

Also, fix the reverse x-mas tree format when defining variables
in bnxt_io_slot_reset().

Fixes: b340dc680ed4 ("bnxt_en: Avoid sending firmware messages when AER error is detected.")
Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 ++++++++++-------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4edd6f8e017e..b8351e24395d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -12887,10 +12887,10 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
  */
 static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
 {
+	pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct bnxt *bp = netdev_priv(netdev);
 	int err = 0, off;
-	pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;
 
 	netdev_info(bp->dev, "PCI Slot Reset\n");
 
@@ -12919,22 +12919,8 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
 		pci_save_state(pdev);
 
 		err = bnxt_hwrm_func_reset(bp);
-		if (!err) {
-			err = bnxt_hwrm_func_qcaps(bp);
-			if (!err && netif_running(netdev))
-				err = bnxt_open(netdev);
-		}
-		bnxt_ulp_start(bp, err);
-		if (!err) {
-			bnxt_reenable_sriov(bp);
+		if (!err)
 			result = PCI_ERS_RESULT_RECOVERED;
-		}
-	}
-
-	if (result != PCI_ERS_RESULT_RECOVERED) {
-		if (netif_running(netdev))
-			dev_close(netdev);
-		pci_disable_device(pdev);
 	}
 
 	rtnl_unlock();
@@ -12952,10 +12938,21 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
 static void bnxt_io_resume(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct bnxt *bp = netdev_priv(netdev);
+	int err;
 
+	netdev_info(bp->dev, "PCI Slot Resume\n");
 	rtnl_lock();
 
-	netif_device_attach(netdev);
+	err = bnxt_hwrm_func_qcaps(bp);
+	if (!err && netif_running(netdev))
+		err = bnxt_open(netdev);
+
+	bnxt_ulp_start(bp, err);
+	if (!err) {
+		bnxt_reenable_sriov(bp);
+		netif_device_attach(netdev);
+	}
 
 	rtnl_unlock();
 }

From a029a2fef5d11bb85587433c3783615442abac96 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 27 Dec 2020 14:18:18 -0500
Subject: [PATCH 208/547] bnxt_en: Check TQM rings for maximum supported value.

TQM rings are hardware resources that require host context memory
managed by the driver.  The driver supports up to 9 TQM rings and
the number of rings to use is requested by firmware during run-time.
Cap this number to the maximum supported to prevent accessing beyond
the array.  Future firmware may request more than 9 TQM rings.  Define
macros to remove the magic number 9 from the C code.

Fixes: ac3158cb0108 ("bnxt_en: Allocate TQM ring context memory according to fw specification.")
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++--
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 ++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b8351e24395d..d10e4f85dd11 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6790,8 +6790,10 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
 		ctx->tqm_fp_rings_count = resp->tqm_fp_rings_count;
 		if (!ctx->tqm_fp_rings_count)
 			ctx->tqm_fp_rings_count = bp->max_q;
+		else if (ctx->tqm_fp_rings_count > BNXT_MAX_TQM_FP_RINGS)
+			ctx->tqm_fp_rings_count = BNXT_MAX_TQM_FP_RINGS;
 
-		tqm_rings = ctx->tqm_fp_rings_count + 1;
+		tqm_rings = ctx->tqm_fp_rings_count + BNXT_MAX_TQM_SP_RINGS;
 		ctx_pg = kcalloc(tqm_rings, sizeof(*ctx_pg), GFP_KERNEL);
 		if (!ctx_pg) {
 			kfree(ctx);
@@ -6925,7 +6927,8 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
 	     pg_attr = &req.tqm_sp_pg_size_tqm_sp_lvl,
 	     pg_dir = &req.tqm_sp_page_dir,
 	     ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP;
-	     i < 9; i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) {
+	     i < BNXT_MAX_TQM_RINGS;
+	     i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) {
 		if (!(enables & ena))
 			continue;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 950ea26ae0d2..51996c85547e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1436,6 +1436,11 @@ struct bnxt_ctx_pg_info {
 	struct bnxt_ctx_pg_info **ctx_pg_tbl;
 };
 
+#define BNXT_MAX_TQM_SP_RINGS		1
+#define BNXT_MAX_TQM_FP_RINGS		8
+#define BNXT_MAX_TQM_RINGS		\
+	(BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS)
+
 struct bnxt_ctx_mem_info {
 	u32	qp_max_entries;
 	u16	qp_min_qp1_entries;
@@ -1474,7 +1479,7 @@ struct bnxt_ctx_mem_info {
 	struct bnxt_ctx_pg_info stat_mem;
 	struct bnxt_ctx_pg_info mrav_mem;
 	struct bnxt_ctx_pg_info tim_mem;
-	struct bnxt_ctx_pg_info *tqm_mem[9];
+	struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TQM_RINGS];
 };
 
 struct bnxt_fw_health {

From 1169318bd565d2911b949f6123e109baa35881b6 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 26 Dec 2020 15:37:36 -0600
Subject: [PATCH 209/547] net: ipa: don't return a value from
 gsi_channel_command()

Callers of gsi_channel_command() no longer care whether the command
times out, and don't use what gsi_channel_command() returns.  Redefine
that function to have void return type.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 6ffddf3b3d182 ("net: ipa: use state to determine channel command success")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 579cc3e51677..e51a77057899 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -454,7 +454,7 @@ static enum gsi_channel_state gsi_channel_state(struct gsi_channel *channel)
 }
 
 /* Issue a channel command and wait for it to complete */
-static int
+static void
 gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode)
 {
 	struct completion *completion = &channel->completion;
@@ -489,12 +489,10 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode)
 	iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 
 	if (success)
-		return 0;
+		return;
 
 	dev_err(dev, "GSI command %u for channel %u timed out, state %u\n",
 		opcode, channel_id, gsi_channel_state(channel));
-
-	return -ETIMEDOUT;
 }
 
 /* Allocate GSI channel in NOT_ALLOCATED state */
@@ -503,7 +501,6 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id)
 	struct gsi_channel *channel = &gsi->channel[channel_id];
 	struct device *dev = gsi->dev;
 	enum gsi_channel_state state;
-	int ret;
 
 	/* Get initial channel state */
 	state = gsi_channel_state(channel);
@@ -513,7 +510,7 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id)
 		return -EINVAL;
 	}
 
-	ret = gsi_channel_command(channel, GSI_CH_ALLOCATE);
+	gsi_channel_command(channel, GSI_CH_ALLOCATE);
 
 	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);
@@ -531,7 +528,6 @@ static int gsi_channel_start_command(struct gsi_channel *channel)
 {
 	struct device *dev = channel->gsi->dev;
 	enum gsi_channel_state state;
-	int ret;
 
 	state = gsi_channel_state(channel);
 	if (state != GSI_CHANNEL_STATE_ALLOCATED &&
@@ -541,7 +537,7 @@ static int gsi_channel_start_command(struct gsi_channel *channel)
 		return -EINVAL;
 	}
 
-	ret = gsi_channel_command(channel, GSI_CH_START);
+	gsi_channel_command(channel, GSI_CH_START);
 
 	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);
@@ -559,7 +555,6 @@ static int gsi_channel_stop_command(struct gsi_channel *channel)
 {
 	struct device *dev = channel->gsi->dev;
 	enum gsi_channel_state state;
-	int ret;
 
 	state = gsi_channel_state(channel);
 
@@ -576,7 +571,7 @@ static int gsi_channel_stop_command(struct gsi_channel *channel)
 		return -EINVAL;
 	}
 
-	ret = gsi_channel_command(channel, GSI_CH_STOP);
+	gsi_channel_command(channel, GSI_CH_STOP);
 
 	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);
@@ -598,7 +593,6 @@ static void gsi_channel_reset_command(struct gsi_channel *channel)
 {
 	struct device *dev = channel->gsi->dev;
 	enum gsi_channel_state state;
-	int ret;
 
 	msleep(1);	/* A short delay is required before a RESET command */
 
@@ -612,7 +606,7 @@ static void gsi_channel_reset_command(struct gsi_channel *channel)
 		return;
 	}
 
-	ret = gsi_channel_command(channel, GSI_CH_RESET);
+	gsi_channel_command(channel, GSI_CH_RESET);
 
 	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);
@@ -627,7 +621,6 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id)
 	struct gsi_channel *channel = &gsi->channel[channel_id];
 	struct device *dev = gsi->dev;
 	enum gsi_channel_state state;
-	int ret;
 
 	state = gsi_channel_state(channel);
 	if (state != GSI_CHANNEL_STATE_ALLOCATED) {
@@ -636,7 +629,7 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id)
 		return;
 	}
 
-	ret = gsi_channel_command(channel, GSI_CH_DE_ALLOC);
+	gsi_channel_command(channel, GSI_CH_DE_ALLOC);
 
 	/* If successful the channel state will have changed */
 	state = gsi_channel_state(channel);

From 1ddf776b498c922935d0ec3283b9817dd33aedf7 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 26 Dec 2020 15:37:37 -0600
Subject: [PATCH 210/547] net: ipa: don't return a value from
 evt_ring_command()

Callers of evt_ring_command() no longer care whether the command
times out, and don't use what evt_ring_command() returns.  Redefine
that function to have void return type.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 428b448ee764a ("net: ipa: use state to determine event ring command success")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index e51a77057899..14d9a791924b 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -326,8 +326,8 @@ gsi_evt_ring_state(struct gsi *gsi, u32 evt_ring_id)
 }
 
 /* Issue an event ring command and wait for it to complete */
-static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
-			    enum gsi_evt_cmd_opcode opcode)
+static void evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
+			     enum gsi_evt_cmd_opcode opcode)
 {
 	struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
 	struct completion *completion = &evt_ring->completion;
@@ -361,19 +361,16 @@ static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id,
 	iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
 
 	if (success)
-		return 0;
+		return;
 
 	dev_err(dev, "GSI command %u for event ring %u timed out, state %u\n",
 		opcode, evt_ring_id, evt_ring->state);
-
-	return -ETIMEDOUT;
 }
 
 /* Allocate an event ring in NOT_ALLOCATED state */
 static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id)
 {
 	struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
-	int ret;
 
 	/* Get initial event ring state */
 	evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id);
@@ -383,7 +380,7 @@ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id)
 		return -EINVAL;
 	}
 
-	ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE);
+	evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE);
 
 	/* If successful the event ring state will have changed */
 	if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
@@ -400,7 +397,6 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id)
 {
 	struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
 	enum gsi_evt_ring_state state = evt_ring->state;
-	int ret;
 
 	if (state != GSI_EVT_RING_STATE_ALLOCATED &&
 	    state != GSI_EVT_RING_STATE_ERROR) {
@@ -409,7 +405,7 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id)
 		return;
 	}
 
-	ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET);
+	evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET);
 
 	/* If successful the event ring state will have changed */
 	if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED)
@@ -423,7 +419,6 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id)
 static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id)
 {
 	struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id];
-	int ret;
 
 	if (evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) {
 		dev_err(gsi->dev, "event ring %u state %u before dealloc\n",
@@ -431,7 +426,7 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id)
 		return;
 	}
 
-	ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC);
+	evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC);
 
 	/* If successful the event ring state will have changed */
 	if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED)

From 4f374d2c43a9e5e773f1dee56db63bd6b8a36276 Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Wed, 23 Dec 2020 20:35:21 +0200
Subject: [PATCH 211/547] net: mvpp2: fix pkt coalescing int-threshold
 configuration

The packet coalescing interrupt threshold has separated registers
for different aggregated/cpu (sw-thread). The required value should
be loaded for every thread but not only for 1 current cpu.

Fixes: 213f428f5056 ("net: mvpp2: add support for TX interrupts and RX queue distribution modes")
Signed-off-by: Stefan Chulski <stefanc@marvell.com>
Link: https://lore.kernel.org/r/1608748521-11033-1-git-send-email-stefanc@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index f20b31327027..4b1808acef58 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -2370,17 +2370,18 @@ static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
 static void mvpp2_tx_pkts_coal_set(struct mvpp2_port *port,
 				   struct mvpp2_tx_queue *txq)
 {
-	unsigned int thread = mvpp2_cpu_to_thread(port->priv, get_cpu());
+	unsigned int thread;
 	u32 val;
 
 	if (txq->done_pkts_coal > MVPP2_TXQ_THRESH_MASK)
 		txq->done_pkts_coal = MVPP2_TXQ_THRESH_MASK;
 
 	val = (txq->done_pkts_coal << MVPP2_TXQ_THRESH_OFFSET);
-	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id);
-	mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_THRESH_REG, val);
-
-	put_cpu();
+	/* PKT-coalescing registers are per-queue + per-thread */
+	for (thread = 0; thread < MVPP2_MAX_THREADS; thread++) {
+		mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id);
+		mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_THRESH_REG, val);
+	}
 }
 
 static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)

From 21fdca22eb7df2a1e194b8adb812ce370748b733 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Thu, 24 Dec 2020 20:01:09 +0100
Subject: [PATCH 212/547] ipv4: Ignore ECN bits for fib lookups in
 fib_compute_spec_dst()

RT_TOS() only clears one of the ECN bits. Therefore, when
fib_compute_spec_dst() resorts to a fib lookup, it can return
different results depending on the value of the second ECN bit.

For example, ECT(0) and ECT(1) packets could be treated differently.

  $ ip netns add ns0
  $ ip netns add ns1
  $ ip link add name veth01 netns ns0 type veth peer name veth10 netns ns1
  $ ip -netns ns0 link set dev lo up
  $ ip -netns ns1 link set dev lo up
  $ ip -netns ns0 link set dev veth01 up
  $ ip -netns ns1 link set dev veth10 up

  $ ip -netns ns0 address add 192.0.2.10/24 dev veth01
  $ ip -netns ns1 address add 192.0.2.11/24 dev veth10

  $ ip -netns ns1 address add 192.0.2.21/32 dev lo
  $ ip -netns ns1 route add 192.0.2.10/32 tos 4 dev veth10 src 192.0.2.21
  $ ip netns exec ns1 sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0

With TOS 4 and ECT(1), ns1 replies using source address 192.0.2.21
(ping uses -Q to set all TOS and ECN bits):

  $ ip netns exec ns0 ping -c 1 -b -Q 5 192.0.2.255
  [...]
  64 bytes from 192.0.2.21: icmp_seq=1 ttl=64 time=0.544 ms

But with TOS 4 and ECT(0), ns1 replies using source address 192.0.2.11
because the "tos 4" route isn't matched:

  $ ip netns exec ns0 ping -c 1 -b -Q 6 192.0.2.255
  [...]
  64 bytes from 192.0.2.11: icmp_seq=1 ttl=64 time=0.597 ms

After this patch the ECN bits don't affect the result anymore:

  $ ip netns exec ns0 ping -c 1 -b -Q 6 192.0.2.255
  [...]
  64 bytes from 192.0.2.21: icmp_seq=1 ttl=64 time=0.591 ms

Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper.")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cdf6ec5aa45d..84bb707bd88d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -292,7 +292,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 			.flowi4_iif = LOOPBACK_IFINDEX,
 			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
 			.daddr = ip_hdr(skb)->saddr,
-			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+			.flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
 			.flowi4_scope = scope,
 			.flowi4_mark = vmark ? skb->mark : 0,
 		};

From a533b70a657c03137dd49cbcfee70aac086ab2b1 Mon Sep 17 00:00:00 2001
From: weichenchen <weichen.chen@linux.alibaba.com>
Date: Fri, 25 Dec 2020 13:44:45 +0800
Subject: [PATCH 213/547] net: neighbor: fix a crash caused by mod zero

pneigh_enqueue() tries to obtain a random delay by mod
NEIGH_VAR(p, PROXY_DELAY). However, NEIGH_VAR(p, PROXY_DELAY)
migth be zero at that point because someone could write zero
to /proc/sys/net/ipv4/neigh/[device]/proxy_delay after the
callers check it.

This patch uses prandom_u32_max() to get a random delay instead
which avoids potential division by zero.

Signed-off-by: weichenchen <weichen.chen@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 9500d28a43b0..277ed854aef1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1569,10 +1569,8 @@ static void neigh_proxy_process(struct timer_list *t)
 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
 		    struct sk_buff *skb)
 {
-	unsigned long now = jiffies;
-
-	unsigned long sched_next = now + (prandom_u32() %
-					  NEIGH_VAR(p, PROXY_DELAY));
+	unsigned long sched_next = jiffies +
+			prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));
 
 	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
 		kfree_skb(skb);

From bd1248f1ddbc48b0c30565fce897a3b6423313b8 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 24 Dec 2020 22:23:44 -0800
Subject: [PATCH 214/547] net: sched: prevent invalid Scell_log shift count

Check Scell_log shift size in red_check_params() and modify all callers
of red_check_params() to pass Scell_log.

This prevents a shift out-of-bounds as detected by UBSAN:
  UBSAN: shift-out-of-bounds in ./include/net/red.h:252:22
  shift exponent 72 is too large for 32-bit type 'int'

Fixes: 8afa10cbe281 ("net_sched: red: Avoid illegal values")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: syzbot+97c5bd9cc81eca63d36e@syzkaller.appspotmail.com
Cc: Nogah Frankel <nogahf@mellanox.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: netdev@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/red.h     | 4 +++-
 net/sched/sch_choke.c | 2 +-
 net/sched/sch_gred.c  | 2 +-
 net/sched/sch_red.c   | 2 +-
 net/sched/sch_sfq.c   | 2 +-
 5 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/net/red.h b/include/net/red.h
index fc455445f4b2..932f0d79d60c 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -168,12 +168,14 @@ static inline void red_set_vars(struct red_vars *v)
 	v->qcount	= -1;
 }
 
-static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
+static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, u8 Scell_log)
 {
 	if (fls(qth_min) + Wlog > 32)
 		return false;
 	if (fls(qth_max) + Wlog > 32)
 		return false;
+	if (Scell_log >= 32)
+		return false;
 	if (qth_max < qth_min)
 		return false;
 	return true;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index bd618b00d319..50f680f03a54 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -362,7 +362,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
 
 	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
 
-	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log))
 		return -EINVAL;
 
 	if (ctl->limit > CHOKE_MAX_QUEUE)
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 8599c6f31b05..e0bc77533acc 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -480,7 +480,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 	struct gred_sched *table = qdisc_priv(sch);
 	struct gred_sched_data *q = table->tab[dp];
 
-	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) {
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) {
 		NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
 		return -EINVAL;
 	}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index e89fab6ccb34..b4ae34d7aa96 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -250,7 +250,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
 
 	ctl = nla_data(tb[TCA_RED_PARMS]);
-	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log))
 		return -EINVAL;
 
 	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index bca2be57d9fc..b25e51440623 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -647,7 +647,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
-					ctl_v1->Wlog))
+					ctl_v1->Wlog, ctl_v1->Scell_log))
 		return -EINVAL;
 	if (ctl_v1 && ctl_v1->qth_min) {
 		p = kmalloc(sizeof(*p), GFP_KERNEL);

From 5ede3ada3da7f050519112b81badc058190b9f9f Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Sat, 26 Dec 2020 16:10:05 +0800
Subject: [PATCH 215/547] net: hns: fix return value check in
 __lb_other_process()

The function skb_copy() could return NULL, the return value
need to be checked.

Fixes: b5996f11ea54 ("net: add Hisilicon Network Subsystem basic ethernet support")
Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 7165da0ee9aa..a6e3f07caf99 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -415,6 +415,10 @@ static void __lb_other_process(struct hns_nic_ring_data *ring_data,
 	/* for mutl buffer*/
 	new_skb = skb_copy(skb, GFP_ATOMIC);
 	dev_kfree_skb_any(skb);
+	if (!new_skb) {
+		netdev_err(ndev, "skb alloc failed\n");
+		return;
+	}
 	skb = new_skb;
 
 	check_ok = 0;

From 085c7c4e1c0e50d90b7d90f61a12e12b317a91e2 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Sat, 26 Dec 2020 15:44:53 -0800
Subject: [PATCH 216/547] erspan: fix version 1 check in gre_parse_header()

Both version 0 and version 1 use ETH_P_ERSPAN, but version 0 does not
have an erspan header. So the check in gre_parse_header() is wrong,
we have to distinguish version 1 from version 0.

We can just check the gre header length like is_erspan_type1().

Fixes: cb73ee40b1b3 ("net: ip_gre: use erspan key field for tunnel lookup")
Reported-by: syzbot+f583ce3d4ddf9836b27a@syzkaller.appspotmail.com
Cc: William Tu <u9012063@gmail.com>
Cc: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/gre_demux.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 66fdbfe5447c..5d1e6fe9d838 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -128,7 +128,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 	 * to 0 and sets the configured key in the
 	 * inner erspan header field
 	 */
-	if (greh->protocol == htons(ETH_P_ERSPAN) ||
+	if ((greh->protocol == htons(ETH_P_ERSPAN) && hdr_len != 4) ||
 	    greh->protocol == htons(ETH_P_ERSPAN2)) {
 		struct erspan_base_hdr *ershdr;
 

From 9b22fece786ed641909988da4810bfa8e5d2e592 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20Le=20Bouter?= <lle-bout@zaclys.net>
Date: Sun, 27 Dec 2020 17:11:36 +0100
Subject: [PATCH 217/547] atlantic: remove architecture depends
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was tested on a RaptorCS Talos II with IBM POWER9 DD2.2 CPUs and an
ASUS XG-C100F PCI-e card without any issue. Speeds of ~8Gbps could be
attained with not-very-scientific (wget HTTP) both-ways measurements on
a local network. No warning or error reported in kernel logs. The
drivers seems to be portable enough for it not to be gated like such.

Signed-off-by: Léo Le Bouter <lle-bout@zaclys.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/aquantia/Kconfig b/drivers/net/ethernet/aquantia/Kconfig
index efb33c078a3c..cec2018c84a9 100644
--- a/drivers/net/ethernet/aquantia/Kconfig
+++ b/drivers/net/ethernet/aquantia/Kconfig
@@ -19,7 +19,6 @@ if NET_VENDOR_AQUANTIA
 config AQTION
 	tristate "aQuantia AQtion(tm) Support"
 	depends on PCI
-	depends on X86_64 || ARM64 || COMPILE_TEST
 	depends on MACSEC || MACSEC=n
 	help
 	  This enables the support for the aQuantia AQtion(tm) Ethernet card.

From 1fef73597fa545c35fddc953979013882fbd4e55 Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Sun, 27 Dec 2020 18:53:39 -0800
Subject: [PATCH 218/547] net: hdlc_ppp: Fix issues when mod_timer is called
 while timer is running

ppp_cp_event is called directly or indirectly by ppp_rx with "ppp->lock"
held. It may call mod_timer to add a new timer. However, at the same time
ppp_timer may be already running and waiting for "ppp->lock". In this
case, there's no need for ppp_timer to continue running and it can just
exit.

If we let ppp_timer continue running, it may call add_timer. This causes
kernel panic because add_timer can't be called with a timer pending.
This patch fixes this problem.

Fixes: e022c2f07ae5 ("WAN: new synchronous PPP implementation for generic HDLC.")
Cc: Krzysztof Halasa <khc@pm.waw.pl>
Signed-off-by: Xie He <xie.he.0141@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_ppp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 64f855651336..261b53fc8e04 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -569,6 +569,13 @@ static void ppp_timer(struct timer_list *t)
 	unsigned long flags;
 
 	spin_lock_irqsave(&ppp->lock, flags);
+	/* mod_timer could be called after we entered this function but
+	 * before we got the lock.
+	 */
+	if (timer_pending(&proto->timer)) {
+		spin_unlock_irqrestore(&ppp->lock, flags);
+		return;
+	}
 	switch (proto->state) {
 	case STOPPING:
 	case REQ_SENT:

From 26b614fa441048a9f8e4a814c3b01756816ce7a7 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 16 Dec 2020 17:48:33 +0200
Subject: [PATCH 219/547] dmaengine: ti: k3-udma: Fix pktdma rchan TPL level
 setup

Instead of initializing the rchan_tpl the initial commit re-initialized
the tchan_tpl.

Fixes: d2abc982333c0 ("dmaengine: ti: k3-udma: Initial support for K3 PKTDMA")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20201216154833.20821-1-peter.ujfalusi@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/k3-udma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index 87157cbae1b8..298460438bb4 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -4698,9 +4698,9 @@ static int pktdma_setup_resources(struct udma_dev *ud)
 		ud->tchan_tpl.levels = 1;
 	}
 
-	ud->tchan_tpl.levels = ud->tchan_tpl.levels;
-	ud->tchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0];
-	ud->tchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1];
+	ud->rchan_tpl.levels = ud->tchan_tpl.levels;
+	ud->rchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0];
+	ud->rchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1];
 
 	ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt),
 					   sizeof(unsigned long), GFP_KERNEL);

From ff58f7dd0c1352a01de3a40327895bd51e03de3a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 16 Dec 2020 11:29:46 +0300
Subject: [PATCH 220/547] dmaengine: idxd: off by one in cleanup code

The clean up is off by one so this will start at "i" and it should start
with "i - 1" and then it doesn't unregister the zeroeth elements in the
array.

Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/X9nFeojulsNqUSnG@mwanda
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 266423a2cabc..4dbb03c545e4 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -434,7 +434,7 @@ int idxd_register_driver(void)
 	return 0;
 
 drv_fail:
-	for (; i > 0; i--)
+	while (--i >= 0)
 		driver_unregister(&idxd_drvs[i]->drv);
 	return rc;
 }
@@ -1840,7 +1840,7 @@ int idxd_register_bus_type(void)
 	return 0;
 
 bus_err:
-	for (; i > 0; i--)
+	while (--i >= 0)
 		bus_unregister(idxd_bus_types[i]);
 	return rc;
 }

From 8fb28795fb64e1151c0e713686d8b026a5a2aece Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 18 Dec 2020 18:41:37 +0800
Subject: [PATCH 221/547] dmaengine: qcom: gpi: Fixes a format mismatch

drivers/dma/qcom/gpi.c:1419:3: warning: format '%lu' expects argument of
 type 'long unsigned int', but argument 8 has type 'size_t {aka unsigned
 int}' [-Wformat=]
drivers/dma/qcom/gpi.c:1427:31: warning: format '%lu' expects argument of
 type 'long unsigned int', but argument 3 has type 'size_t {aka unsigned
 int}' [-Wformat=]
drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of
 type 'long long unsigned int', but argument 4 has type 'dma_addr_t {aka
 unsigned int}' [-Wformat=]
drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of
 type 'long long unsigned int', but argument 5 has type 'phys_addr_t {aka
 unsigned int}' [-Wformat=]

Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20201218104137.59200-1-nixiaoming@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/qcom/gpi.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index d2334f535de2..556c070a514c 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -1416,7 +1416,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements,
 	len = 1 << bit;
 	ring->alloc_size = (len + (len - 1));
 	dev_dbg(gpii->gpi_dev->dev,
-		"#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%lu\n",
+		"#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%zu\n",
 		  elements, el_size, (elements * el_size), len,
 		  ring->alloc_size);
 
@@ -1424,7 +1424,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements,
 					       ring->alloc_size,
 					       &ring->dma_handle, GFP_KERNEL);
 	if (!ring->pre_aligned) {
-		dev_err(gpii->gpi_dev->dev, "could not alloc size:%lu mem for ring\n",
+		dev_err(gpii->gpi_dev->dev, "could not alloc size:%zu mem for ring\n",
 			ring->alloc_size);
 		return -ENOMEM;
 	}
@@ -1444,8 +1444,8 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements,
 	smp_wmb();
 
 	dev_dbg(gpii->gpi_dev->dev,
-		"phy_pre:0x%0llx phy_alig:0x%0llx len:%u el_size:%u elements:%u\n",
-		ring->dma_handle, ring->phys_addr, ring->len,
+		"phy_pre:%pad phy_alig:%pa len:%u el_size:%u elements:%u\n",
+		&ring->dma_handle, &ring->phys_addr, ring->len,
 		ring->el_size, ring->elements);
 
 	return 0;

From 33cbd54dc515cc04b5a603603414222b4bb1448d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 19 Dec 2020 13:47:18 +0100
Subject: [PATCH 222/547] dmaengine: mediatek: mtk-hsdma: Fix a resource leak
 in the error handling path of the probe function

'mtk_hsdma_hw_deinit()' should be called in the error handling path of the
probe function to undo a previous 'mtk_hsdma_hw_init()' call, as already
done in the remove function.

Fixes: 548c4597e984 ("dmaengine: mediatek: Add MediaTek High-Speed DMA controller for MT7622 and MT7623 SoC")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/20201219124718.182664-1-christophe.jaillet@wanadoo.fr
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/mediatek/mtk-hsdma.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c
index f133ae8dece1..6ad8afbb95f2 100644
--- a/drivers/dma/mediatek/mtk-hsdma.c
+++ b/drivers/dma/mediatek/mtk-hsdma.c
@@ -1007,6 +1007,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev)
 	return 0;
 
 err_free:
+	mtk_hsdma_hw_deinit(hsdma);
 	of_dma_controller_free(pdev->dev.of_node);
 err_unregister:
 	dma_async_device_unregister(dd);

From d645148cc82ca7fbacaa601414a552184e9c6dd3 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 19 Dec 2020 14:28:00 +0100
Subject: [PATCH 223/547] dmaengine: milbeaut-xdmac: Fix a resource leak in the
 error handling path of the probe function

'disable_xdmac()' should be called in the error handling path of the
probe function to undo a previous 'enable_xdmac()' call, as already
done in the remove function.

Fixes: a6e9be055d47 ("dmaengine: milbeaut-xdmac: Add XDMAC driver for Milbeaut platforms")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/20201219132800.183254-1-christophe.jaillet@wanadoo.fr
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/milbeaut-xdmac.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c
index 584c931e807a..d29d01e730aa 100644
--- a/drivers/dma/milbeaut-xdmac.c
+++ b/drivers/dma/milbeaut-xdmac.c
@@ -350,7 +350,7 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev)
 
 	ret = dma_async_device_register(ddev);
 	if (ret)
-		return ret;
+		goto disable_xdmac;
 
 	ret = of_dma_controller_register(dev->of_node,
 					 of_dma_simple_xlate, mdev);
@@ -363,6 +363,8 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev)
 
 unregister_dmac:
 	dma_async_device_unregister(ddev);
+disable_xdmac:
+	disable_xdmac(mdev);
 	return ret;
 }
 

From 595a334148449bd1d27cf5d6fcb3b0d718cb1b9f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 14 Dec 2020 14:56:52 +0300
Subject: [PATCH 224/547] dmaengine: dw-edma: Fix use after free in
 dw_edma_alloc_chunk()

If the dw_edma_alloc_burst() function fails then we free "chunk" but
it's still on the "desc->chunk->list" list so it will lead to a use
after free.  Also the "->chunks_alloc" count is incremented when it
shouldn't be.

In current kernels small allocations are guaranteed to succeed and
dw_edma_alloc_burst() can't fail so this will not actually affect
runtime.

Fixes: e63d79d1ffcd ("dmaengine: Add Synopsys eDMA IP core driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Link: https://lore.kernel.org/r/X9dTBFrUPEvvW7qc@mwanda
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw-edma/dw-edma-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c
index b971505b8715..08d71dafa001 100644
--- a/drivers/dma/dw-edma/dw-edma-core.c
+++ b/drivers/dma/dw-edma/dw-edma-core.c
@@ -86,12 +86,12 @@ static struct dw_edma_chunk *dw_edma_alloc_chunk(struct dw_edma_desc *desc)
 
 	if (desc->chunk) {
 		/* Create and add new element into the linked list */
-		desc->chunks_alloc++;
-		list_add_tail(&chunk->list, &desc->chunk->list);
 		if (!dw_edma_alloc_burst(chunk)) {
 			kfree(chunk);
 			return NULL;
 		}
+		desc->chunks_alloc++;
+		list_add_tail(&chunk->list, &desc->chunk->list);
 	} else {
 		/* List head */
 		chunk->burst = NULL;

From ba42f61b36121730d7f51cc261dfd744ee19f50b Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 16 Dec 2020 21:06:49 +0800
Subject: [PATCH 225/547] qcom: bam_dma: Delete useless kfree code

The parameter of kfree function is NULL, so kfree code is useless, delete it.
Therefore, goto expression is no longer needed, so simplify it.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20201216130649.13979-1-zhengyongjun3@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/qcom/bam_dma.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index d5773d474d8f..88579857ca1d 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -630,7 +630,7 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
 			     GFP_NOWAIT);
 
 	if (!async_desc)
-		goto err_out;
+		return NULL;
 
 	if (flags & DMA_PREP_FENCE)
 		async_desc->flags |= DESC_FLAG_NWD;
@@ -670,10 +670,6 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
 	}
 
 	return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags);
-
-err_out:
-	kfree(async_desc);
-	return NULL;
 }
 
 /**

From 28d8e07fc9478f8f14dd5dd4b2c382982fa12461 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 15 Dec 2020 15:13:47 +0200
Subject: [PATCH 226/547] MAINTAINERS: Add entry for Texas Instruments DMA
 drivers

My employment with TI is coming to an end, it is my intention to look after
the DMA drivers I have worked with over the years.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Link: https://lore.kernel.org/r/20201215131348.11282-2-peter.ujfalusi@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 MAINTAINERS | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 546aa66428c9..217866b668b9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17553,6 +17553,19 @@ S:	Supported
 F:	Documentation/devicetree/bindings/iio/dac/ti,dac7612.txt
 F:	drivers/iio/dac/ti-dac7612.c
 
+TEXAS INSTRUMENTS DMA DRIVERS
+M:	Peter Ujfalusi <peter.ujfalusi@gmail.com>
+L:	dmaengine@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt
+F:	Documentation/devicetree/bindings/dma/ti-edma.txt
+F:	Documentation/devicetree/bindings/dma/ti/
+F:	drivers/dma/ti/
+X:	drivers/dma/ti/cppi41.c
+F:	include/linux/dma/k3-udma-glue.h
+F:	include/linux/dma/ti-cppi5.h
+F:	include/linux/dma/k3-psil.h
+
 TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER
 M:	Nishanth Menon <nm@ti.com>
 M:	Tero Kristo <t-kristo@ti.com>

From cc465fa269bc0dc63a1ab7384110e4079fb40421 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 15 Dec 2020 15:13:48 +0200
Subject: [PATCH 227/547] dt-bindings: dma: ti: Update maintainer and author
 information

My employment with TI is coming to an end, add the copyright and author comments
as they due and change the maintainer mail address.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Link: https://lore.kernel.org/r/20201215131348.11282-3-peter.ujfalusi@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml  | 4 +++-
 Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml | 4 +++-
 Documentation/devicetree/bindings/dma/ti/k3-udma.yaml   | 4 +++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
index b15f68c499cb..df29d59d13a8 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 Texas Instruments Incorporated
+# Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
 %YAML 1.2
 ---
 $id: http://devicetree.org/schemas/dma/ti/k3-bcdma.yaml#
@@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Texas Instruments K3 DMSS BCDMA Device Tree Bindings
 
 maintainers:
-  - Peter Ujfalusi <peter.ujfalusi@ti.com>
+  - Peter Ujfalusi <peter.ujfalusi@gmail.com>
 
 description: |
   The Block Copy DMA (BCDMA) is intended to perform similar functions as the TR
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
index b13ab60cd740..ea19d12a9337 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 Texas Instruments Incorporated
+# Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
 %YAML 1.2
 ---
 $id: http://devicetree.org/schemas/dma/ti/k3-pktdma.yaml#
@@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Texas Instruments K3 DMSS PKTDMA Device Tree Bindings
 
 maintainers:
-  - Peter Ujfalusi <peter.ujfalusi@ti.com>
+  - Peter Ujfalusi <peter.ujfalusi@gmail.com>
 
 description: |
   The Packet DMA (PKTDMA) is intended to perform similar functions as the packet
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
index 9a87fd9041eb..6a09bbf83d46 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2019 Texas Instruments Incorporated
+# Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
 %YAML 1.2
 ---
 $id: http://devicetree.org/schemas/dma/ti/k3-udma.yaml#
@@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Texas Instruments K3 NAVSS Unified DMA Device Tree Bindings
 
 maintainers:
-  - Peter Ujfalusi <peter.ujfalusi@ti.com>
+  - Peter Ujfalusi <peter.ujfalusi@gmail.com>
 
 description: |
   The UDMA-P is intended to perform similar (but significantly upgraded)

From 3deba4d8f07be264b21e81d604c6b569a41a33b5 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 29 Dec 2020 09:34:28 +0100
Subject: [PATCH 228/547] ALSA: usb-audio: Add quirk for BOSS AD-10

BOSS AD-10 requires the very same quirk like other BOSS devices to
enable the special implicit feedback mode.

Reported-and-tested-by: Martin Passing <martin@passing.name>
Link: https://lore.kernel.org/r/20201229083428.20467-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/implicit.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c
index eb3a4c433c3e..70b9777b2e63 100644
--- a/sound/usb/implicit.c
+++ b/sound/usb/implicit.c
@@ -78,6 +78,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = {
 	IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d6),	/* BOSS GT-1 */
 	IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d8),	/* BOSS Katana */
 	IMPLICIT_FB_SKIP_DEV(0x0582, 0x01e5),	/* BOSS GT-001 */
+	IMPLICIT_FB_SKIP_DEV(0x0582, 0x0203),   /* BOSS AD-10 */
 
 	{} /* terminator */
 };
@@ -89,6 +90,7 @@ static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = {
 	IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d6, 0x0d, 0x01), /* BOSS GT-1 */
 	IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d8, 0x0d, 0x01), /* BOSS Katana */
 	IMPLICIT_FB_FIXED_DEV(0x0582, 0x01e5, 0x0d, 0x01), /* BOSS GT-001 */
+	IMPLICIT_FB_FIXED_DEV(0x0582, 0x0203, 0x0d, 0x01), /* BOSS AD-10 */
 
 	{} /* terminator */
 };

From cffa4b2122f5f3e53cf3d529bbc74651f95856d5 Mon Sep 17 00:00:00 2001
From: Xiaolei Wang <xiaolei.wang@windriver.com>
Date: Tue, 29 Dec 2020 18:50:46 +0800
Subject: [PATCH 229/547] regmap: debugfs: Fix a memory leak when calling
 regmap_attach_dev

After initializing the regmap through
syscon_regmap_lookup_by_compatible, then regmap_attach_dev to the
device, because the debugfs_name has been allocated, there is no
need to redistribute it again

unreferenced object 0xd8399b80 (size 64):
  comm "swapper/0", pid 1, jiffies 4294937641 (age 278.590s)
  hex dump (first 32 bytes):
	64 75 6d 6d 79 2d 69 6f 6d 75 78 63 2d 67 70 72
dummy-iomuxc-gpr
	40 32 30 65 34 30 30 30 00 7f 52 5b d8 7e 42 69
@20e4000..R[.~Bi
  backtrace:
    [<ca384d6f>] kasprintf+0x2c/0x54
    [<6ad3bbc2>] regmap_debugfs_init+0xdc/0x2fc
    [<bc4181da>] __regmap_init+0xc38/0xd88
    [<1f7e0609>] of_syscon_register+0x168/0x294
    [<735e8766>] device_node_get_regmap+0x6c/0x98
    [<d96c8982>] imx6ul_init_machine+0x20/0x88
    [<0456565b>] customize_machine+0x1c/0x30
    [<d07393d8>] do_one_initcall+0x80/0x3ac
    [<7e584867>] kernel_init_freeable+0x170/0x1f0
    [<80074741>] kernel_init+0x8/0x120
    [<285d6f28>] ret_from_fork+0x14/0x20
    [<00000000>] 0x0

Fixes: 9b947a13e7f6 ("regmap: use debugfs even when no device")
Signed-off-by: Xiaolei Wang <xiaolei.wang@windriver.com>
Link: https://lore.kernel.org/r/20201229105046.41984-1-xiaolei.wang@windriver.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-debugfs.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index 8dfac7f3ed7a..bf03cd343be2 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -582,18 +582,25 @@ void regmap_debugfs_init(struct regmap *map)
 		devname = dev_name(map->dev);
 
 	if (name) {
-		map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s",
+		if (!map->debugfs_name) {
+			map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s",
 					      devname, name);
+			if (!map->debugfs_name)
+				return;
+		}
 		name = map->debugfs_name;
 	} else {
 		name = devname;
 	}
 
 	if (!strcmp(name, "dummy")) {
-		kfree(map->debugfs_name);
+		if (!map->debugfs_name)
+			kfree(map->debugfs_name);
 
 		map->debugfs_name = kasprintf(GFP_KERNEL, "dummy%d",
 						dummy_index);
+		if (!map->debugfs_name)
+				return;
 		name = map->debugfs_name;
 		dummy_index++;
 	}

From ede090f5a438e97d0586f64067bbb956e30a2a31 Mon Sep 17 00:00:00 2001
From: Xu Yilun <yilun.xu@intel.com>
Date: Tue, 29 Dec 2020 13:27:41 +0800
Subject: [PATCH 230/547] spi: altera: fix return value for altera_spi_txrx()

This patch fixes the return value for altera_spi_txrx. It should return
1 for interrupt transfer mode, and return 0 for polling transfer mode.

The altera_spi_txrx() implements the spi_controller.transfer_one
callback. According to the spi-summary.rst, the transfer_one should
return 0 when transfer is finished, return 1 when transfer is still in
progress.

Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Link: https://lore.kernel.org/r/1609219662-27057-2-git-send-email-yilun.xu@intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-altera.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/spi/spi-altera.c b/drivers/spi/spi-altera.c
index 809bfff3690a..cbc4c28c1541 100644
--- a/drivers/spi/spi-altera.c
+++ b/drivers/spi/spi-altera.c
@@ -189,24 +189,26 @@ static int altera_spi_txrx(struct spi_master *master,
 
 		/* send the first byte */
 		altera_spi_tx_word(hw);
-	} else {
-		while (hw->count < hw->len) {
-			altera_spi_tx_word(hw);
 
-			for (;;) {
-				altr_spi_readl(hw, ALTERA_SPI_STATUS, &val);
-				if (val & ALTERA_SPI_STATUS_RRDY_MSK)
-					break;
-
-				cpu_relax();
-			}
-
-			altera_spi_rx_word(hw);
-		}
-		spi_finalize_current_transfer(master);
+		return 1;
 	}
 
-	return t->len;
+	while (hw->count < hw->len) {
+		altera_spi_tx_word(hw);
+
+		for (;;) {
+			altr_spi_readl(hw, ALTERA_SPI_STATUS, &val);
+			if (val & ALTERA_SPI_STATUS_RRDY_MSK)
+				break;
+
+			cpu_relax();
+		}
+
+		altera_spi_rx_word(hw);
+	}
+	spi_finalize_current_transfer(master);
+
+	return 0;
 }
 
 static irqreturn_t altera_spi_irq(int irq, void *dev)

From da4282c17d695b9311608aa63b3c633e649aadea Mon Sep 17 00:00:00 2001
From: Jiang Wang <jiang.wang@bytedance.com>
Date: Thu, 24 Dec 2020 01:12:42 +0000
Subject: [PATCH 231/547] selftests/bpf: Fix a compile error for
 BPF_F_BPRM_SECUREEXEC

When CONFIG_BPF_LSM is not configured, running bpf selftesting will show
BPF_F_BPRM_SECUREEXEC undefined error for bprm_opts.c.

The problem is that bprm_opts.c includes vmliunx.h. The vmlinux.h is
generated by "bpftool btf dump file ./vmlinux format c". On the other
hand, BPF_F_BPRM_SECUREEXEC is defined in include/uapi/linux/bpf.h
and used only in bpf_lsm.c. When CONFIG_BPF_LSM is not set, bpf_lsm
will not be compiled, so vmlinux.h will not include definition of
BPF_F_BPRM_SECUREEXEC.

Ideally, we want to compile bpf selftest regardless of the configuration
setting, so change the include file from vmlinux.h to bpf.h.

Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201224011242.585967-1-jiang.wang@bytedance.com
---
 tools/testing/selftests/bpf/progs/bprm_opts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c
index 5bfef2887e70..418d9c6d4952 100644
--- a/tools/testing/selftests/bpf/progs/bprm_opts.c
+++ b/tools/testing/selftests/bpf/progs/bprm_opts.c
@@ -4,7 +4,7 @@
  * Copyright 2020 Google LLC.
  */
 
-#include "vmlinux.h"
+#include <linux/bpf.h>
 #include <errno.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>

From a694ffed876575d1df1a47067444047182de4354 Mon Sep 17 00:00:00 2001
From: Iskren Chernev <iskren.chernev@gmail.com>
Date: Mon, 28 Dec 2020 23:31:30 +0200
Subject: [PATCH 232/547] drm/msm: Fix null dereference in _msm_gem_new

The crash was caused by locking an uninitialized lock during init of
drm_gem_object. The lock changed in the breaking commit, but the init
was not moved accordingly.

 8<--- cut here ---
 Unable to handle kernel NULL pointer dereference at virtual address 00000000
 pgd = (ptrval)
 [00000000] *pgd=00000000
 Internal error: Oops: 5 [#1] PREEMPT SMP ARM
 Modules linked in: msm(+) qcom_spmi_vadc qcom_vadc_common dm_mod usb_f_rndis rmi_i2c rmi_core qnoc_msm8974 icc_smd_rpm pm8941_pwrkey
 CPU: 2 PID: 1020 Comm: udevd Not tainted 5.10.0-postmarketos-qcom-msm8974 #8
 Hardware name: Generic DT based system
 PC is at ww_mutex_lock+0x20/0xb0
 LR is at _msm_gem_new+0x13c/0x298 [msm]
 pc : [<c0be31e8>]    lr : [<bf0b3404>]    psr: 20000013
 sp : c36e7ad0  ip : c3b3d800  fp : 00000000
 r10: 00000001  r9 : c3b22800  r8 : 00000000
 r7 : c3b23000  r6 : c3b3d600  r5 : c3b3d600  r4 : 00000000
 r3 : c34b4780  r2 : c3b3d6f4  r1 : 00000000  r0 : 00000000
 Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
 Control: 10c5787d  Table: 03ae406a  DAC: 00000051
 Process udevd (pid: 1020, stack limit = 0x(ptrval))
 Stack: (0xc36e7ad0 to 0xc36e8000)
 [...]
 [<c0be31e8>] (ww_mutex_lock) from [<bf0b3404>] (_msm_gem_new+0x13c/0x298 [msm])
 [<bf0b3404>] (_msm_gem_new [msm]) from [<bf0b3aa8>] (_msm_gem_kernel_new+0x20/0x190 [msm])
 [<bf0b3aa8>] (_msm_gem_kernel_new [msm]) from [<bf0b4a30>] (msm_gem_kernel_new+0x24/0x2c [msm])
 [<bf0b4a30>] (msm_gem_kernel_new [msm]) from [<bf0b8e2c>] (msm_gpu_init+0x308/0x548 [msm])
 [<bf0b8e2c>] (msm_gpu_init [msm]) from [<bf060a90>] (adreno_gpu_init+0x13c/0x240 [msm])
 [<bf060a90>] (adreno_gpu_init [msm]) from [<bf062b1c>] (a3xx_gpu_init+0x78/0x1dc [msm])
 [<bf062b1c>] (a3xx_gpu_init [msm]) from [<bf05f394>] (adreno_bind+0x1cc/0x274 [msm])
 [<bf05f394>] (adreno_bind [msm]) from [<c087a254>] (component_bind_all+0x11c/0x278)
 [<c087a254>] (component_bind_all) from [<bf0b11d4>] (msm_drm_bind+0x18c/0x5b4 [msm])
 [<bf0b11d4>] (msm_drm_bind [msm]) from [<c0879ea0>] (try_to_bring_up_master+0x200/0x2c8)
 [<c0879ea0>] (try_to_bring_up_master) from [<c087a648>] (component_master_add_with_match+0xc8/0xfc)
 [<c087a648>] (component_master_add_with_match) from [<bf0b0c3c>] (msm_pdev_probe+0x288/0x2c4 [msm])
 [<bf0b0c3c>] (msm_pdev_probe [msm]) from [<c08844cc>] (platform_drv_probe+0x48/0x98)
 [<c08844cc>] (platform_drv_probe) from [<c0881cc4>] (really_probe+0x108/0x528)
 [<c0881cc4>] (really_probe) from [<c0882480>] (driver_probe_device+0x78/0x1d4)
 [<c0882480>] (driver_probe_device) from [<c08828dc>] (device_driver_attach+0xa8/0xb0)
 [<c08828dc>] (device_driver_attach) from [<c0882998>] (__driver_attach+0xb4/0x154)
 [<c0882998>] (__driver_attach) from [<c087fa1c>] (bus_for_each_dev+0x78/0xb8)
 [<c087fa1c>] (bus_for_each_dev) from [<c0880e98>] (bus_add_driver+0x10c/0x208)
 [<c0880e98>] (bus_add_driver) from [<c0883504>] (driver_register+0x88/0x118)
 [<c0883504>] (driver_register) from [<c0302098>] (do_one_initcall+0x50/0x2b0)
 [<c0302098>] (do_one_initcall) from [<c03bace4>] (do_init_module+0x60/0x288)
 [<c03bace4>] (do_init_module) from [<c03bdf1c>] (sys_finit_module+0xd4/0x120)
 [<c03bdf1c>] (sys_finit_module) from [<c0300060>] (ret_fast_syscall+0x0/0x54)
 Exception stack(0xc36e7fa8 to 0xc36e7ff0)
 7fa0:                   00020000 00000000 00000007 b6edd5b0 00000000 b6f2ff20
 7fc0: 00020000 00000000 0000017b 0000017b b6eef980 bedc3a54 00473c99 00000000
 7fe0: b6edd5b0 bedc3918 b6ed8a5f b6f6a8b0
 Code: e3c3303f e593300c e1a04000 f590f000 (e1940f9f)
 ---[ end trace 277e2a3da40bbb76 ]---

Fixes: 6c0e3ea250476 ("drm/msm/gem: Switch over to obj->resv for locking")
Signed-off-by: Iskren Chernev <iskren.chernev@gmail.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index a21be5b910ff..d9a5a1895f3d 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -1101,6 +1101,8 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev,
 		struct msm_gem_vma *vma;
 		struct page **pages;
 
+		drm_gem_private_object_init(dev, obj, size);
+
 		msm_gem_lock(obj);
 
 		vma = add_vma(obj, NULL);
@@ -1112,7 +1114,6 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev,
 
 		to_msm_bo(obj)->vram_node = &vma->node;
 
-		drm_gem_private_object_init(dev, obj, size);
 
 		pages = get_pages(obj);
 		if (IS_ERR(pages)) {

From 07fcad0d726d5da7c43f1c8e8fdb66c93a140ca5 Mon Sep 17 00:00:00 2001
From: Iskren Chernev <iskren.chernev@gmail.com>
Date: Mon, 28 Dec 2020 23:31:31 +0200
Subject: [PATCH 233/547] drm/msm: Ensure get_pages is called when locked

get_pages is only called in a locked context. Add a WARN_ON to make sure
it stays that way.

Signed-off-by: Iskren Chernev <iskren.chernev@gmail.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index d9a5a1895f3d..114c0711a302 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -96,6 +96,8 @@ static struct page **get_pages(struct drm_gem_object *obj)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 
+	WARN_ON(!msm_gem_is_locked(obj));
+
 	if (!msm_obj->pages) {
 		struct drm_device *dev = obj->dev;
 		struct page **p;
@@ -1114,8 +1116,9 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev,
 
 		to_msm_bo(obj)->vram_node = &vma->node;
 
-
+		msm_gem_lock(obj);
 		pages = get_pages(obj);
+		msm_gem_unlock(obj);
 		if (IS_ERR(pages)) {
 			ret = PTR_ERR(pages);
 			goto fail;

From 77788775c7132a8d93c6930ab1bd84fc743c7cb7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 29 Dec 2020 10:50:46 -0700
Subject: [PATCH 234/547] io_uring: don't assume mm is constant across submits

If we COW the identity, we assume that ->mm never changes. But this
isn't true of multiple processes end up sharing the ring. Hence treat
id->mm like like any other process compontent when it comes to the
identity mapping. This is pretty trivial, just moving the existing grab
into io_grab_identity(), and including a check for the match.

Cc: stable@vger.kernel.org # 5.10
Fixes: 1e6fa5216a0e ("io_uring: COW io_identity on mismatch")
Reported-by: Christian Brauner <christian.brauner@ubuntu.com>:
Tested-by: Christian Brauner <christian.brauner@ubuntu.com>:
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7e35283fc0b1..eb4620ff638e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1501,6 +1501,13 @@ static bool io_grab_identity(struct io_kiocb *req)
 		spin_unlock_irq(&ctx->inflight_lock);
 		req->work.flags |= IO_WQ_WORK_FILES;
 	}
+	if (!(req->work.flags & IO_WQ_WORK_MM) &&
+	    (def->work_flags & IO_WQ_WORK_MM)) {
+		if (id->mm != current->mm)
+			return false;
+		mmgrab(id->mm);
+		req->work.flags |= IO_WQ_WORK_MM;
+	}
 
 	return true;
 }
@@ -1525,13 +1532,6 @@ static void io_prep_async_work(struct io_kiocb *req)
 			req->work.flags |= IO_WQ_WORK_UNBOUND;
 	}
 
-	/* ->mm can never change on us */
-	if (!(req->work.flags & IO_WQ_WORK_MM) &&
-	    (def->work_flags & IO_WQ_WORK_MM)) {
-		mmgrab(id->mm);
-		req->work.flags |= IO_WQ_WORK_MM;
-	}
-
 	/* if we fail grabbing identity, we must COW, regrab, and retry */
 	if (io_grab_identity(req))
 		return;

From b000700d6db50c933ce8b661154e26cf4ad06dba Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Sat, 26 Dec 2020 15:27:14 +0800
Subject: [PATCH 235/547] habanalabs: Fix memleak in hl_device_reset

When kzalloc() fails, we should execute hl_mmu_fini()
to release the MMU module. It's the same when
hl_ctx_init() fails.

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 0749c92cbcf6..1456eabf9601 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1092,6 +1092,7 @@ kill_processes:
 						GFP_KERNEL);
 		if (!hdev->kernel_ctx) {
 			rc = -ENOMEM;
+			hl_mmu_fini(hdev);
 			goto out_err;
 		}
 
@@ -1103,6 +1104,7 @@ kill_processes:
 				"failed to init kernel ctx in hard reset\n");
 			kfree(hdev->kernel_ctx);
 			hdev->kernel_ctx = NULL;
+			hl_mmu_fini(hdev);
 			goto out_err;
 		}
 	}

From 7cf22a1c88c05ea3807f95b1edfebb729016ae52 Mon Sep 17 00:00:00 2001
From: Harish <harish@linux.ibm.com>
Date: Tue, 29 Dec 2020 15:14:22 -0800
Subject: [PATCH 236/547] selftests/vm: fix building protection keys test

Commit d8cbe8bfa7d ("tools/testing/selftests/vm: fix build error") tried
to include a ARCH check for powerpc, however ARCH is not defined in the
Makefile before including lib.mk.  This makes test building to skip on
both x86 and powerpc.

Fix the arch check by replacing it using machine type as it is already
defined and used in the test.

Link: https://lkml.kernel.org/r/20201215100402.257376-1-harish@linux.ibm.com
Fixes: d8cbe8bfa7d ("tools/testing/selftests/vm: fix build error")
Signed-off-by: Harish <harish@linux.ibm.com>
Reviewed-by: Sandipan Das <sandipan@linux.ibm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Sandipan Das <sandipan@linux.ibm.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 9a25307f6115..d42115e4284d 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -4,7 +4,7 @@
 include local_config.mk
 
 uname_M := $(shell uname -m 2>/dev/null || echo not)
-MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
+MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
 
 # Without this, failed build products remain, with up-to-date timestamps,
 # thus tricking Make (and you!) into believing that All Is Well, in subsequent
@@ -43,7 +43,7 @@ TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
 
-ifeq ($(ARCH),x86_64)
+ifeq ($(MACHINE),x86_64)
 CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
 CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
 CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
@@ -65,13 +65,13 @@ TEST_GEN_FILES += $(BINARIES_64)
 endif
 else
 
-ifneq (,$(findstring $(ARCH),powerpc))
+ifneq (,$(findstring $(MACHINE),ppc64))
 TEST_GEN_FILES += protection_keys
 endif
 
 endif
 
-ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64))
+ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64))
 TEST_GEN_FILES += va_128TBswitch
 TEST_GEN_FILES += virtual_address_range
 TEST_GEN_FILES += write_to_hugetlbfs
@@ -84,7 +84,7 @@ TEST_FILES := test_vmalloc.sh
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-ifeq ($(ARCH),x86_64)
+ifeq ($(MACHINE),x86_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
 BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
 

From e7dd91c456a8cdbcd7066997d15e36d14276a949 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Tue, 29 Dec 2020 15:14:25 -0800
Subject: [PATCH 237/547] mm/hugetlb: fix deadlock in hugetlb_cow error path

syzbot reported the deadlock here [1].  The issue is in hugetlb cow
error handling when there are not enough huge pages for the faulting
task which took the original reservation.  It is possible that other
(child) tasks could have consumed pages associated with the reservation.
In this case, we want the task which took the original reservation to
succeed.  So, we unmap any associated pages in children so that they can
be used by the faulting task that owns the reservation.

The unmapping code needs to hold i_mmap_rwsem in write mode.  However,
due to commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd
sharing synchronization") we are already holding i_mmap_rwsem in read
mode when hugetlb_cow is called.

Technically, i_mmap_rwsem does not need to be held in read mode for COW
mappings as they can not share pmd's.  Modifying the fault code to not
take i_mmap_rwsem in read mode for COW (and other non-sharable) mappings
is too involved for a stable fix.

Instead, we simply drop the hugetlb_fault_mutex and i_mmap_rwsem before
unmapping.  This is OK as it is technically not needed.  They are
reacquired after unmapping as expected by calling code.  Since this is
done in an uncommon error path, the overhead of dropping and reacquiring
mutexes is acceptable.

While making changes, remove redundant BUG_ON after unmap_ref_private.

[1] https://lkml.kernel.org/r/000000000000b73ccc05b5cf8558@google.com

Link: https://lkml.kernel.org/r/4c5781b8-3b00-761e-c0c7-c5edebb6ec1a@oracle.com
Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: syzbot+5eee4145df3c15e96625@syzkaller.appspotmail.com
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index cbf32d2824fd..a2602969873d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4105,10 +4105,30 @@ retry_avoidcopy:
 		 * may get SIGKILLed if it later faults.
 		 */
 		if (outside_reserve) {
+			struct address_space *mapping = vma->vm_file->f_mapping;
+			pgoff_t idx;
+			u32 hash;
+
 			put_page(old_page);
 			BUG_ON(huge_pte_none(pte));
+			/*
+			 * Drop hugetlb_fault_mutex and i_mmap_rwsem before
+			 * unmapping.  unmapping needs to hold i_mmap_rwsem
+			 * in write mode.  Dropping i_mmap_rwsem in read mode
+			 * here is OK as COW mappings do not interact with
+			 * PMD sharing.
+			 *
+			 * Reacquire both after unmap operation.
+			 */
+			idx = vma_hugecache_offset(h, vma, haddr);
+			hash = hugetlb_fault_mutex_hash(mapping, idx);
+			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+			i_mmap_unlock_read(mapping);
+
 			unmap_ref_private(mm, vma, old_page, haddr);
-			BUG_ON(huge_pte_none(pte));
+
+			i_mmap_lock_read(mapping);
+			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 			spin_lock(ptl);
 			ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
 			if (likely(ptep &&

From 3a176b94609a18f5f8bac7ddbf8923bd737262db Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 29 Dec 2020 15:14:28 -0800
Subject: [PATCH 238/547] Revert "kbuild: avoid static_assert for genksyms"

This reverts commit 14dc3983b5dff513a90bd5a8cc90acaf7867c3d0.

Macro Elver had sent a fix proper fix earlier, and also pointed out
corner cases:
 "I guess what you propose is simpler, but might still have corner cases
  where we still get warnings. In particular, if some file (for whatever
  reason) does not include build_bug.h and uses a raw _Static_assert(),
  then we still get warnings. E.g. I see 1 user of raw _Static_assert()
  (drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h )."

I believe the raw use of _Static_assert() should be allowed, so this
should be fixed in genksyms.

Even after commit 14dc3983b5df ("kbuild: avoid static_assert for
genksyms"), I confirmed the following test code emits the warning.

  ---------------->8----------------
  #include <linux/export.h>

  _Static_assert((1 ?: 0), "");

  void foo(void) { }
  EXPORT_SYMBOL(foo);
  ---------------->8----------------

  WARNING: modpost: EXPORT symbol "foo" [vmlinux] version generation failed, symbol will not be versioned.

Now that commit 869b91992bce ("genksyms: Ignore module scoped
_Static_assert()") fixed this issue properly, the workaround should
be reverted.

Link: https://lkml.org/lkml/2020/12/10/845
Link: https://lkml.kernel.org/r/20201219183911.181442-1-masahiroy@kernel.org
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/build_bug.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
index 7bb66e15b481..e3a0be2c90ad 100644
--- a/include/linux/build_bug.h
+++ b/include/linux/build_bug.h
@@ -77,9 +77,4 @@
 #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
 #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
 
-#ifdef __GENKSYMS__
-/* genksyms gets confused by _Static_assert */
-#define _Static_assert(expr, ...)
-#endif
-
 #endif	/* _LINUX_BUILD_BUG_H */

From 5dbdb2d87c294401a22e6a6002f08ebc9fbea38b Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 29 Dec 2020 15:14:31 -0800
Subject: [PATCH 239/547] checkpatch: prefer strscpy to strlcpy

Prefer strscpy over the deprecated strlcpy function.

Link: https://lkml.kernel.org/r/19fe91084890e2c16fe56f960de6c570a93fa99b.camel@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Requested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 00085308ed9d..92e888ed939f 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -6646,6 +6646,12 @@ sub process {
 #			}
 #		}
 
+# strlcpy uses that should likely be strscpy
+		if ($line =~ /\bstrlcpy\s*\(/) {
+			WARN("STRLCPY",
+			     "Prefer strscpy over strlcpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw\@mail.gmail.com/\n" . $herecurr);
+		}
+
 # typecasts on min/max could be min_t/max_t
 		if ($perl_version_ok &&
 		    defined $stat &&

From 6d87d0ece58bc0022ca5247721a8eb06ef66b673 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Tue, 29 Dec 2020 15:14:34 -0800
Subject: [PATCH 240/547] mm: add prototype for __add_to_page_cache_locked()

Otherwise it causes a gcc warning:

  mm/filemap.c:830:14: warning: no previous prototype for `__add_to_page_cache_locked' [-Wmissing-prototypes]

A previous attempt to make this function static led to compilation
errors when CONFIG_DEBUG_INFO_BTF is enabled because
__add_to_page_cache_locked() is referred to by BPF code.

Adding a prototype will silence the warning.

Link: https://lkml.kernel.org/r/1608693702-4665-1-git-send-email-jrdr.linux@gmail.com
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5299b90a6c40..c1e908184442 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -216,6 +216,13 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
 int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
+/*
+ * Any attempt to mark this function as static leads to build failure
+ * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked()
+ * is referred to by BPF code. This must be visible for error injection.
+ */
+int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
+		pgoff_t index, gfp_t gfp, void **shadowp);
 
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 

From dc2da7b45ffe954a0090f5d0310ed7b0b37d2bd2 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Tue, 29 Dec 2020 15:14:37 -0800
Subject: [PATCH 241/547] mm: memmap defer init doesn't work as expected

VMware observed a performance regression during memmap init on their
platform, and bisected to commit 73a6e474cb376 ("mm: memmap_init:
iterate over memblock regions rather that check each PFN") causing it.

Before the commit:

  [0.033176] Normal zone: 1445888 pages used for memmap
  [0.033176] Normal zone: 89391104 pages, LIFO batch:63
  [0.035851] ACPI: PM-Timer IO Port: 0x448

With commit

  [0.026874] Normal zone: 1445888 pages used for memmap
  [0.026875] Normal zone: 89391104 pages, LIFO batch:63
  [2.028450] ACPI: PM-Timer IO Port: 0x448

The root cause is the current memmap defer init doesn't work as expected.

Before, memmap_init_zone() was used to do memmap init of one whole zone,
to initialize all low zones of one numa node, but defer memmap init of
the last zone in that numa node.  However, since commit 73a6e474cb376,
function memmap_init() is adapted to iterater over memblock regions
inside one zone, then call memmap_init_zone() to do memmap init for each
region.

E.g, on VMware's system, the memory layout is as below, there are two
memory regions in node 2.  The current code will mistakenly initialize the
whole 1st region [mem 0xab00000000-0xfcffffffff], then do memmap defer to
iniatialize only one memmory section on the 2nd region [mem
0x10000000000-0x1033fffffff].  In fact, we only expect to see that there's
only one memory section's memmap initialized.  That's why more time is
costed at the time.

[    0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x0009ffff]
[    0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00100000-0xbfffffff]
[    0.008843] ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0x55ffffffff]
[    0.008844] ACPI: SRAT: Node 1 PXM 1 [mem 0x5600000000-0xaaffffffff]
[    0.008844] ACPI: SRAT: Node 2 PXM 2 [mem 0xab00000000-0xfcffffffff]
[    0.008845] ACPI: SRAT: Node 2 PXM 2 [mem 0x10000000000-0x1033fffffff]

Now, let's add a parameter 'zone_end_pfn' to memmap_init_zone() to pass
down the real zone end pfn so that defer_init() can use it to judge
whether defer need be taken in zone wide.

Link: https://lkml.kernel.org/r/20201223080811.16211-1-bhe@redhat.com
Link: https://lkml.kernel.org/r/20201223080811.16211-2-bhe@redhat.com
Fixes: commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions rather that check each PFN")
Signed-off-by: Baoquan He <bhe@redhat.com>
Reported-by: Rahul Gopakumar <gopakumarr@vmware.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/init.c | 4 ++--
 include/linux/mm.h  | 5 +++--
 mm/memory_hotplug.c | 2 +-
 mm/page_alloc.c     | 8 +++++---
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 9b5acf8fb092..e76386a3479e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -536,7 +536,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
 
 	if (map_start < map_end)
 		memmap_init_zone((unsigned long)(map_end - map_start),
-				 args->nid, args->zone, page_to_pfn(map_start),
+				 args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
 				 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
 	return 0;
 }
@@ -546,7 +546,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
 	     unsigned long start_pfn)
 {
 	if (!vmem_map) {
-		memmap_init_zone(size, nid, zone, start_pfn,
+		memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
 				 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
 	} else {
 		struct page *start;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c1e908184442..ecdf8a8cd6ae 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2439,8 +2439,9 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn);
 #endif
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
-		enum meminit_context, struct vmem_altmap *, int migratetype);
+extern void memmap_init_zone(unsigned long, int, unsigned long,
+		unsigned long, unsigned long, enum meminit_context,
+		struct vmem_altmap *, int migratetype);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index af41fb990820..f9d57b9be8c7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -713,7 +713,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 	 * expects the zone spans the pfn range. All the pages in the range
 	 * are reserved so nobody should be touching them so we should be safe
 	 */
-	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
+	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
 			 MEMINIT_HOTPLUG, altmap, migratetype);
 
 	set_zone_contiguous(zone);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7a2c89b21115..bdbec4c98173 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -423,6 +423,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 	if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
 		return false;
 
+	if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
+		return true;
 	/*
 	 * We start only with one section of pages, more pages are added as
 	 * needed until the rest of deferred pages are initialized.
@@ -6116,7 +6118,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
  * zone stats (e.g., nr_isolate_pageblock) are touched.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-		unsigned long start_pfn,
+		unsigned long start_pfn, unsigned long zone_end_pfn,
 		enum meminit_context context,
 		struct vmem_altmap *altmap, int migratetype)
 {
@@ -6152,7 +6154,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		if (context == MEMINIT_EARLY) {
 			if (overlap_memmap_init(zone, &pfn))
 				continue;
-			if (defer_init(nid, pfn, end_pfn))
+			if (defer_init(nid, pfn, zone_end_pfn))
 				break;
 		}
 
@@ -6266,7 +6268,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
 
 		if (end_pfn > start_pfn) {
 			size = end_pfn - start_pfn;
-			memmap_init_zone(size, nid, zone, start_pfn,
+			memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
 					 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
 		}
 	}

From e05986ee7a5814bec0e0075d813daca3d46e4a9e Mon Sep 17 00:00:00 2001
From: Kalesh Singh <kaleshsingh@google.com>
Date: Tue, 29 Dec 2020 15:14:40 -0800
Subject: [PATCH 242/547] mm/mremap.c: fix extent calculation

When `next < old_addr`, `next - old_addr` arithmetic underflows causing
`extent` to be incorrect.

Make `extent` the smaller of `next - old_addr` or `old_end - old_addr`.

Link: https://lkml.kernel.org/r/20201219170433.2418867-1-kaleshsingh@google.com
Fixes: c49dd34018026 ("mm: speedup mremap on 1GB or larger regions")
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mremap.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/mremap.c b/mm/mremap.c
index c5590afe7165..f554320281cc 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -358,7 +358,9 @@ static unsigned long get_extent(enum pgt_entry entry, unsigned long old_addr,
 
 	next = (old_addr + size) & mask;
 	/* even if next overflowed, extent below will be ok */
-	extent = (next > old_end) ? old_end - old_addr : next - old_addr;
+	extent = next - old_addr;
+	if (extent > old_end - old_addr)
+		extent = old_end - old_addr;
 	next = (new_addr + size) & mask;
 	if (extent > next - new_addr)
 		extent = next - new_addr;

From 111fe7186b29d172729db5e294875b9fc7a0ec1d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 29 Dec 2020 15:14:43 -0800
Subject: [PATCH 243/547] mm: generalise COW SMC TLB flushing race comment

I'm not sure if I'm completely missing something here, but AFAIKS the
reference to the mysterious "COW SMC race" confuses the issue.  The
original changelog and mailing list thread didn't help me either.

This SMC race is where the problem was detected, but isn't the general
problem bigger and more obvious: that the new PTE could be picked up at
any time by any TLB while entries for the old PTE exist in other TLBs
before the TLB flush takes effect?

The case where the iTLB and dTLB of a CPU are pointing at different pages
is an interesting one but follows from the general problem.

The other (minor) thing with the comment I think it makes it a bit clearer
to say what the old code was doing (i.e., it avoids the race as opposed to
what?).

References: 4ce072f1faf29 ("mm: fix a race condition under SMC + COW")
Link: https://lkml.kernel.org/r/20201215121119.351650-1-npiggin@gmail.com
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suresh Siddha <sbsiddha@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 7d608765932b..feff48e1465a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2892,11 +2892,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = pte_sw_mkyoung(entry);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+
 		/*
 		 * Clear the pte entry and flush it first, before updating the
-		 * pte with the new entry. This will avoid a race condition
-		 * seen in the presence of one thread doing SMC and another
-		 * thread doing COW.
+		 * pte with the new entry, to keep TLBs on different CPUs in
+		 * sync. This code used to set the new PTE then flush TLBs, but
+		 * that left a window where the new PTE could be loaded into
+		 * some TLBs while the old PTE remains in others.
 		 */
 		ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
 		page_add_new_anon_rmap(new_page, vma, vmf->address, false);

From 13384f6125ad7ebdcc8914fe1e03ded48ce76581 Mon Sep 17 00:00:00 2001
From: Walter Wu <walter-zh.wu@mediatek.com>
Date: Tue, 29 Dec 2020 15:14:46 -0800
Subject: [PATCH 244/547] kasan: fix null pointer dereference in
 kasan_record_aux_stack

Syzbot reported the following [1]:

  BUG: kernel NULL pointer dereference, address: 0000000000000008
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 2d993067 P4D 2d993067 PUD 19a3c067 PMD 0
  Oops: 0000 [#1] PREEMPT SMP KASAN
  CPU: 1 PID: 3852 Comm: kworker/1:2 Not tainted 5.10.0-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  Workqueue: events free_ipc
  RIP: 0010:kasan_record_aux_stack+0x77/0xb0

Add null checking slab object from kasan_get_alloc_meta() in order to
avoid null pointer dereference.

[1] https://syzkaller.appspot.com/x/log.txt?x=10a82a50d00000

Link: https://lkml.kernel.org/r/20201228080018.23041-1-walter-zh.wu@mediatek.com
Signed-off-by: Walter Wu <walter-zh.wu@mediatek.com>
Suggested-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Alexander Potapenko <glider@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/generic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 1dd5a0f99372..5106b84b07d4 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -337,6 +337,8 @@ void kasan_record_aux_stack(void *addr)
 	cache = page->slab_cache;
 	object = nearest_obj(cache, page, addr);
 	alloc_meta = kasan_get_alloc_meta(cache, object);
+	if (!alloc_meta)
+		return;
 
 	alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
 	alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT);

From 87dbc209ea04645fd2351981f09eff5d23f8e2e9 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 29 Dec 2020 15:14:49 -0800
Subject: [PATCH 245/547] local64.h: make <asm/local64.h> mandatory

Make <asm-generic/local64.h> mandatory in include/asm-generic/Kbuild and
remove all arch/*/include/asm/local64.h arch-specific files since they
only #include <asm-generic/local64.h>.

This fixes build errors on arch/c6x/ and arch/nios2/ for
block/blk-iocost.c.

Build-tested on 21 of 25 arch-es.  (tools problems on the others)

Yes, we could even rename <asm-generic/local64.h> to
<linux/local64.h> and change all #includes to use
<linux/local64.h> instead.

Link: https://lkml.kernel.org/r/20201227024446.17018-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Suggested-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ley Foon Tan <ley.foon.tan@intel.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/local64.h   | 1 -
 arch/arc/include/asm/Kbuild        | 1 -
 arch/arm/include/asm/Kbuild        | 1 -
 arch/arm64/include/asm/Kbuild      | 1 -
 arch/csky/include/asm/Kbuild       | 1 -
 arch/h8300/include/asm/Kbuild      | 1 -
 arch/hexagon/include/asm/Kbuild    | 1 -
 arch/ia64/include/asm/local64.h    | 1 -
 arch/m68k/include/asm/Kbuild       | 1 -
 arch/microblaze/include/asm/Kbuild | 1 -
 arch/mips/include/asm/Kbuild       | 1 -
 arch/nds32/include/asm/Kbuild      | 1 -
 arch/openrisc/include/asm/Kbuild   | 1 -
 arch/parisc/include/asm/Kbuild     | 1 -
 arch/powerpc/include/asm/Kbuild    | 1 -
 arch/riscv/include/asm/Kbuild      | 1 -
 arch/s390/include/asm/Kbuild       | 1 -
 arch/sh/include/asm/Kbuild         | 1 -
 arch/sparc/include/asm/Kbuild      | 1 -
 arch/x86/include/asm/local64.h     | 1 -
 arch/xtensa/include/asm/Kbuild     | 1 -
 include/asm-generic/Kbuild         | 1 +
 22 files changed, 1 insertion(+), 21 deletions(-)
 delete mode 100644 arch/alpha/include/asm/local64.h
 delete mode 100644 arch/ia64/include/asm/local64.h
 delete mode 100644 arch/x86/include/asm/local64.h

diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/alpha/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 81f4edec0c2a..3c1afa524b9c 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += user.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 4a0848aef207..03657ff8fbe3 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -2,7 +2,6 @@
 generic-y += early_ioremap.h
 generic-y += extable.h
 generic-y += flat.h
-generic-y += local64.h
 generic-y += parport.h
 
 generated-y += mach-types.h
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index ff9cbb631212..07ac208edc89 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += early_ioremap.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild
index 93372255984d..cc24bb8e539f 100644
--- a/arch/csky/include/asm/Kbuild
+++ b/arch/csky/include/asm/Kbuild
@@ -2,7 +2,6 @@
 generic-y += asm-offsets.h
 generic-y += gpio.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += qrwlock.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index ddf04f32b546..60ee7f0d60a8 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -2,7 +2,6 @@
 generic-y += asm-offsets.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += spinlock.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 373964bb177e..3ece3c93fe08 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -2,5 +2,4 @@
 generic-y += extable.h
 generic-y += iomap.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/ia64/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 1bff55aa2d54..0dbf9c5c6fae 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -2,6 +2,5 @@
 generated-y += syscall_table.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += spinlock.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 63bce836b9f1..29b0e557aa7c 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -2,7 +2,6 @@
 generated-y += syscall_table.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += syscalls.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 198b3bafdac9..95b4fa7bd0d1 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -6,7 +6,6 @@ generated-y += syscall_table_64_n64.h
 generated-y += syscall_table_64_o32.h
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += qrwlock.h
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index ff1e94299317..82a4453c9c2d 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -4,6 +4,5 @@ generic-y += cmpxchg.h
 generic-y += export.h
 generic-y += gpio.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += parport.h
 generic-y += user.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 442f3d3bcd90..ca5987e11053 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += qspinlock_types.h
 generic-y += qspinlock.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index f16c4db80116..4406475a2304 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -3,6 +3,5 @@ generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
 generated-y += syscall_table_c32.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += user.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 90cd5c53af66..e1f9b4ea1c53 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -5,7 +5,6 @@ generated-y += syscall_table_c32.h
 generated-y += syscall_table_spu.h
 generic-y += export.h
 generic-y += kvm_types.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
 generic-y += vtime.h
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 59dd7be55005..445ccc97305a 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -3,6 +3,5 @@ generic-y += early_ioremap.h
 generic-y += extable.h
 generic-y += flat.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 319efa0e6d02..1a18d7b82f86 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,5 +7,4 @@ generated-y += unistd_nr.h
 generic-y += asm-offsets.h
 generic-y += export.h
 generic-y += kvm_types.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 7435182ef846..fc44d9c88b41 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 5269a704801f..3688fdae50e4 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -6,5 +6,4 @@ generated-y += syscall_table_64.h
 generated-y += syscall_table_c32.h
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/x86/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 9718e9593564..854c5e07e867 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -2,7 +2,6 @@
 generated-y += syscall_table.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += param.h
 generic-y += qrwlock.h
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 4365b9aa3e3f..267f6dfb8960 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -34,6 +34,7 @@ mandatory-y += kmap_size.h
 mandatory-y += kprobes.h
 mandatory-y += linkage.h
 mandatory-y += local.h
+mandatory-y += local64.h
 mandatory-y += mm-arch-hooks.h
 mandatory-y += mmiowb.h
 mandatory-y += mmu.h

From 8b0fac44bd1ff17016502b3c3533f5abb8456c65 Mon Sep 17 00:00:00 2001
From: Huang Shijie <sjhuang@iluvatar.ai>
Date: Tue, 29 Dec 2020 15:14:52 -0800
Subject: [PATCH 246/547] sizes.h: add SZ_8G/SZ_16G/SZ_32G macros

Add these macros, since we can use them in drivers.

Link: https://lkml.kernel.org/r/20201229072819.11183-1-sjhuang@iluvatar.ai
Signed-off-by: Huang Shijie <sjhuang@iluvatar.ai>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sizes.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index 9874f6f67537..1ac79bcee2bb 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -44,6 +44,9 @@
 #define SZ_2G				0x80000000
 
 #define SZ_4G				_AC(0x100000000, ULL)
+#define SZ_8G				_AC(0x200000000, ULL)
+#define SZ_16G				_AC(0x400000000, ULL)
+#define SZ_32G				_AC(0x800000000, ULL)
 #define SZ_64T				_AC(0x400000000000, ULL)
 
 #endif /* __LINUX_SIZES_H__ */

From aa8c7db494d0a83ecae583aa193f1134ef25d506 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 29 Dec 2020 15:14:55 -0800
Subject: [PATCH 247/547] kdev_t: always inline major/minor helper functions

Silly GCC doesn't always inline these trivial functions.

Fixes the following warning:

  arch/x86/kernel/sys_ia32.o: warning: objtool: cp_stat64()+0xd8: call to new_encode_dev() with UACCESS enabled

Link: https://lkml.kernel.org/r/984353b44a4484d86ba9f73884b7306232e25e30.1608737428.git.jpoimboe@redhat.com
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>	[build-tested]
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kdev_t.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h
index 85b5151911cf..4856706fbfeb 100644
--- a/include/linux/kdev_t.h
+++ b/include/linux/kdev_t.h
@@ -21,61 +21,61 @@
 	})
 
 /* acceptable for old filesystems */
-static inline bool old_valid_dev(dev_t dev)
+static __always_inline bool old_valid_dev(dev_t dev)
 {
 	return MAJOR(dev) < 256 && MINOR(dev) < 256;
 }
 
-static inline u16 old_encode_dev(dev_t dev)
+static __always_inline u16 old_encode_dev(dev_t dev)
 {
 	return (MAJOR(dev) << 8) | MINOR(dev);
 }
 
-static inline dev_t old_decode_dev(u16 val)
+static __always_inline dev_t old_decode_dev(u16 val)
 {
 	return MKDEV((val >> 8) & 255, val & 255);
 }
 
-static inline u32 new_encode_dev(dev_t dev)
+static __always_inline u32 new_encode_dev(dev_t dev)
 {
 	unsigned major = MAJOR(dev);
 	unsigned minor = MINOR(dev);
 	return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12);
 }
 
-static inline dev_t new_decode_dev(u32 dev)
+static __always_inline dev_t new_decode_dev(u32 dev)
 {
 	unsigned major = (dev & 0xfff00) >> 8;
 	unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
 	return MKDEV(major, minor);
 }
 
-static inline u64 huge_encode_dev(dev_t dev)
+static __always_inline u64 huge_encode_dev(dev_t dev)
 {
 	return new_encode_dev(dev);
 }
 
-static inline dev_t huge_decode_dev(u64 dev)
+static __always_inline dev_t huge_decode_dev(u64 dev)
 {
 	return new_decode_dev(dev);
 }
 
-static inline int sysv_valid_dev(dev_t dev)
+static __always_inline int sysv_valid_dev(dev_t dev)
 {
 	return MAJOR(dev) < (1<<14) && MINOR(dev) < (1<<18);
 }
 
-static inline u32 sysv_encode_dev(dev_t dev)
+static __always_inline u32 sysv_encode_dev(dev_t dev)
 {
 	return MINOR(dev) | (MAJOR(dev) << 18);
 }
 
-static inline unsigned sysv_major(u32 dev)
+static __always_inline unsigned sysv_major(u32 dev)
 {
 	return (dev >> 18) & 0x3fff;
 }
 
-static inline unsigned sysv_minor(u32 dev)
+static __always_inline unsigned sysv_minor(u32 dev)
 {
 	return dev & 0x3ffff;
 }

From 36845663843fc59c5d794e3dc0641472e3e572da Mon Sep 17 00:00:00 2001
From: Huang Shijie <sjhuang@iluvatar.ai>
Date: Tue, 29 Dec 2020 15:14:58 -0800
Subject: [PATCH 248/547] lib/genalloc: fix the overflow when size is too big

Some graphic card has very big memory on chip, such as 32G bytes.

In the following case, it will cause overflow:

    pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
    ret = gen_pool_add(pool, 0x1000000, SZ_32G, NUMA_NO_NODE);

    va = gen_pool_alloc(pool, SZ_4G);

The overflow occurs in gen_pool_alloc_algo_owner():

		....
		size = nbits << order;
		....

The @nbits is "int" type, so it will overflow.
Then the gen_pool_avail() will return the wrong value.

This patch converts some "int" to "unsigned long", and
changes the compare code in while.

Link: https://lkml.kernel.org/r/20201229060657.3389-1-sjhuang@iluvatar.ai
Signed-off-by: Huang Shijie <sjhuang@iluvatar.ai>
Reported-by: Shi Jiasheng <jiasheng.shi@iluvatar.ai>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/genalloc.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/lib/genalloc.c b/lib/genalloc.c
index 7f1244b5294a..dab97bb69df6 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -81,14 +81,14 @@ static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
  * users set the same bit, one user will return remain bits, otherwise
  * return 0.
  */
-static int bitmap_set_ll(unsigned long *map, int start, int nr)
+static int bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
 {
 	unsigned long *p = map + BIT_WORD(start);
-	const int size = start + nr;
+	const unsigned long size = start + nr;
 	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
 	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
 
-	while (nr - bits_to_set >= 0) {
+	while (nr >= bits_to_set) {
 		if (set_bits_ll(p, mask_to_set))
 			return nr;
 		nr -= bits_to_set;
@@ -116,14 +116,15 @@ static int bitmap_set_ll(unsigned long *map, int start, int nr)
  * users clear the same bit, one user will return remain bits,
  * otherwise return 0.
  */
-static int bitmap_clear_ll(unsigned long *map, int start, int nr)
+static unsigned long
+bitmap_clear_ll(unsigned long *map, unsigned long start, unsigned long nr)
 {
 	unsigned long *p = map + BIT_WORD(start);
-	const int size = start + nr;
+	const unsigned long size = start + nr;
 	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
 	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
 
-	while (nr - bits_to_clear >= 0) {
+	while (nr >= bits_to_clear) {
 		if (clear_bits_ll(p, mask_to_clear))
 			return nr;
 		nr -= bits_to_clear;
@@ -183,8 +184,8 @@ int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t ph
 		 size_t size, int nid, void *owner)
 {
 	struct gen_pool_chunk *chunk;
-	int nbits = size >> pool->min_alloc_order;
-	int nbytes = sizeof(struct gen_pool_chunk) +
+	unsigned long nbits = size >> pool->min_alloc_order;
+	unsigned long nbytes = sizeof(struct gen_pool_chunk) +
 				BITS_TO_LONGS(nbits) * sizeof(long);
 
 	chunk = vzalloc_node(nbytes, nid);
@@ -242,7 +243,7 @@ void gen_pool_destroy(struct gen_pool *pool)
 	struct list_head *_chunk, *_next_chunk;
 	struct gen_pool_chunk *chunk;
 	int order = pool->min_alloc_order;
-	int bit, end_bit;
+	unsigned long bit, end_bit;
 
 	list_for_each_safe(_chunk, _next_chunk, &pool->chunks) {
 		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
@@ -278,7 +279,7 @@ unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size,
 	struct gen_pool_chunk *chunk;
 	unsigned long addr = 0;
 	int order = pool->min_alloc_order;
-	int nbits, start_bit, end_bit, remain;
+	unsigned long nbits, start_bit, end_bit, remain;
 
 #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
 	BUG_ON(in_nmi());
@@ -487,7 +488,7 @@ void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size,
 {
 	struct gen_pool_chunk *chunk;
 	int order = pool->min_alloc_order;
-	int start_bit, nbits, remain;
+	unsigned long start_bit, nbits, remain;
 
 #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
 	BUG_ON(in_nmi());
@@ -755,7 +756,7 @@ unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size,
 	index = bitmap_find_next_zero_area(map, size, start, nr, 0);
 
 	while (index < size) {
-		int next_bit = find_next_bit(map, size, index + nr);
+		unsigned long next_bit = find_next_bit(map, size, index + nr);
 		if ((next_bit - index) < len) {
 			len = next_bit - index;
 			start_bit = index;

From f0bb29e8c4076444d32df00c8d32e169ceecf283 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Tue, 29 Dec 2020 15:15:01 -0800
Subject: [PATCH 249/547] lib/zlib: fix inflating zlib streams on s390

Decompressing zlib streams on s390 fails with "incorrect data check"
error.

Userspace zlib checks inflate_state.flags in order to byteswap checksums
only for zlib streams, and s390 hardware inflate code, which was ported
from there, tries to match this behavior.  At the same time, kernel zlib
does not use inflate_state.flags, so it contains essentially random
values.  For many use cases either zlib stream is zeroed out or checksum
is not used, so this problem is masked, but at least SquashFS is still
affected.

Fix by always passing a checksum to and from the hardware as is, which
matches zlib_inflate()'s expectations.

Link: https://lkml.kernel.org/r/20201215155551.894884-1-iii@linux.ibm.com
Fixes: 126196100063 ("lib/zlib: add s390 hardware support for kernel zlib_inflate")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Cc: <stable@vger.kernel.org>	[5.6+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/zlib_dfltcc/dfltcc_inflate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/zlib_dfltcc/dfltcc_inflate.c b/lib/zlib_dfltcc/dfltcc_inflate.c
index db107016d29b..fb60b5a6a1cb 100644
--- a/lib/zlib_dfltcc/dfltcc_inflate.c
+++ b/lib/zlib_dfltcc/dfltcc_inflate.c
@@ -125,7 +125,7 @@ dfltcc_inflate_action dfltcc_inflate(
     param->ho = (state->write - state->whave) & ((1 << HB_BITS) - 1);
     if (param->hl)
         param->nt = 0; /* Honor history for the first block */
-    param->cv = state->flags ? REVERSE(state->check) : state->check;
+    param->cv = state->check;
 
     /* Inflate */
     do {
@@ -138,7 +138,7 @@ dfltcc_inflate_action dfltcc_inflate(
     state->bits = param->sbb;
     state->whave = param->hl;
     state->write = (param->ho + param->hl) & ((1 << HB_BITS) - 1);
-    state->check = state->flags ? REVERSE(param->cv) : param->cv;
+    state->check = param->cv;
     if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
         /* Report an error if stream is corrupted */
         state->mode = BAD;

From 605cc30dea249edf1b659e7d0146a2cf13cbbf71 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 29 Dec 2020 15:15:04 -0800
Subject: [PATCH 250/547] zlib: move EXPORT_SYMBOL() and MODULE_LICENSE() out
 of dfltcc_syms.c

In commit 11fb479ff5d9 ("zlib: export S390 symbols for zlib modules"), I
added EXPORT_SYMBOL()s to dfltcc_inflate.c but then Mikhail said that
these should probably be in dfltcc_syms.c with the other
EXPORT_SYMBOL()s.

However, that is contrary to the current kernel style, which places
EXPORT_SYMBOL() immediately after the function that it applies to, so
move all EXPORT_SYMBOL()s to their respective function locations and
drop the dfltcc_syms.c file.  Also move MODULE_LICENSE() from the
deleted file to dfltcc.c.

[rdunlap@infradead.org: remove dfltcc_syms.o from Makefile]
  Link: https://lkml.kernel.org/r/20201227171837.15492-1-rdunlap@infradead.org

Link: https://lkml.kernel.org/r/20201219052530.28461-1-rdunlap@infradead.org
Fixes: 11fb479ff5d9 ("zlib: export S390 symbols for zlib modules")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Zaslonko Mikhail <zaslonko@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/zlib_dfltcc/Makefile         |  2 +-
 lib/zlib_dfltcc/dfltcc.c         |  6 +++++-
 lib/zlib_dfltcc/dfltcc_deflate.c |  3 +++
 lib/zlib_dfltcc/dfltcc_syms.c    | 17 -----------------
 4 files changed, 9 insertions(+), 19 deletions(-)
 delete mode 100644 lib/zlib_dfltcc/dfltcc_syms.c

diff --git a/lib/zlib_dfltcc/Makefile b/lib/zlib_dfltcc/Makefile
index 8e4d5afbbb10..66e1c96387c4 100644
--- a/lib/zlib_dfltcc/Makefile
+++ b/lib/zlib_dfltcc/Makefile
@@ -8,4 +8,4 @@
 
 obj-$(CONFIG_ZLIB_DFLTCC) += zlib_dfltcc.o
 
-zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o dfltcc_syms.o
+zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o
diff --git a/lib/zlib_dfltcc/dfltcc.c b/lib/zlib_dfltcc/dfltcc.c
index c30de430b30c..782f76e9d4da 100644
--- a/lib/zlib_dfltcc/dfltcc.c
+++ b/lib/zlib_dfltcc/dfltcc.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: Zlib
 /* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. */
 
-#include <linux/zutil.h>
+#include <linux/export.h>
+#include <linux/module.h>
 #include "dfltcc_util.h"
 #include "dfltcc.h"
 
@@ -53,3 +54,6 @@ void dfltcc_reset(
     dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE;
     dfltcc_state->param.ribm = DFLTCC_RIBM;
 }
+EXPORT_SYMBOL(dfltcc_reset);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/zlib_dfltcc/dfltcc_deflate.c b/lib/zlib_dfltcc/dfltcc_deflate.c
index 00c185101c6d..6c946e8532ee 100644
--- a/lib/zlib_dfltcc/dfltcc_deflate.c
+++ b/lib/zlib_dfltcc/dfltcc_deflate.c
@@ -4,6 +4,7 @@
 #include "dfltcc_util.h"
 #include "dfltcc.h"
 #include <asm/setup.h>
+#include <linux/export.h>
 #include <linux/zutil.h>
 
 /*
@@ -34,6 +35,7 @@ int dfltcc_can_deflate(
 
     return 1;
 }
+EXPORT_SYMBOL(dfltcc_can_deflate);
 
 static void dfltcc_gdht(
     z_streamp strm
@@ -277,3 +279,4 @@ again:
         goto again; /* deflate() must use all input or all output */
     return 1;
 }
+EXPORT_SYMBOL(dfltcc_deflate);
diff --git a/lib/zlib_dfltcc/dfltcc_syms.c b/lib/zlib_dfltcc/dfltcc_syms.c
deleted file mode 100644
index 6f23481804c1..000000000000
--- a/lib/zlib_dfltcc/dfltcc_syms.c
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/lib/zlib_dfltcc/dfltcc_syms.c
- *
- * Exported symbols for the s390 zlib dfltcc support.
- *
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/zlib.h>
-#include "dfltcc.h"
-
-EXPORT_SYMBOL(dfltcc_can_deflate);
-EXPORT_SYMBOL(dfltcc_deflate);
-EXPORT_SYMBOL(dfltcc_reset);
-MODULE_LICENSE("GPL");

From 1f3147b49d75b47b6be54a1e6dfa87a4921e1e51 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 29 Dec 2020 15:15:07 -0800
Subject: [PATCH 251/547] mm: slub: call account_slab_page() after slab page
 initialization

It's convenient to have page->objects initialized before calling into
account_slab_page().  In particular, this information can be used to
pre-alloc the obj_cgroup vector.

Let's call account_slab_page() a bit later, after the initialization of
page->objects.

This commit doesn't bring any functional change, but is required for
further optimizations.

[akpm@linux-foundation.org: undo changes needed by forthcoming mm-memcg-slab-pre-allocate-obj_cgroups-for-slab-caches-with-slab_account.patch]

Link: https://lkml.kernel.org/r/20201110195753.530157-1-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 0c8b43a5b3b0..dc5b42e700b8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1619,9 +1619,6 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
 	else
 		page = __alloc_pages_node(node, flags, order);
 
-	if (page)
-		account_slab_page(page, order, s);
-
 	return page;
 }
 
@@ -1774,6 +1771,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	page->objects = oo_objects(oo);
 
+	account_slab_page(page, oo_order(oo), s);
+
 	page->slab_cache = s;
 	__SetPageSlab(page);
 	if (page_is_pfmemalloc(page))

From 875b2376fd663832bf45f7285c9d26cb8c52929a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 28 Dec 2020 19:47:06 -0800
Subject: [PATCH 252/547] fs: block_dev.c: fix kernel-doc warnings from struct
 block_device changes

Fix new kernel-doc warnings in fs/block_dev.c:

../fs/block_dev.c:1066: warning: Excess function parameter 'whole' description in 'bd_abort_claiming'
../fs/block_dev.c:1837: warning: Function parameter or member 'dev' not described in 'lookup_bdev'

Fixes: 4e7b5671c6a8 ("block: remove i_bdev")
Fixes: 37c3fc9abb25 ("block: simplify the block device claiming interface")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: linux-fsdevel@vger.kernel.org
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9293045e128c..3e5b02f6606c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1055,7 +1055,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder)
 /**
  * bd_abort_claiming - abort claiming of a block device
  * @bdev: block device of interest
- * @whole: whole block device
  * @holder: holder that has claimed @bdev
  *
  * Abort claiming of a block device when the exclusive open failed. This can be
@@ -1828,6 +1827,7 @@ const struct file_operations def_blk_fops = {
 /**
  * lookup_bdev  - lookup a struct block_device by name
  * @pathname:	special file representing the block device
+ * @dev:	return value of the block device's dev_t
  *
  * Get a reference to the blockdevice at @pathname in the current
  * namespace if possible and return it.  Return ERR_PTR(error)

From dc30432605bbbd486dfede3852ea4d42c40a84b4 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 28 Dec 2020 11:27:18 -0800
Subject: [PATCH 253/547] block: add debugfs stanza for QUEUE_FLAG_NOWAIT

This was missed in 021a24460dc2. Leads to the numeric value of
QUEUE_FLAG_NOWAIT (i.e. 29) showing up in
/sys/kernel/debug/block/*/state.

Fixes: 021a24460dc28e7412aecfae89f60e1847e685c0
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andres Freund <andres@anarazel.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3094542e12ae..e21eed20a155 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -129,6 +129,7 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(PCI_P2PDMA),
 	QUEUE_FLAG_NAME(ZONE_RESETALL),
 	QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
+	QUEUE_FLAG_NAME(NOWAIT),
 };
 #undef QUEUE_FLAG_NAME
 

From 44362a3c353aeec5904c2ae6d1737f20fe7e9c79 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 23 Dec 2020 12:08:54 +0000
Subject: [PATCH 254/547] KVM: arm64: Fix hyp_cpu_pm_{init,exit} __init
 annotation

The __init annotations on hyp_cpu_pm_{init,exit} are obviously incorrect,
and the build system shouts at you if you enable DEBUG_SECTION_MISMATCH.

Nothing really bad happens as we never execute that code outside of the
init context, but we can't label the callers as __int either, as kvm_init
isn't __init itself. Oh well.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Link: https://lore.kernel.org/r/20201223120854.255347-1-maz@kernel.org
---
 arch/arm64/kvm/arm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index ab782c480e9a..04c44853b103 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1568,12 +1568,12 @@ static struct notifier_block hyp_init_cpu_pm_nb = {
 	.notifier_call = hyp_init_cpu_pm_notifier,
 };
 
-static void __init hyp_cpu_pm_init(void)
+static void hyp_cpu_pm_init(void)
 {
 	if (!is_protected_kvm_enabled())
 		cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
 }
-static void __init hyp_cpu_pm_exit(void)
+static void hyp_cpu_pm_exit(void)
 {
 	if (!is_protected_kvm_enabled())
 		cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);

From 6820e812dafb4258bc14692f686eec5bde6fba86 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Wed, 16 Dec 2020 11:23:21 +0200
Subject: [PATCH 255/547] spi: Fix the clamping of spi->max_speed_hz

If spi->controller->max_speed_hz is zero, a non-zero spi->max_speed_hz
will be overwritten by zero. Make sure spi->controller->max_speed_hz
is not zero when clamping spi->max_speed_hz.

Put the spi->controller->max_speed_hz non-zero check higher in the if,
so that we avoid a superfluous init to zero when both spi->max_speed_hz
and spi->controller->max_speed_hz are zero.

Fixes: 9326e4f1e5dd ("spi: Limit the spi device max speed to controller's max speed")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20201216092321.413262-1-tudor.ambarus@microchip.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 51d7c004fbab..f59bf5094adb 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3378,8 +3378,9 @@ int spi_setup(struct spi_device *spi)
 	if (status)
 		return status;
 
-	if (!spi->max_speed_hz ||
-	    spi->max_speed_hz > spi->controller->max_speed_hz)
+	if (spi->controller->max_speed_hz &&
+	    (!spi->max_speed_hz ||
+	     spi->max_speed_hz > spi->controller->max_speed_hz))
 		spi->max_speed_hz = spi->controller->max_speed_hz;
 
 	mutex_lock(&spi->controller->io_mutex);

From e042f151ec7474b88b8c1edaaddd1ff7415d7117 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 28 Dec 2020 19:54:28 -0800
Subject: [PATCH 256/547] hwmon: (sbtsi_temp) Fix Documenation kernel-doc
 warning

Fix Documentation/hwmon/ kernel-doc warning in 5.11-rc1:

lnx-511-rc1/Documentation/hwmon/sbtsi_temp.rst:4: WARNING: Title underline too short.

Kernel driver sbtsi_temp
==================

Fixes: 6ec3fcf556fe ("hwmon: (sbtsi) Add documentation")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Kun Yi <kunyi@google.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Jean Delvare <jdelvare@suse.com>
Cc: linux-hwmon@vger.kernel.org
Link: https://lore.kernel.org/r/20201229035428.31270-1-rdunlap@infradead.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/sbtsi_temp.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/hwmon/sbtsi_temp.rst b/Documentation/hwmon/sbtsi_temp.rst
index 922b3c8db666..749f518389c3 100644
--- a/Documentation/hwmon/sbtsi_temp.rst
+++ b/Documentation/hwmon/sbtsi_temp.rst
@@ -1,7 +1,7 @@
 .. SPDX-License-Identifier: GPL-2.0-or-later
 
 Kernel driver sbtsi_temp
-==================
+========================
 
 Supported hardware:
 

From 742eb4750ff35fd62784b04b675d672b8dee2524 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Wed, 18 Nov 2020 21:23:33 +0100
Subject: [PATCH 257/547] s390: update defconfigs

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/configs/debug_defconfig    | 12 +++++++++---
 arch/s390/configs/defconfig          | 11 +++++++----
 arch/s390/configs/zfcpdump_defconfig |  2 ++
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 1be32fcf6f2e..c4f6ff98a612 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -61,7 +61,9 @@ CONFIG_OPROFILE=m
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y
 CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_SECCOMP_CACHE_DEBUG=y
 CONFIG_LOCK_EVENT_COUNTS=y
+# CONFIG_GCC_PLUGINS is not set
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
@@ -410,12 +412,12 @@ CONFIG_SCSI_ENCLOSURE=m
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
-CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_SAS_LIBSAS=m
 CONFIG_SCSI_SRP_ATTRS=m
 CONFIG_ISCSI_TCP=m
 CONFIG_SCSI_DEBUG=m
-CONFIG_ZFCP=y
+CONFIG_ZFCP=m
 CONFIG_SCSI_VIRTIO=m
 CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
@@ -444,6 +446,7 @@ CONFIG_DM_MULTIPATH=m
 CONFIG_DM_MULTIPATH_QL=m
 CONFIG_DM_MULTIPATH_ST=m
 CONFIG_DM_MULTIPATH_HST=m
+CONFIG_DM_MULTIPATH_IOA=m
 CONFIG_DM_DELAY=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_FLAKEY=m
@@ -542,7 +545,6 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 CONFIG_LEGACY_PTY_COUNT=0
-CONFIG_NULL_TTY=m
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -574,6 +576,7 @@ CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
 CONFIG_VHOST_NET=m
 CONFIG_VHOST_VSOCK=m
+# CONFIG_SURFACE_PLATFORMS is not set
 CONFIG_S390_CCW_IOMMU=y
 CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
@@ -655,6 +658,7 @@ CONFIG_CIFS_XATTR=y
 CONFIG_CIFS_POSIX=y
 # CONFIG_CIFS_DEBUG is not set
 CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_SWN_UPCALL=y
 CONFIG_NLS_DEFAULT="utf8"
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_850=m
@@ -826,6 +830,8 @@ CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BPF_KPROBE_OVERRIDE=y
 CONFIG_HIST_TRIGGERS=y
+CONFIG_FTRACE_STARTUP_TEST=y
+# CONFIG_EVENT_TRACE_STARTUP_TEST is not set
 CONFIG_DEBUG_USER_ASCE=y
 CONFIG_NOTIFIER_ERROR_INJECTION=m
 CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index e2171a008809..51135893cffe 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -58,6 +58,7 @@ CONFIG_S390_UNWIND_SELFTEST=m
 CONFIG_OPROFILE=m
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y
+# CONFIG_GCC_PLUGINS is not set
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
@@ -95,7 +96,6 @@ CONFIG_ZSMALLOC_STAT=y
 CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PERCPU_STATS=y
-CONFIG_GUP_TEST=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
@@ -403,12 +403,12 @@ CONFIG_SCSI_ENCLOSURE=m
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
-CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_SAS_LIBSAS=m
 CONFIG_SCSI_SRP_ATTRS=m
 CONFIG_ISCSI_TCP=m
 CONFIG_SCSI_DEBUG=m
-CONFIG_ZFCP=y
+CONFIG_ZFCP=m
 CONFIG_SCSI_VIRTIO=m
 CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
@@ -437,6 +437,7 @@ CONFIG_DM_MULTIPATH=m
 CONFIG_DM_MULTIPATH_QL=m
 CONFIG_DM_MULTIPATH_ST=m
 CONFIG_DM_MULTIPATH_HST=m
+CONFIG_DM_MULTIPATH_IOA=m
 CONFIG_DM_DELAY=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_FLAKEY=m
@@ -536,7 +537,6 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 CONFIG_LEGACY_PTY_COUNT=0
-CONFIG_NULL_TTY=m
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -568,6 +568,7 @@ CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
 CONFIG_VHOST_NET=m
 CONFIG_VHOST_VSOCK=m
+# CONFIG_SURFACE_PLATFORMS is not set
 CONFIG_S390_CCW_IOMMU=y
 CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
@@ -645,6 +646,7 @@ CONFIG_CIFS_XATTR=y
 CONFIG_CIFS_POSIX=y
 # CONFIG_CIFS_DEBUG is not set
 CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_SWN_UPCALL=y
 CONFIG_NLS_DEFAULT="utf8"
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_850=m
@@ -778,6 +780,7 @@ CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BPF_KPROBE_OVERRIDE=y
 CONFIG_HIST_TRIGGERS=y
+CONFIG_DEBUG_USER_ASCE=y
 CONFIG_LKDTM=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index a302630341ef..1ef211dae77a 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -22,6 +22,7 @@ CONFIG_CRASH_DUMP=y
 # CONFIG_VIRTUALIZATION is not set
 # CONFIG_S390_GUEST is not set
 # CONFIG_SECCOMP is not set
+# CONFIG_GCC_PLUGINS is not set
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -58,6 +59,7 @@ CONFIG_RAW_DRIVER=y
 # CONFIG_HID is not set
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_VHOST_MENU is not set
+# CONFIG_SURFACE_PLATFORMS is not set
 # CONFIG_IOMMU_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
 # CONFIG_INOTIFY_USER is not set

From 129975e75b9a2ba528d7f58be2e338cd644f6ed8 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 28 Dec 2020 09:51:57 +0100
Subject: [PATCH 258/547] s390/Kconfig: sort config S390 select list once again

...and add comments at the top and bottom.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/Kconfig | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e84bdd15150b..c72874f09741 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -54,17 +54,23 @@ config KASAN_SHADOW_OFFSET
 
 config S390
 	def_bool y
+	#
+	# Note: keep this list sorted alphabetically
+	#
+	imply IMA_SECURE_AND_OR_TRUSTED_BOOT
 	select ARCH_BINFMT_ELF_STATE
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MEM_ENCRYPT
 	select ARCH_HAS_PTE_SPECIAL
+	select ARCH_HAS_SCALED_CPUTIME
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
@@ -111,8 +117,10 @@ config S390
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS2
+	select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
 	select DMA_OPS if PCI
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
+	select GENERIC_ALLOCATOR
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_FIND_FIRST_BIT
@@ -126,22 +134,21 @@ config S390
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_VMALLOC
-	select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_SOFT_DIRTY
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_VMAP_STACK
 	select HAVE_ASM_MODVERSIONS
-	select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
-	select HAVE_FAST_GUP
+	select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select HAVE_FAST_GUP
 	select HAVE_FENTRY
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_ERROR_INJECTION
@@ -163,16 +170,15 @@ config S390
 	select HAVE_KRETPROBES
 	select HAVE_KVM
 	select HAVE_LIVEPATCH
-	select HAVE_PERF_REGS
-	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_MEMBLOCK_PHYS_MAP
-	select MMU_GATHER_NO_GATHER
 	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_NMI
 	select HAVE_NOP_MCOUNT
 	select HAVE_OPROFILE
 	select HAVE_PCI
 	select HAVE_PERF_EVENTS
-	select MMU_GATHER_RCU_TABLE_FREE
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_RSEQ
@@ -181,6 +187,8 @@ config S390
 	select HAVE_VIRT_CPU_ACCOUNTING_IDLE
 	select IOMMU_HELPER		if PCI
 	select IOMMU_SUPPORT		if PCI
+	select MMU_GATHER_NO_GATHER
+	select MMU_GATHER_RCU_TABLE_FREE
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE	if PCI
 	select NEED_SG_DMA_LENGTH	if PCI
@@ -190,17 +198,12 @@ config S390
 	select PCI_MSI			if PCI
 	select PCI_MSI_ARCH_FALLBACKS	if PCI_MSI
 	select SPARSE_IRQ
+	select SWIOTLB
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	select TTY
 	select VIRT_CPU_ACCOUNTING
-	select ARCH_HAS_SCALED_CPUTIME
-	select HAVE_NMI
-	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
-	select SWIOTLB
-	select GENERIC_ALLOCATOR
-	imply IMA_SECURE_AND_OR_TRUSTED_BOOT
-
+	# Note: keep the above list sorted alphabetically
 
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y

From 1eda52334e6d13eb1a85f713ce06dd39342b5020 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Tue, 15 Dec 2020 10:20:30 +0100
Subject: [PATCH 259/547] hwmon: (pwm-fan) Ensure that calculation doesn't
 discard big period values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With MAX_PWM being defined to 255 the code

	unsigned long period;
	...
	period = ctx->pwm->args.period;
	state.duty_cycle = DIV_ROUND_UP(pwm * (period - 1), MAX_PWM);

calculates a too small value for duty_cycle if the configured period is
big (either by discarding the 64 bit value ctx->pwm->args.period or by
overflowing the multiplication). As this results in a too slow fan and
so maybe an overheating machine better be safe than sorry and error out
in .probe.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20201215092031.152243-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pwm-fan.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c
index 777439f48c14..111a91dc6b79 100644
--- a/drivers/hwmon/pwm-fan.c
+++ b/drivers/hwmon/pwm-fan.c
@@ -334,8 +334,18 @@ static int pwm_fan_probe(struct platform_device *pdev)
 
 	ctx->pwm_value = MAX_PWM;
 
-	/* Set duty cycle to maximum allowed and enable PWM output */
 	pwm_init_state(ctx->pwm, &state);
+	/*
+	 * __set_pwm assumes that MAX_PWM * (period - 1) fits into an unsigned
+	 * long. Check this here to prevent the fan running at a too low
+	 * frequency.
+	 */
+	if (state.period > ULONG_MAX / MAX_PWM + 1) {
+		dev_err(dev, "Configured period too big\n");
+		return -EINVAL;
+	}
+
+	/* Set duty cycle to maximum allowed and enable PWM output */
 	state.duty_cycle = ctx->pwm->args.period - 1;
 	state.enabled = true;
 

From c318840fb2a42ce25febc95c4c19357acf1ae5ca Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Dec 2020 11:20:44 -0500
Subject: [PATCH 260/547] USB: Gadget: dummy-hcd: Fix shift-out-of-bounds bug

The dummy-hcd driver was written under the assumption that all the
parameters in URBs sent to its root hub would be valid.  With URBs
sent from userspace via usbfs, that assumption can be violated.

In particular, the driver doesn't fully check the port-feature values
stored in the wValue entry of Clear-Port-Feature and Set-Port-Feature
requests.  Values that are too large can cause the driver to perform
an invalid left shift of more than 32 bits.  Ironically, two of those
left shifts are unnecessary, because they implement Set-Port-Feature
requests that hubs are not required to support, according to section
11.24.2.13 of the USB-2.0 spec.

This patch adds the appropriate checks for the port feature selector
values and removes the unnecessary feature settings.  It also rejects
requests to set the TEST feature or to set or clear the INDICATOR and
C_OVERCURRENT features, as none of these are relevant to dummy-hcd's
root-hub emulation.

CC: <stable@vger.kernel.org>
Reported-and-tested-by: syzbot+5925509f78293baa7331@syzkaller.appspotmail.com
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/r/20201230162044.GA727759@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/dummy_hcd.c | 35 ++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c
index ab5e978b5052..1a953f44183a 100644
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -2118,9 +2118,21 @@ static int dummy_hub_control(
 				dum_hcd->port_status &= ~USB_PORT_STAT_POWER;
 			set_link_state(dum_hcd);
 			break;
-		default:
+		case USB_PORT_FEAT_ENABLE:
+		case USB_PORT_FEAT_C_ENABLE:
+		case USB_PORT_FEAT_C_SUSPEND:
+			/* Not allowed for USB-3 */
+			if (hcd->speed == HCD_USB3)
+				goto error;
+			fallthrough;
+		case USB_PORT_FEAT_C_CONNECTION:
+		case USB_PORT_FEAT_C_RESET:
 			dum_hcd->port_status &= ~(1 << wValue);
 			set_link_state(dum_hcd);
+			break;
+		default:
+		/* Disallow INDICATOR and C_OVER_CURRENT */
+			goto error;
 		}
 		break;
 	case GetHubDescriptor:
@@ -2281,18 +2293,17 @@ static int dummy_hub_control(
 			 */
 			dum_hcd->re_timeout = jiffies + msecs_to_jiffies(50);
 			fallthrough;
+		case USB_PORT_FEAT_C_CONNECTION:
+		case USB_PORT_FEAT_C_RESET:
+		case USB_PORT_FEAT_C_ENABLE:
+		case USB_PORT_FEAT_C_SUSPEND:
+			/* Not allowed for USB-3, and ignored for USB-2 */
+			if (hcd->speed == HCD_USB3)
+				goto error;
+			break;
 		default:
-			if (hcd->speed == HCD_USB3) {
-				if ((dum_hcd->port_status &
-				     USB_SS_PORT_STAT_POWER) != 0) {
-					dum_hcd->port_status |= (1 << wValue);
-				}
-			} else
-				if ((dum_hcd->port_status &
-				     USB_PORT_STAT_POWER) != 0) {
-					dum_hcd->port_status |= (1 << wValue);
-				}
-			set_link_state(dum_hcd);
+		/* Disallow TEST, INDICATOR, and C_OVER_CURRENT */
+			goto error;
 		}
 		break;
 	case GetPortErrorCount:

From be1283454b61a1f3b089f1a74b73e20532262e32 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 29 Dec 2020 18:08:18 +0100
Subject: [PATCH 261/547] cpufreq: intel_pstate: Fix fast-switch fallback path

When sugov_update_single_perf() falls back to the "frequency"
path due to the missing scale-invariance, it will call
cpufreq_driver_fast_switch() via sugov_fast_switch()
and the driver's ->fast_switch() callback will be invoked,
so it must not be NULL.

However, after commit a365ab6b9dfb ("cpufreq: intel_pstate: Implement
the ->adjust_perf() callback") intel_pstate sets ->fast_switch() to
NULL when it is going to use intel_cpufreq_adjust_perf(), which is a
mistake, because on x86 the scale-invariance may be turned off
dynamically, so modify it to retain the original ->adjust_perf()
callback pointer.

Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback")
Reported-by: Kenneth R. Crudup <kenny@panix.com>
Tested-by: Kenneth R. Crudup <kenny@panix.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 6e23376548ce..1a660466dd75 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3086,7 +3086,6 @@ static int __init intel_pstate_init(void)
 			intel_pstate.attr = hwp_cpufreq_attrs;
 			intel_cpufreq.attr = hwp_cpufreq_attrs;
 			intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS;
-			intel_cpufreq.fast_switch = NULL;
 			intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf;
 			if (!default_driver)
 				default_driver = &intel_pstate;

From 9cf93f056f783f986c19f40d5304d1bcffa0fc0d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Sun, 27 Dec 2020 12:11:16 +0200
Subject: [PATCH 262/547] intel_idle: add SnowRidge C-state table

Add C-state table for the SnowRidge SoC which is found on Intel Jacobsville
platforms.

The following has been changed.

 1. C1E latency changed from 10us to 15us. It was measured using the
    open source "wult" tool (the "nic" method, 15us is the 99.99th
    percentile).

 2. C1E power break even changed from 20us to 25us, which may result
    in less C1E residency in some workloads.

 3. C6 latency changed from 50us to 130us. Measured the same way as C1E.

The C6 C-state is supported only by some SnowRidge revisions, so add a C-state
table commentary about this.

On SnowRidge, C6 support is enumerated via the usual mechanism: "mwait" leaf of
the "cpuid" instruction. The 'intel_idle' driver does check this leaf, so even
though C6 is present in the table, the driver will only use it if the CPU does
support it.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 41 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index d79335506ecd..28f93b9aa51b 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -963,6 +963,39 @@ static struct cpuidle_state dnv_cstates[] __initdata = {
 		.enter = NULL }
 };
 
+/*
+ * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
+ * C6, and this is indicated in the CPUID mwait leaf.
+ */
+static struct cpuidle_state snr_cstates[] __initdata = {
+	{
+		.name = "C1",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 2,
+		.target_residency = 2,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C1E",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+		.exit_latency = 15,
+		.target_residency = 25,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.name = "C6",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 130,
+		.target_residency = 500,
+		.enter = &intel_idle,
+		.enter_s2idle = intel_idle_s2idle, },
+	{
+		.enter = NULL }
+};
+
 static const struct idle_cpu idle_cpu_nehalem __initconst = {
 	.state_table = nehalem_cstates,
 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
@@ -1084,6 +1117,12 @@ static const struct idle_cpu idle_cpu_dnv __initconst = {
 	.use_acpi = true,
 };
 
+static const struct idle_cpu idle_cpu_snr __initconst = {
+	.state_table = snr_cstates,
+	.disable_promotion_to_c1e = true,
+	.use_acpi = true,
+};
+
 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
@@ -1122,7 +1161,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
-	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_dnv),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_snr),
 	{}
 };
 

From 1642b4450d20e31439c80c28256c8eee08684698 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 30 Dec 2020 21:34:14 +0000
Subject: [PATCH 263/547] io_uring: add a helper for setting a ref node

Setting a new reference node to a file data is not trivial, don't repeat
it, add and use a helper.

Cc: stable@vger.kernel.org # 5.6+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index eb4620ff638e..6372aba8d0c2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7231,6 +7231,16 @@ static void io_file_ref_kill(struct percpu_ref *ref)
 	complete(&data->done);
 }
 
+static void io_sqe_files_set_node(struct fixed_file_data *file_data,
+				  struct fixed_file_ref_node *ref_node)
+{
+	spin_lock_bh(&file_data->lock);
+	file_data->node = ref_node;
+	list_add_tail(&ref_node->node, &file_data->ref_list);
+	spin_unlock_bh(&file_data->lock);
+	percpu_ref_get(&file_data->refs);
+}
+
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
 	struct fixed_file_data *data = ctx->file_data;
@@ -7758,11 +7768,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		return PTR_ERR(ref_node);
 	}
 
-	file_data->node = ref_node;
-	spin_lock_bh(&file_data->lock);
-	list_add_tail(&ref_node->node, &file_data->ref_list);
-	spin_unlock_bh(&file_data->lock);
-	percpu_ref_get(&file_data->refs);
+	io_sqe_files_set_node(file_data, ref_node);
 	return ret;
 out_fput:
 	for (i = 0; i < ctx->nr_user_files; i++) {
@@ -7918,11 +7924,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 
 	if (needs_switch) {
 		percpu_ref_kill(&data->node->refs);
-		spin_lock_bh(&data->lock);
-		list_add_tail(&ref_node->node, &data->ref_list);
-		data->node = ref_node;
-		spin_unlock_bh(&data->lock);
-		percpu_ref_get(&ctx->file_data->refs);
+		io_sqe_files_set_node(data, ref_node);
 	} else
 		destroy_fixed_file_ref_node(ref_node);
 

From 1ffc54220c444774b7f09e6d2121e732f8e19b94 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 30 Dec 2020 21:34:15 +0000
Subject: [PATCH 264/547] io_uring: fix io_sqe_files_unregister() hangs

io_sqe_files_unregister() uninterruptibly waits for enqueued ref nodes,
however requests keeping them may never complete, e.g. because of some
userspace dependency. Make sure it's interruptible otherwise it would
hang forever.

Cc: stable@vger.kernel.org # 5.6+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6372aba8d0c2..ca46f314640b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -992,6 +992,10 @@ enum io_mem_account {
 	ACCT_PINNED,
 };
 
+static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node);
+static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
+			struct io_ring_ctx *ctx);
+
 static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 			     struct io_comp_state *cs);
 static void io_cqring_fill_event(struct io_kiocb *req, long res);
@@ -7244,11 +7248,15 @@ static void io_sqe_files_set_node(struct fixed_file_data *file_data,
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
 	struct fixed_file_data *data = ctx->file_data;
-	struct fixed_file_ref_node *ref_node = NULL;
+	struct fixed_file_ref_node *backup_node, *ref_node = NULL;
 	unsigned nr_tables, i;
+	int ret;
 
 	if (!data)
 		return -ENXIO;
+	backup_node = alloc_fixed_file_ref_node(ctx);
+	if (!backup_node)
+		return -ENOMEM;
 
 	spin_lock_bh(&data->lock);
 	ref_node = data->node;
@@ -7260,7 +7268,18 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 
 	/* wait for all refs nodes to complete */
 	flush_delayed_work(&ctx->file_put_work);
-	wait_for_completion(&data->done);
+	do {
+		ret = wait_for_completion_interruptible(&data->done);
+		if (!ret)
+			break;
+		ret = io_run_task_work_sig();
+		if (ret < 0) {
+			percpu_ref_resurrect(&data->refs);
+			reinit_completion(&data->done);
+			io_sqe_files_set_node(data, backup_node);
+			return ret;
+		}
+	} while (1);
 
 	__io_sqe_files_unregister(ctx);
 	nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
@@ -7271,6 +7290,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	kfree(data);
 	ctx->file_data = NULL;
 	ctx->nr_user_files = 0;
+	destroy_fixed_file_ref_node(backup_node);
 	return 0;
 }
 

From b1b6b5a30dce872f500dc43f067cba8e7f86fc7d Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 30 Dec 2020 21:34:16 +0000
Subject: [PATCH 265/547] kernel/io_uring: cancel io_uring before task works

For cancelling io_uring requests it needs either to be able to run
currently enqueued task_works or having it shut down by that moment.
Otherwise io_uring_cancel_files() may be waiting for requests that won't
ever complete.

Go with the first way and do cancellations before setting PF_EXITING and
so before putting the task_work infrastructure into a transition state
where task_work_run() would better not be called.

Cc: stable@vger.kernel.org # 5.5+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/file.c     | 2 --
 kernel/exit.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index c0b60961c672..dab120b71e44 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -21,7 +21,6 @@
 #include <linux/rcupdate.h>
 #include <linux/close_range.h>
 #include <net/sock.h>
-#include <linux/io_uring.h>
 
 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -428,7 +427,6 @@ void exit_files(struct task_struct *tsk)
 	struct files_struct * files = tsk->files;
 
 	if (files) {
-		io_uring_files_cancel(files);
 		task_lock(tsk);
 		tsk->files = NULL;
 		task_unlock(tsk);
diff --git a/kernel/exit.c b/kernel/exit.c
index 3594291a8542..04029e35e69a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -63,6 +63,7 @@
 #include <linux/random.h>
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
+#include <linux/io_uring.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -776,6 +777,7 @@ void __noreturn do_exit(long code)
 		schedule();
 	}
 
+	io_uring_files_cancel(tsk->files);
 	exit_signals(tsk);  /* sets PF_EXITING */
 
 	/* sync mm's RSS info before statistics gathering */

From f93274ef0fe972c120c96b3207f8fce376231a60 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 4 Dec 2020 09:01:36 +0100
Subject: [PATCH 266/547] crypto: asym_tpm: correct zero out potential secrets

The function derive_pub_key() should be calling memzero_explicit()
instead of memset() in case the complier decides to optimize away the
call to memset() because it "knows" no one is going to touch the memory
anymore.

Cc: stable <stable@vger.kernel.org>
Reported-by: Ilil Blum Shem-Tov <ilil.blum.shem-tov@intel.com>
Tested-by: Ilil Blum Shem-Tov <ilil.blum.shem-tov@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/X8ns4AfwjKudpyfe@kroah.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/asymmetric_keys/asym_tpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index 511932aa94a6..0959613560b9 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -354,7 +354,7 @@ static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf)
 	memcpy(cur, e, sizeof(e));
 	cur += sizeof(e);
 	/* Zero parameters to satisfy set_pub_key ABI. */
-	memset(cur, 0, SETKEY_PARAMS_SIZE);
+	memzero_explicit(cur, SETKEY_PARAMS_SIZE);
 
 	return cur - buf;
 }

From 744a11abc56405c5a106e63da30a941b6d27f737 Mon Sep 17 00:00:00 2001
From: bo liu <bo.liu@senarytech.com>
Date: Tue, 29 Dec 2020 11:52:26 +0800
Subject: [PATCH 267/547] ALSA: hda/conexant: add a new hda codec CX11970

The current kernel does not support the cx11970 codec chip.
Add a codec configuration item to kernel.

[ Minor coding style fix by tiwai ]

Signed-off-by: bo liu <bo.liu@senarytech.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201229035226.62120-1-bo.liu@senarytech.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_conexant.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index be5000dd1585..d49cc4409d59 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1070,6 +1070,7 @@ static int patch_conexant_auto(struct hda_codec *codec)
 static const struct hda_device_id snd_hda_id_conexant[] = {
 	HDA_CODEC_ENTRY(0x14f11f86, "CX8070", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto),
+	HDA_CODEC_ENTRY(0x14f120d0, "CX11970", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto),

From 484229585a5e91eeb00ee10e05d5204e1ca6c481 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20Jim=C3=A9nez?= <mjbfm99@me.com>
Date: Tue, 29 Dec 2020 15:38:56 +0100
Subject: [PATCH 268/547] ALSA: hda/realtek: Add mute LED quirk for more HP
 laptops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

HP Pavilion 13-bb0000 (SSID 103c:87c8) needs the same
quirk as other models with ALC287.

Signed-off-by: Manuel Jiménez <mjbfm99@me.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/X+s/gKNydVrI6nLj@HP-Pavilion-13
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index f49cf0f227c4..a7015b3a62c7 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -7969,6 +7969,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED),
+	SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),

From ce2e79b223867b9e586021b55dee7035517a236b Mon Sep 17 00:00:00 2001
From: PeiSen Hou <pshou@realtek.com>
Date: Thu, 31 Dec 2020 11:57:28 +0100
Subject: [PATCH 269/547] ALSA: hda/realtek: Add two "Intel Reference board"
 SSID in the ALC256.

Add two "Intel Reference boad" SSID in the alc256.
Enable "power saving mode" and Enable "headset jack mode".

Signed-off-by: PeiSen Hou <pshou@realtek.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/5978d2267f034c28973d117925ec9c63@realtek.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a7015b3a62c7..71a85bf99055 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8027,6 +8027,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
 	SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE),
 	SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
+	SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
+	SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
 	SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
 	SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
 	SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),

From a598098cc9737f612dbab52294433fc26c51cc9b Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Wed, 30 Dec 2020 20:56:35 +0800
Subject: [PATCH 270/547] ALSA: hda/realtek: Enable mute and micmute LED on HP
 EliteBook 850 G7

HP EliteBook 850 G7 uses the same GPIO pins as ALC285_FIXUP_HP_GPIO_LED
to enable mute and micmute LED. So apply the quirk to enable the LEDs.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201230125636.45028-1-kai.heng.feng@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 71a85bf99055..3c1d2a3fb1a4 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -7964,6 +7964,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
 	SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
 	SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
+	SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED),

From 957cbca7317f7413e1bac555a6b567af06598b10 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 31 Dec 2020 15:05:46 +0000
Subject: [PATCH 271/547] KVM: arm64: Remove spurious semicolon in
 reg_to_encoding()

Although not a problem right now, it flared up while working
on some other aspects of the code-base. Remove the useless
semicolon.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/sys_regs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d46e7f706cb0..42ccc27fb684 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -923,7 +923,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
 #define reg_to_encoding(x)						\
 	sys_reg((u32)(x)->Op0, (u32)(x)->Op1,				\
-		(u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2);
+		(u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2)
 
 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
 #define DBG_BCR_BVR_WCR_WVR_EL1(n)					\

From 4f8af077a02eed4831885048a10e04daa4e61a72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?=
 <nfraprado@protonmail.com>
Date: Mon, 28 Dec 2020 14:46:07 +0000
Subject: [PATCH 272/547] docs: Fix reST markup when linking to sections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During the process of converting the documentation to reST, some links
were converted using the following wrong syntax (and sometimes using %20
instead of spaces):

   `Display text <#section-name-in-html>`__

This syntax isn't valid according to the docutils' spec [1], but more
importantly, it is specific to HTML, since it uses '#' to link to an
HTML anchor.

The right syntax would instead use a docutils hyperlink reference as the
embedded URI to point to the section [2], that is:

   `Display text <Section Name_>`__

This syntax works in both HTML and PDF.

The LaTeX toolchain doesn't mind the HTML anchor syntax when generating
the pdf documentation (make pdfdocs), that is, the build succeeds but
the links don't work, but that syntax causes errors when trying to build
using the not-yet-merged rst2pdf:

   ValueError: format not resolved, probably missing URL scheme or undefined destination target for 'Forcing%20Quiescent%20States'

So, use the correct syntax in order to have it work in all different
output formats.

[1]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
[2]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#embedded-uris-and-aliases

Fixes: ccc9971e2147 ("docs: rcu: convert some articles from html to ReST")
Fixes: c8cce10a62aa ("docs: Fix the reference labels in Locking.rst")
Fixes: e548cdeffcd8 ("docs-rst: convert kernel-locking to ReST")
Fixes: 7ddedebb03b7 ("ALSA: doc: ReSTize writing-an-alsa-driver document")
Signed-off-by: Nícolas F. R. A. Prado <nfraprado@protonmail.com>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/20201228144537.135353-1-nfraprado@protonmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../Tree-RCU-Memory-Ordering.rst              |  8 ++++----
 .../RCU/Design/Requirements/Requirements.rst  | 20 +++++++++----------
 Documentation/kernel-hacking/locking.rst      |  8 ++++----
 .../kernel-api/writing-an-alsa-driver.rst     | 16 +++++++--------
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
index 83ae3b79a643..a648b423ba0e 100644
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
@@ -473,7 +473,7 @@ read-side critical sections that follow the idle period (the oval near
 the bottom of the diagram above).
 
 Plumbing this into the full grace-period execution is described
-`below <#Forcing%20Quiescent%20States>`__.
+`below <Forcing Quiescent States_>`__.
 
 CPU-Hotplug Interface
 ^^^^^^^^^^^^^^^^^^^^^
@@ -494,7 +494,7 @@ mask to detect CPUs having gone offline since the beginning of this
 grace period.
 
 Plumbing this into the full grace-period execution is described
-`below <#Forcing%20Quiescent%20States>`__.
+`below <Forcing Quiescent States_>`__.
 
 Forcing Quiescent States
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -532,7 +532,7 @@ from other CPUs.
 | RCU. But this diagram is complex enough as it is, so simplicity       |
 | overrode accuracy. You can think of it as poetic license, or you can  |
 | think of it as misdirection that is resolved in the                   |
-| `stitched-together diagram <#Putting%20It%20All%20Together>`__.       |
+| `stitched-together diagram <Putting It All Together_>`__.             |
 +-----------------------------------------------------------------------+
 
 Grace-Period Cleanup
@@ -596,7 +596,7 @@ maintain ordering. For example, if the callback function wakes up a task
 that runs on some other CPU, proper ordering must in place in both the
 callback function and the task being awakened. To see why this is
 important, consider the top half of the `grace-period
-cleanup <#Grace-Period%20Cleanup>`__ diagram. The callback might be
+cleanup`_ diagram. The callback might be
 running on a CPU corresponding to the leftmost leaf ``rcu_node``
 structure, and awaken a task that is to run on a CPU corresponding to
 the rightmost leaf ``rcu_node`` structure, and the grace-period kernel
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index e8c84fcc0507..d4c9a016074b 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -45,7 +45,7 @@ requirements:
 #. `Other RCU Flavors`_
 #. `Possible Future Changes`_
 
-This is followed by a `summary <#Summary>`__, however, the answers to
+This is followed by a summary_, however, the answers to
 each quick quiz immediately follows the quiz. Select the big white space
 with your mouse to see the answer.
 
@@ -1096,7 +1096,7 @@ memory barriers.
 | case, voluntary context switch) within an RCU read-side critical      |
 | section. However, sleeping locks may be used within userspace RCU     |
 | read-side critical sections, and also within Linux-kernel sleepable   |
-| RCU `(SRCU) <#Sleepable%20RCU>`__ read-side critical sections. In     |
+| RCU `(SRCU) <Sleepable RCU_>`__ read-side critical sections. In       |
 | addition, the -rt patchset turns spinlocks into a sleeping locks so   |
 | that the corresponding critical sections can be preempted, which also |
 | means that these sleeplockified spinlocks (but not other sleeping     |
@@ -1186,7 +1186,7 @@ non-preemptible (``CONFIG_PREEMPT=n``) kernels, and thus `tiny
 RCU <https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com>`__
 was born. Josh Triplett has since taken over the small-memory banner
 with his `Linux kernel tinification <https://tiny.wiki.kernel.org/>`__
-project, which resulted in `SRCU <#Sleepable%20RCU>`__ becoming optional
+project, which resulted in `SRCU <Sleepable RCU_>`__ becoming optional
 for those kernels not needing it.
 
 The remaining performance requirements are, for the most part,
@@ -1457,8 +1457,8 @@ will vary as the value of ``HZ`` varies, and can also be changed using
 the relevant Kconfig options and kernel boot parameters. RCU currently
 does not do much sanity checking of these parameters, so please use
 caution when changing them. Note that these forward-progress measures
-are provided only for RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks
-RCU <#Tasks%20RCU>`__.
+are provided only for RCU, not for `SRCU <Sleepable RCU_>`__ or `Tasks
+RCU`_.
 
 RCU takes the following steps in ``call_rcu()`` to encourage timely
 invocation of callbacks when any given non-\ ``rcu_nocbs`` CPU has
@@ -1477,8 +1477,8 @@ encouragement was provided:
 
 Again, these are default values when running at ``HZ=1000``, and can be
 overridden. Again, these forward-progress measures are provided only for
-RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks
-RCU <#Tasks%20RCU>`__. Even for RCU, callback-invocation forward
+RCU, not for `SRCU <Sleepable RCU_>`__ or `Tasks
+RCU`_. Even for RCU, callback-invocation forward
 progress for ``rcu_nocbs`` CPUs is much less well-developed, in part
 because workloads benefiting from ``rcu_nocbs`` CPUs tend to invoke
 ``call_rcu()`` relatively infrequently. If workloads emerge that need
@@ -1920,7 +1920,7 @@ Hotplug CPU
 
 The Linux kernel supports CPU hotplug, which means that CPUs can come
 and go. It is of course illegal to use any RCU API member from an
-offline CPU, with the exception of `SRCU <#Sleepable%20RCU>`__ read-side
+offline CPU, with the exception of `SRCU <Sleepable RCU_>`__ read-side
 critical sections. This requirement was present from day one in
 DYNIX/ptx, but on the other hand, the Linux kernel's CPU-hotplug
 implementation is “interesting.”
@@ -2177,7 +2177,7 @@ handles these states differently:
 However, RCU must be reliably informed as to whether any given CPU is
 currently in the idle loop, and, for ``NO_HZ_FULL``, also whether that
 CPU is executing in usermode, as discussed
-`earlier <#Energy%20Efficiency>`__. It also requires that the
+`earlier <Energy Efficiency_>`__. It also requires that the
 scheduling-clock interrupt be enabled when RCU needs it to be:
 
 #. If a CPU is either idle or executing in usermode, and RCU believes it
@@ -2294,7 +2294,7 @@ Performance, Scalability, Response Time, and Reliability
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Expanding on the `earlier
-discussion <#Performance%20and%20Scalability>`__, RCU is used heavily by
+discussion <Performance and Scalability_>`__, RCU is used heavily by
 hot code paths in performance-critical portions of the Linux kernel's
 networking, security, virtualization, and scheduling code paths. RCU
 must therefore use efficient implementations, especially in its
diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst
index 6ed806e6061b..c3448929a824 100644
--- a/Documentation/kernel-hacking/locking.rst
+++ b/Documentation/kernel-hacking/locking.rst
@@ -118,11 +118,11 @@ spinlock, but you may block holding a mutex. If you can't lock a mutex,
 your task will suspend itself, and be woken up when the mutex is
 released. This means the CPU can do something else while you are
 waiting. There are many cases when you simply can't sleep (see
-`What Functions Are Safe To Call From Interrupts? <#sleeping-things>`__),
+`What Functions Are Safe To Call From Interrupts?`_),
 and so have to use a spinlock instead.
 
 Neither type of lock is recursive: see
-`Deadlock: Simple and Advanced <#deadlock>`__.
+`Deadlock: Simple and Advanced`_.
 
 Locks and Uniprocessor Kernels
 ------------------------------
@@ -179,7 +179,7 @@ perfect world).
 
 Note that you can also use spin_lock_irq() or
 spin_lock_irqsave() here, which stop hardware interrupts
-as well: see `Hard IRQ Context <#hard-irq-context>`__.
+as well: see `Hard IRQ Context`_.
 
 This works perfectly for UP as well: the spin lock vanishes, and this
 macro simply becomes local_bh_disable()
@@ -230,7 +230,7 @@ The Same Softirq
 ~~~~~~~~~~~~~~~~
 
 The same softirq can run on the other CPUs: you can use a per-CPU array
-(see `Per-CPU Data <#per-cpu-data>`__) for better performance. If you're
+(see `Per-CPU Data`_) for better performance. If you're
 going so far as to use a softirq, you probably care about scalable
 performance enough to justify the extra complexity.
 
diff --git a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
index 73bbd59afc33..e6365836fa8b 100644
--- a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
+++ b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
@@ -71,7 +71,7 @@ core/oss
 The codes for PCM and mixer OSS emulation modules are stored in this
 directory. The rawmidi OSS emulation is included in the ALSA rawmidi
 code since it's quite small. The sequencer code is stored in
-``core/seq/oss`` directory (see `below <#core-seq-oss>`__).
+``core/seq/oss`` directory (see `below <core/seq/oss_>`__).
 
 core/seq
 ~~~~~~~~
@@ -382,7 +382,7 @@ where ``enable[dev]`` is the module option.
 Each time the ``probe`` callback is called, check the availability of
 the device. If not available, simply increment the device index and
 returns. dev will be incremented also later (`step 7
-<#set-the-pci-driver-data-and-return-zero>`__).
+<7) Set the PCI driver data and return zero._>`__).
 
 2) Create a card instance
 ~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -450,10 +450,10 @@ field contains the information shown in ``/proc/asound/cards``.
 5) Create other components, such as mixer, MIDI, etc.
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Here you define the basic components such as `PCM <#PCM-Interface>`__,
-mixer (e.g. `AC97 <#API-for-AC97-Codec>`__), MIDI (e.g.
-`MPU-401 <#MIDI-MPU401-UART-Interface>`__), and other interfaces.
-Also, if you want a `proc file <#Proc-Interface>`__, define it here,
+Here you define the basic components such as `PCM <PCM Interface_>`__,
+mixer (e.g. `AC97 <API for AC97 Codec_>`__), MIDI (e.g.
+`MPU-401 <MIDI (MPU401-UART) Interface_>`__), and other interfaces.
+Also, if you want a `proc file <Proc Interface_>`__, define it here,
 too.
 
 6) Register the card instance.
@@ -941,7 +941,7 @@ The allocation of an interrupt source is done like this:
   chip->irq = pci->irq;
 
 where :c:func:`snd_mychip_interrupt()` is the interrupt handler
-defined `later <#pcm-interface-interrupt-handler>`__. Note that
+defined `later <PCM Interrupt Handler_>`__. Note that
 ``chip->irq`` should be defined only when :c:func:`request_irq()`
 succeeded.
 
@@ -3104,7 +3104,7 @@ processing the output stream in the irq handler.
 
 If the MPU-401 interface shares its interrupt with the other logical
 devices on the card, set ``MPU401_INFO_IRQ_HOOK`` (see
-`below <#MIDI-Interrupt-Handler>`__).
+`below <MIDI Interrupt Handler_>`__).
 
 Usually, the port address corresponds to the command port and port + 1
 corresponds to the data port. If not, you may change the ``cport``

From 81e79063004f32aae5196f0c929192e69aca1694 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 26 Dec 2020 09:44:33 -0800
Subject: [PATCH 273/547] Documentation: admin: early_param()s are also listed
 in kernel-parameters

Add info that "early_param()" kernel boot parameters are also listed
in kernel-parameters.txt.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-doc@vger.kernel.org
Link: https://lore.kernel.org/r/20201226174433.7885-1-rdunlap@infradead.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 06fb1b4aa849..682ab28b5c94 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -3,8 +3,8 @@
 The kernel's command-line parameters
 ====================================
 
-The following is a consolidated list of the kernel parameters as
-implemented by the __setup(), core_param() and module_param() macros
+The following is a consolidated list of the kernel parameters as implemented
+by the __setup(), early_param(), core_param() and module_param() macros
 and sorted into English Dictionary order (defined as ignoring all
 punctuation and sorting digits before letters in a case insensitive
 manner), and with descriptions where known.

From c7e74b3c7b1cf4c04164ff16e6c047232fd3bcef Mon Sep 17 00:00:00 2001
From: Liao Pingfang <winndows@163.com>
Date: Sun, 27 Dec 2020 15:15:19 +0800
Subject: [PATCH 274/547] docs/mm: concepts.rst: Correct the threshold to low
 watermark

It should be "low watermark" where we wake up kswapd daemon.

Signed-off-by: Liao Pingfang <winndows@163.com>
Link: https://lore.kernel.org/r/1609053319-3112-1-git-send-email-winndows@163.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/mm/concepts.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/mm/concepts.rst b/Documentation/admin-guide/mm/concepts.rst
index fa0974fbeae7..b966fcff993b 100644
--- a/Documentation/admin-guide/mm/concepts.rst
+++ b/Documentation/admin-guide/mm/concepts.rst
@@ -184,7 +184,7 @@ pages either asynchronously or synchronously, depending on the state
 of the system. When the system is not loaded, most of the memory is free
 and allocation requests will be satisfied immediately from the free
 pages supply. As the load increases, the amount of the free pages goes
-down and when it reaches a certain threshold (high watermark), an
+down and when it reaches a certain threshold (low watermark), an
 allocation request will awaken the ``kswapd`` daemon. It will
 asynchronously scan memory pages and either just free them if the data
 they contain is available elsewhere, or evict to the backing storage

From 0be1511f516e2b9766597336cedc6dc6d19e5af1 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 28 Dec 2020 15:12:12 -0800
Subject: [PATCH 275/547] Documentation: doc-guide: fixes to sphinx.rst

Various fixes to sphinx.rst:

- eliminate a double-space between 2 words
- grammar/wording
- punctuation
- call rows in a table 'rows' instead of 'columns' (or does Sphinx
  call everything a column?)
- It seems that "amdfonts" should be "amsfonts". I can't find any
  amdfonts.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/20201228231212.22448-1-rdunlap@infradead.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/doc-guide/sphinx.rst | 32 +++++++++++++++---------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index 2fb2ff297d69..36ac2166ad67 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -48,12 +48,12 @@ or ``virtualenv``, depending on how your distribution packaged Python 3.
       those versions, you should run ``pip install 'docutils==0.12'``.
 
    #) It is recommended to use the RTD theme for html output. Depending
-      on the Sphinx version, it should be installed  in separate,
+      on the Sphinx version, it should be installed separately,
       with ``pip install sphinx_rtd_theme``.
 
-   #) Some ReST pages contain math expressions. Due to the way Sphinx work,
+   #) Some ReST pages contain math expressions. Due to the way Sphinx works,
       those expressions are written using LaTeX notation. It needs texlive
-      installed with amdfonts and amsmath in order to evaluate them.
+      installed with amsfonts and amsmath in order to evaluate them.
 
 In summary, if you want to install Sphinx version 1.7.9, you should do::
 
@@ -128,7 +128,7 @@ Sphinx Build
 ============
 
 The usual way to generate the documentation is to run ``make htmldocs`` or
-``make pdfdocs``. There are also other formats available, see the documentation
+``make pdfdocs``. There are also other formats available: see the documentation
 section of ``make help``. The generated documentation is placed in
 format-specific subdirectories under ``Documentation/output``.
 
@@ -303,17 +303,17 @@ and *targets* (e.g. a ref to ``:ref:`last row <last row>``` / :ref:`last row
         - head col 3
         - head col 4
 
-      * - column 1
+      * - row 1
         - field 1.1
         - field 1.2 with autospan
 
-      * - column 2
+      * - row 2
         - field 2.1
         - :rspan:`1` :cspan:`1` field 2.2 - 3.3
 
       * .. _`last row`:
 
-        - column 3
+        - row 3
 
 Rendered as:
 
@@ -325,17 +325,17 @@ Rendered as:
         - head col 3
         - head col 4
 
-      * - column 1
+      * - row 1
         - field 1.1
         - field 1.2 with autospan
 
-      * - column 2
+      * - row 2
         - field 2.1
         - :rspan:`1` :cspan:`1` field 2.2 - 3.3
 
       * .. _`last row`:
 
-        - column 3
+        - row 3
 
 Cross-referencing
 -----------------
@@ -361,7 +361,7 @@ Figures & Images
 
 If you want to add an image, you should use the ``kernel-figure`` and
 ``kernel-image`` directives. E.g. to insert a figure with a scalable
-image format use SVG (:ref:`svg_image_example`)::
+image format, use SVG (:ref:`svg_image_example`)::
 
     .. kernel-figure::  svg_image.svg
        :alt:    simple SVG image
@@ -375,7 +375,7 @@ image format use SVG (:ref:`svg_image_example`)::
 
    SVG image example
 
-The kernel figure (and image) directive support **DOT** formatted files, see
+The kernel figure (and image) directive supports **DOT** formatted files, see
 
 * DOT: http://graphviz.org/pdf/dotguide.pdf
 * Graphviz: http://www.graphviz.org/content/dot-language
@@ -394,7 +394,7 @@ A simple example (:ref:`hello_dot_file`)::
 
    DOT's hello world example
 
-Embed *render* markups (or languages) like Graphviz's **DOT** is provided by the
+Embedded *render* markups (or languages) like Graphviz's **DOT** are provided by the
 ``kernel-render`` directives.::
 
   .. kernel-render:: DOT
@@ -406,7 +406,7 @@ Embed *render* markups (or languages) like Graphviz's **DOT** is provided by the
      }
 
 How this will be rendered depends on the installed tools. If Graphviz is
-installed, you will see an vector image. If not the raw markup is inserted as
+installed, you will see a vector image. If not, the raw markup is inserted as
 *literal-block* (:ref:`hello_dot_render`).
 
 .. _hello_dot_render:
@@ -421,8 +421,8 @@ installed, you will see an vector image. If not the raw markup is inserted as
 
 The *render* directive has all the options known from the *figure* directive,
 plus option ``caption``.  If ``caption`` has a value, a *figure* node is
-inserted. If not, a *image* node is inserted. A ``caption`` is also needed, if
-you want to refer it (:ref:`hello_svg_render`).
+inserted. If not, an *image* node is inserted. A ``caption`` is also needed, if
+you want to refer to it (:ref:`hello_svg_render`).
 
 Embedded **SVG**::
 

From 798ed7800e20dfc3304de1b99df5ac71ad48966b Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Sun, 20 Dec 2020 07:09:27 +0100
Subject: [PATCH 276/547] atomic: remove further references to atomic_ops

Commit f0400a77ebdc ("atomic: Delete obsolete documentation") removed
./Documentation/core-api/atomic_ops.rst, but missed to remove further
references to that file.

Hence, make htmldocs warns:

  Documentation/core-api/index.rst:53: WARNING:
  toctree contains reference to nonexisting document 'core-api/atomic_ops'

Also, ./scripts/get_maintainer.pl --self-test=patterns warns:

  warning: no file matches    F:    Documentation/core-api/atomic_ops.rst

Remove further references to ./Documentation/core-api/atomic_ops.rst.

Fixes: f0400a77ebdc ("atomic: Delete obsolete documentation")
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Link: https://lore.kernel.org/r/20201220060927.21582-1-lukas.bulwahn@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/index.rst | 1 -
 MAINTAINERS                      | 1 -
 2 files changed, 2 deletions(-)

diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 69171b1799f2..f1c9d20bd42d 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -53,7 +53,6 @@ How Linux keeps everything from happening at the same time.  See
 .. toctree::
    :maxdepth: 1
 
-   atomic_ops
    refcount-vs-atomic
    irq/index
    local_ops
diff --git a/MAINTAINERS b/MAINTAINERS
index 546aa66428c9..ddc8e90c1224 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10260,7 +10260,6 @@ S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
 F:	Documentation/atomic_bitops.txt
 F:	Documentation/atomic_t.txt
-F:	Documentation/core-api/atomic_ops.rst
 F:	Documentation/core-api/refcount-vs-atomic.rst
 F:	Documentation/litmus-tests/
 F:	Documentation/memory-barriers.txt

From 3d5c5fdcee0f9a94deb0472e594706018b00aa31 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 1 Jan 2021 09:38:52 +0100
Subject: [PATCH 277/547] ALSA: hda/hdmi: Fix incorrect mutex unlock in
 silent_stream_disable()

The silent_stream_disable() function introduced by the commit
b1a5039759cb ("ALSA: hda/hdmi: fix silent stream for first playback to
DP") takes the per_pin->lock mutex, but it unlocks the wrong one,
spec->pcm_lock, which causes a deadlock.  This patch corrects it.

Fixes: b1a5039759cb ("ALSA: hda/hdmi: fix silent stream for first playback to DP")
Reported-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Cc: <stable@vger.kernel.org>
Acked-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Link: https://lore.kernel.org/r/20210101083852.12094-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_hdmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 1e4a4b83fbf6..74d246a0dc6d 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1733,7 +1733,7 @@ static void silent_stream_disable(struct hda_codec *codec,
 	per_pin->silent_stream = false;
 
  unlock_out:
-	mutex_unlock(&spec->pcm_lock);
+	mutex_unlock(&per_pin->lock);
 }
 
 /* update ELD and jack state via audio component */

From cedd1862be7e666be87ec824dabc6a2b05618f36 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 28 Dec 2020 11:40:22 -0800
Subject: [PATCH 278/547] depmod: handle the case of /sbin/depmod without /sbin
 in PATH

Commit 436e980e2ed5 ("kbuild: don't hardcode depmod path") stopped
hard-coding the path of depmod, but in the process caused trouble for
distributions that had that /sbin location, but didn't have it in the
PATH (generally because /sbin is limited to the super-user path).

Work around it for now by just adding /sbin to the end of PATH in the
depmod.sh script.

Reported-and-tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/depmod.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/depmod.sh b/scripts/depmod.sh
index e083bcae343f..3643b4f896ed 100755
--- a/scripts/depmod.sh
+++ b/scripts/depmod.sh
@@ -15,6 +15,8 @@ if ! test -r System.map ; then
 	exit 0
 fi
 
+# legacy behavior: "depmod" in /sbin, no /sbin in PATH
+PATH="$PATH:/sbin"
 if [ -z $(command -v $DEPMOD) ]; then
 	echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2
 	echo "This is probably in the kmod package." >&2

From fd16931a2f518a32753920ff20895e5cf04c8ff1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 13 Dec 2020 15:39:29 +0100
Subject: [PATCH 279/547] crypto: arm/chacha-neon - add missing counter
 increment

Commit 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block
size multiples") refactored the chacha block handling in the glue code in
a way that may result in the counter increment to be omitted when calling
chacha_block_xor_neon() to process a full block. This violates the skcipher
API, which requires that the output IV is suitable for handling more input
as long as the preceding input has been presented in round multiples of the
block size. Also, the same code is exposed via the chacha library interface
whose callers may actually rely on this increment to occur even for final
blocks that are smaller than the chacha block size.

So increment the counter after calling chacha_block_xor_neon().

Fixes: 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block size multiples")
Reported-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/chacha-glue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
index 7b5cf8430c6d..cdde8fd01f8f 100644
--- a/arch/arm/crypto/chacha-glue.c
+++ b/arch/arm/crypto/chacha-glue.c
@@ -60,6 +60,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
 		chacha_block_xor_neon(state, d, s, nrounds);
 		if (d != dst)
 			memcpy(dst, buf, bytes);
+		state[12]++;
 	}
 }
 

From 0aa171e9b267ce7c52d3a3df7bc9c1fc0203dec5 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sat, 2 Jan 2021 14:59:09 +0100
Subject: [PATCH 280/547] crypto: ecdh - avoid buffer overflow in
 ecdh_set_secret()

Pavel reports that commit 17858b140bf4 ("crypto: ecdh - avoid unaligned
accesses in ecdh_set_secret()") fixes one problem but introduces another:
the unconditional memcpy() introduced by that commit may overflow the
target buffer if the source data is invalid, which could be the result of
intentional tampering.

So check params.key_size explicitly against the size of the target buffer
before validating the key further.

Fixes: 17858b140bf4 ("crypto: ecdh - avoid unaligned accesses in ecdh_set_secret()")
Reported-by: Pavel Machek <pavel@denx.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ecdh.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index d56b8603dec9..96f80c8f8e30 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -39,7 +39,8 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 	struct ecdh params;
 	unsigned int ndigits;
 
-	if (crypto_ecdh_decode_key(buf, len, &params) < 0)
+	if (crypto_ecdh_decode_key(buf, len, &params) < 0 ||
+	    params.key_size > sizeof(ctx->private_key))
 		return -EINVAL;
 
 	ndigits = ecdh_supported_curve(params.curve_id);

From 04901aab40ea3779f6fc6383ef74d8e130e817bf Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 30 Dec 2020 21:24:18 -0800
Subject: [PATCH 281/547] bpf: Fix a task_iter bug caused by a merge conflict
 resolution

Latest bpf tree has a bug for bpf_iter selftest:

  $ ./test_progs -n 4/25
  test_bpf_sk_storage_get:PASS:bpf_iter_bpf_sk_storage_helpers__open_and_load 0 nsec
  test_bpf_sk_storage_get:PASS:socket 0 nsec
  ...
  do_dummy_read:PASS:read 0 nsec
  test_bpf_sk_storage_get:FAIL:bpf_map_lookup_elem map value wasn't set correctly
                          (expected 1792, got -1, err=0)
  #4/25 bpf_sk_storage_get:FAIL
  #4 bpf_iter:FAIL
  Summary: 0/0 PASSED, 0 SKIPPED, 2 FAILED

When doing merge conflict resolution, Commit 4bfc4714849d missed to
save curr_task to seq_file private data. The task pointer in seq_file
private data is passed to bpf program. This caused NULL-pointer task
passed to bpf program which will immediately return upon checking
whether task pointer is NULL.

This patch added back the assignment of curr_task to seq_file private
data and fixed the issue.

Fixes: 4bfc4714849d ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201231052418.577024-1-yhs@fb.com
---
 kernel/bpf/task_iter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 3efe38191d1c..175b7b42bfc4 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -159,6 +159,7 @@ again:
                 }
 
                 /* set info->task and info->tid */
+		info->task = curr_task;
 		if (curr_tid == info->tid) {
 			curr_fd = info->fd;
 		} else {

From b0e1306302018d876472ed074c1bfaa8020bf9df Mon Sep 17 00:00:00 2001
From: Timon Reinold <tirei@agon.one>
Date: Sat, 2 Jan 2021 22:08:35 +0100
Subject: [PATCH 282/547] ALSA: usb-audio: Add quirk for RC-505

BOSS RC-505 (shown by lsusb as "Roland Corp. RC-505") does require the
same quirk as these other BOSS devices.

Without this quirk it is neither possible to capture audio from nor to
write audio to the RC-505. Both just result in an empty audio
stream. With these changes both capture and playback seem to work
quite fine. MIDI funtionality was not tested.

Tested-by: Harry Reinold <harry.reinold@posteo.de>
Signed-off-by: Timon Reinold <tirei@agon.one>
Link: https://lore.kernel.org/r/20210102210835.21268-1-tirei@agon.one
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/implicit.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c
index 70b9777b2e63..931042a6a051 100644
--- a/sound/usb/implicit.c
+++ b/sound/usb/implicit.c
@@ -74,6 +74,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = {
 
 	/* No quirk for playback but with capture quirk (see below) */
 	IMPLICIT_FB_SKIP_DEV(0x0582, 0x0130),	/* BOSS BR-80 */
+	IMPLICIT_FB_SKIP_DEV(0x0582, 0x0171),   /* BOSS RC-505 */
 	IMPLICIT_FB_SKIP_DEV(0x0582, 0x0189),	/* BOSS GT-100v2 */
 	IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d6),	/* BOSS GT-1 */
 	IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d8),	/* BOSS Katana */
@@ -86,6 +87,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = {
 /* Implicit feedback quirk table for capture: only FIXED type */
 static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = {
 	IMPLICIT_FB_FIXED_DEV(0x0582, 0x0130, 0x0d, 0x01), /* BOSS BR-80 */
+	IMPLICIT_FB_FIXED_DEV(0x0582, 0x0171, 0x0d, 0x01), /* BOSS RC-505 */
 	IMPLICIT_FB_FIXED_DEV(0x0582, 0x0189, 0x0d, 0x01), /* BOSS GT-100v2 */
 	IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d6, 0x0d, 0x01), /* BOSS GT-1 */
 	IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d8, 0x0d, 0x01), /* BOSS Katana */

From 36a106a4c1c100d55ba3d32a21ef748cfcd4fa99 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:42:39 +0100
Subject: [PATCH 283/547] block: rsxx: select CONFIG_CRC32

Without crc32, the driver fails to link:

arm-linux-gnueabi-ld: drivers/block/rsxx/config.o: in function `rsxx_load_config':
config.c:(.text+0x124): undefined reference to `crc32_le'

Fixes: 8722ff8cdbfa ("block: IBM RamSan 70/80 device driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 262326973ee0..583b671b1d2d 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -445,6 +445,7 @@ config BLK_DEV_RBD
 config BLK_DEV_RSXX
 	tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
 	depends on PCI
+	select CRC32
 	help
 	  Device driver for IBM's high speed PCIe SSD
 	  storage device: Flash Adapter 900GB Full Height.

From 19cd3403cb0d522dd5e10188eef85817de29e26e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:43:09 +0100
Subject: [PATCH 284/547] lightnvm: select CONFIG_CRC32

Without CRC32 support, this fails to link:

arm-linux-gnueabi-ld: drivers/lightnvm/pblk-init.o: in function `pblk_init':
pblk-init.c:(.text+0x2654): undefined reference to `crc32_le'
arm-linux-gnueabi-ld: drivers/lightnvm/pblk-init.o: in function `pblk_exit':
pblk-init.c:(.text+0x2a7c): undefined reference to `crc32_le'

Fixes: a4bd217b4326 ("lightnvm: physical block device (pblk) target")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 8f39f9ba5c80..4c2ce210c123 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -19,6 +19,7 @@ if NVM
 
 config NVM_PBLK
 	tristate "Physical Block Device Open-Channel SSD target"
+	select CRC32
 	help
 	  Allows an open-channel SSD to be exposed as a block device to the
 	  host. The target assumes the device exposes raw flash and must be

From e71ba9452f0b5b2e8dc8aa5445198cd9214a6a62 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 3 Jan 2021 15:55:30 -0800
Subject: [PATCH 285/547] Linux 5.11-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 3d328b7ab200..8b2c3f88ee5e 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*

From 4f8b848788f77c7f5c3bd98febce66b7aa14785f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:43:52 +0100
Subject: [PATCH 286/547] zonefs: select CONFIG_CRC32

When CRC32 is disabled, zonefs cannot be linked:

ld: fs/zonefs/super.o: in function `zonefs_fill_super':

Add a Kconfig 'select' statement for it.

Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
---
 fs/zonefs/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig
index ef2697b78820..827278f937fe 100644
--- a/fs/zonefs/Kconfig
+++ b/fs/zonefs/Kconfig
@@ -3,6 +3,7 @@ config ZONEFS_FS
 	depends on BLOCK
 	depends on BLK_DEV_ZONED
 	select FS_IOMAP
+	select CRC32
 	help
 	  zonefs is a simple file system which exposes zones of a zoned block
 	  device (e.g. host-managed or host-aware SMR disk drives) as files.

From 5136bb8c8b5872676f397b27f93a30568baf3a25 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Sat, 19 Dec 2020 17:24:56 +0100
Subject: [PATCH 287/547] MAINTAINERS: adjust GCC PLUGINS after gcc-plugin.sh
 removal

Commit 1e860048c53e ("gcc-plugins: simplify GCC plugin-dev capability test")
removed ./scripts/gcc-plugin.sh, but missed to adjust MAINTAINERS.

Hence, ./scripts/get_maintainers.pl --self-test=patterns warns:

  warning: no file matches    F:    scripts/gcc-plugin.sh

Adjust entries in GGC PLUGINS section after this file removal.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6eff4f720c72..181c32c18aaf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7363,7 +7363,6 @@ L:	linux-hardening@vger.kernel.org
 S:	Maintained
 F:	Documentation/kbuild/gcc-plugins.rst
 F:	scripts/Makefile.gcc-plugins
-F:	scripts/gcc-plugin.sh
 F:	scripts/gcc-plugins/
 
 GCOV BASED KERNEL PROFILING

From d39648eb67ac851c7918c794424c266a5d2635b9 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 19 Dec 2020 09:08:05 -0800
Subject: [PATCH 288/547] kconfig: config script: add a little user help

Give the user a clue about the problem along with the 35 lines of
usage/help text.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: linux-kbuild@vger.kernel.org
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/config b/scripts/config
index 8c8d7c3d7acc..ff88e2faefd3 100755
--- a/scripts/config
+++ b/scripts/config
@@ -223,6 +223,7 @@ while [ "$1" != "" ] ; do
 		;;
 
 	*)
+		echo "bad command: $CMD" >&2
 		usage
 		;;
 	esac

From c0f975af1745391749e4306aa8081b9a4d2cced8 Mon Sep 17 00:00:00 2001
From: John Millikin <john@john-millikin.com>
Date: Wed, 23 Dec 2020 14:04:23 +0900
Subject: [PATCH 289/547] kconfig: Support building mconf with vendor sysroot
 ncurses

Changes the final fallback path in the ncurses locator for mconf
to support host compilers with a non-default sysroot.

This is similar to the hardcoded search for ncurses under
'/usr/include', but can support compilers that keep their default
header and library directories elsewhere.

For nconfig, do nothing because the only vendor compiler I'm aware
of with this layout (Apple Clang) ships an ncurses version that's too
old for nconfig anyway.

Signed-off-by: John Millikin <john@john-millikin.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/mconf-cfg.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh
index aa68ec95620d..fcd4acd4e9cb 100755
--- a/scripts/kconfig/mconf-cfg.sh
+++ b/scripts/kconfig/mconf-cfg.sh
@@ -33,7 +33,9 @@ if [ -f /usr/include/ncurses/ncurses.h ]; then
 	exit 0
 fi
 
-if [ -f /usr/include/ncurses.h ]; then
+# As a final fallback before giving up, check if $HOSTCC knows of a default
+# ncurses installation (e.g. from a vendor-specific sysroot).
+if echo '#include <ncurses.h>' | "${HOSTCC}" -E - >/dev/null 2>&1; then
 	echo cflags=\"-D_GNU_SOURCE\"
 	echo libs=\"-lncurses\"
 	exit 0

From 0c36d88cff4d72149f94809303c5180b6f716d39 Mon Sep 17 00:00:00 2001
From: John Millikin <john@john-millikin.com>
Date: Wed, 23 Dec 2020 15:23:25 +0900
Subject: [PATCH 290/547] lib/raid6: Let $(UNROLL) rules work with macOS
 userland

Older versions of BSD awk are fussy about the order of '-v' and '-f'
flags, and require a space after the flag name. This causes build
failures on platforms with an old awk, such as macOS and NetBSD.

Since GNU awk and modern versions of BSD awk (distributed with
FreeBSD/OpenBSD) are fine with either form, the definition of
'cmd_unroll' can be trivially tweaked to let the lib/raid6 Makefile
work with both old and new awk flag dialects.

Signed-off-by: John Millikin <john@john-millikin.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 lib/raid6/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index b4c0df6d706d..c770570bfe4f 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -48,7 +48,7 @@ endif
 endif
 
 quiet_cmd_unroll = UNROLL  $@
-      cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$* < $< > $@
+      cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@
 
 targets += int1.c int2.c int4.c int8.c int16.c int32.c
 $(obj)/int%.c: $(src)/int.uc $(src)/unroll.awk FORCE

From 9bba03d4473df0b707224d4d2067b62d1e1e2a77 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 23 Dec 2020 15:35:42 +0900
Subject: [PATCH 291/547] kconfig: remove 'kvmconfig' and 'xenconfig'
 shorthands

Linux 5.10 is out. Remove the 'kvmconfig' and 'xenconfig' shorthands
as previously announced.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/Makefile | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index e46df0a2d4f9..2c40e68853dd 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -94,16 +94,6 @@ configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/c
 	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles)
 	$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
 
-PHONY += kvmconfig
-kvmconfig: kvm_guest.config
-	@echo >&2 "WARNING: 'make $@' will be removed after Linux 5.10"
-	@echo >&2 "         Please use 'make $<' instead."
-
-PHONY += xenconfig
-xenconfig: xen.config
-	@echo >&2 "WARNING: 'make $@' will be removed after Linux 5.10"
-	@echo >&2 "         Please use 'make $<' instead."
-
 PHONY += tinyconfig
 tinyconfig:
 	$(Q)$(MAKE) -f $(srctree)/Makefile allnoconfig tiny.config

From d6c1ddd938d84a1adef7e19e8efc10e1b4df5034 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Wed, 30 Dec 2020 16:25:34 +0100
Subject: [PATCH 292/547] USB: serial: option: add Quectel EM160R-GL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New modem using ff/ff/30 for QCDM, ff/00/00 for  AT and NMEA,
and ff/ff/ff for RMNET/QMI.

T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0
D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1
P: Vendor=2c7c ProdID=0620 Rev= 4.09
S: Manufacturer=Quectel
S: Product=EM160R-GL
S: SerialNumber=e31cedc1
C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA
I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none)
E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none)
E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms
E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms
I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none)
E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms
E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms
I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none)
E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms
E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms
I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)
E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms
E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms

Cc: stable@vger.kernel.org
Signed-off-by: Bjørn Mork <bjorn@mork.no>
[ johan: add model comment ]
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 2c21e34235bb..ee726a106706 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1117,6 +1117,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff),
 	  .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
 	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) },	/* EM160R-GL */
+	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10),

From 42e85f90171a4ba59a1e1cedbbc30ce3f68f2317 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 15 Dec 2020 11:30:26 +0100
Subject: [PATCH 293/547] arm64/smp: Remove unused irq variable in
 arch_show_interrupts()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

    arch/arm64/kernel/smp.c: In function ‘arch_show_interrupts’:
    arch/arm64/kernel/smp.c:808:16: warning: unused variable ‘irq’ [-Wunused-variable]
      808 |   unsigned int irq = irq_desc_get_irq(ipi_desc[i]);
	  |                ^~~

The removal of the last user forgot to remove the variable.

Fixes: 5089bc51f81f ("arm64/smp: Use irq_desc_kstat_cpu() in arch_show_interrupts()")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20201215103026.2872532-1-geert+renesas@glider.be
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/smp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6bc3a3698c3d..376343d6f13a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -807,7 +807,6 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	unsigned int cpu, i;
 
 	for (i = 0; i < NR_IPI; i++) {
-		unsigned int irq = irq_desc_get_irq(ipi_desc[i]);
 		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
 			   prec >= 4 ? " " : "");
 		for_each_online_cpu(cpu)

From b614231dec7864a338ce85032aa3d2d7ea2bc46d Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Wed, 2 Dec 2020 23:34:58 -0800
Subject: [PATCH 294/547] arm64: mte: remove an ISB on kernel exit

This ISB is unnecessary because we will soon do an ERET.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I69f1ee6bb09b1372dd744a0e01cedaf090c8d448
Link: https://lore.kernel.org/r/20201203073458.2675400-1-pcc@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2a93fa5f4e49..a8c3e7aaca74 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -182,7 +182,6 @@ alternative_else_nop_endif
 	mrs_s	\tmp2, SYS_GCR_EL1
 	bfi	\tmp2, \tmp, #0, #16
 	msr_s	SYS_GCR_EL1, \tmp2
-	isb
 #endif
 	.endm
 
@@ -194,6 +193,7 @@ alternative_else_nop_endif
 	ldr_l	\tmp, gcr_kernel_excl
 
 	mte_set_gcr \tmp, \tmp2
+	isb
 1:
 #endif
 	.endm

From 095507dc1350b3a2b8b39fdc05edba0c10859eca Mon Sep 17 00:00:00 2001
From: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
Date: Fri, 18 Dec 2020 17:33:07 +0100
Subject: [PATCH 295/547] arm64: mm: Fix ARCH_LOW_ADDRESS_LIMIT when
 !CONFIG_ZONE_DMA

Systems configured with CONFIG_ZONE_DMA32, CONFIG_ZONE_NORMAL and
!CONFIG_ZONE_DMA will fail to properly setup ARCH_LOW_ADDRESS_LIMIT. The
limit will default to ~0ULL, effectively spanning the whole memory,
which is too high for a configuration that expects low memory to be
capped at 4GB.

Fix ARCH_LOW_ADDRESS_LIMIT by falling back to arm64_dma32_phys_limit
when arm64_dma_phys_limit isn't set. arm64_dma32_phys_limit will honour
CONFIG_ZONE_DMA32, or span the entire memory when not enabled.

Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32")
Signed-off-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
Link: https://lore.kernel.org/r/20201218163307.10150-1-nsaenzjulienne@suse.de
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/processor.h | 3 ++-
 arch/arm64/mm/init.c               | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ca2cd75d3286..69ad25fbeae4 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -94,7 +94,8 @@
 #endif /* CONFIG_ARM64_FORCE_52BIT */
 
 extern phys_addr_t arm64_dma_phys_limit;
-#define ARCH_LOW_ADDRESS_LIMIT	(arm64_dma_phys_limit - 1)
+extern phys_addr_t arm64_dma32_phys_limit;
+#define ARCH_LOW_ADDRESS_LIMIT	((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1)
 
 struct debug_info {
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 75addb36354a..7deddf56f7c3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(memstart_addr);
  * bit addressable memory area.
  */
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
-static phys_addr_t arm64_dma32_phys_limit __ro_after_init;
+phys_addr_t arm64_dma32_phys_limit __ro_after_init;
 
 #ifdef CONFIG_KEXEC_CORE
 /*

From 26982a89cad77c0efc1c0c79bee0e3d75e9281d4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 21 Dec 2020 22:37:58 +0000
Subject: [PATCH 296/547] afs: Work around strnlen() oops with
 CONFIG_FORTIFIED_SOURCE=y

AFS has a structured layout in its directory contents (AFS dirs are
downloaded as files and parsed locally by the client for lookup/readdir).
The slots in the directory are defined by union afs_xdr_dirent.  This,
however, only directly allows a name of a length that will fit into that
union.  To support a longer name, the next 1-8 contiguous entries are
annexed to the first one and the name flows across these.

afs_dir_iterate_block() uses strnlen(), limited to the space to the end of
the page, to find out how long the name is.  This worked fine until
6a39e62abbaf.  With that commit, the compiler determines the size of the
array and asserts that the string fits inside that array.  This is a
problem for AFS because we *expect* it to overflow one or more arrays.

A similar problem also occurs in afs_dir_scan_block() when a directory file
is being locally edited to avoid the need to redownload it.  There strlen()
was being used safely because each page has the last byte set to 0 when the
file is downloaded and validated (in afs_dir_check_page()).

Fix this by changing the afs_xdr_dirent union name field to an
indeterminate-length array and dropping the overflow field.

(Note that whilst looking at this, I realised that the calculation of the
number of slots a dirent used is non-standard and not quite right, but I'll
address that in a separate patch.)

The issue can be triggered by something like:

        touch /afs/example.com/thisisaveryveryverylongname

and it generates a report that looks like:

        detected buffer overflow in strnlen
        ------------[ cut here ]------------
        kernel BUG at lib/string.c:1149!
        ...
        RIP: 0010:fortify_panic+0xf/0x11
        ...
        Call Trace:
         afs_dir_iterate_block+0x12b/0x35b
         afs_dir_iterate+0x14e/0x1ce
         afs_do_lookup+0x131/0x417
         afs_lookup+0x24f/0x344
         lookup_open.isra.0+0x1bb/0x27d
         open_last_lookups+0x166/0x237
         path_openat+0xe0/0x159
         do_filp_open+0x48/0xa4
         ? kmem_cache_alloc+0xf5/0x16e
         ? __clear_close_on_exec+0x13/0x22
         ? _raw_spin_unlock+0xa/0xb
         do_sys_openat2+0x72/0xde
         do_sys_open+0x3b/0x58
         do_syscall_64+0x2d/0x3a
         entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 6a39e62abbaf ("lib: string.h: detect intra-object overflow in fortified string functions")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
cc: Daniel Axtens <dja@axtens.net>
---
 fs/afs/xdr_fs.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h
index 94f1f398eefa..c926430fd08a 100644
--- a/fs/afs/xdr_fs.h
+++ b/fs/afs/xdr_fs.h
@@ -54,10 +54,15 @@ union afs_xdr_dirent {
 		__be16		hash_next;
 		__be32		vnode;
 		__be32		unique;
-		u8		name[16];
-		u8		overflow[4];	/* if any char of the name (inc
-						 * NUL) reaches here, consume
-						 * the next dirent too */
+		u8		name[];
+		/* When determining the number of dirent slots needed to
+		 * represent a directory entry, name should be assumed to be 16
+		 * bytes, due to a now-standardised (mis)calculation, but it is
+		 * in fact 20 bytes in size.
+		 *
+		 * For names longer than (16 or) 20 bytes, extra slots should
+		 * be annexed to this one using the extended_name format.
+		 */
 	} u;
 	u8			extended_name[32];
 } __packed;

From 366911cd762db02c2dd32fad1be96b72a66f205d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 23 Dec 2020 10:39:57 +0000
Subject: [PATCH 297/547] afs: Fix directory entry size calculation

The number of dirent records used by an AFS directory entry should be
calculated using the assumption that there is a 16-byte name field in the
first block, rather than a 20-byte name field (which is actually the case).
This miscalculation is historic and effectively standard, so we have to use
it.

The calculation we need to use is:

	1 + (((strlen(name) + 1) + 15) >> 5)

where we are adding one to the strlen() result to account for the NUL
termination.

Fix this by the following means:

 (1) Create an inline function to do the calculation for a given name
     length.

 (2) Use the function to calculate the number of records used for a dirent
     in afs_dir_iterate_block().

     Use this to move the over-end check out of the loop since it only
     needs to be done once.

     Further use this to only go through the loop for the 2nd+ records
     composing an entry.  The only test there now is for if the record is
     allocated - and we already checked the first block at the top of the
     outer loop.

 (3) Add a max name length check in afs_dir_iterate_block().

 (4) Make afs_edit_dir_add() and afs_edit_dir_remove() use the function
     from (1) to calculate the number of blocks rather than doing it
     incorrectly themselves.

Fixes: 63a4681ff39c ("afs: Locally edit directory data for mkdir/create/unlink/...")
Fixes: ^1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
---
 fs/afs/dir.c               | 49 ++++++++++++++++++++------------------
 fs/afs/dir_edit.c          |  6 ++---
 fs/afs/xdr_fs.h            | 14 ++++++++++-
 include/trace/events/afs.h |  2 ++
 4 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 9068d5578a26..7bd659ad959e 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -350,7 +350,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
 				 unsigned blkoff)
 {
 	union afs_xdr_dirent *dire;
-	unsigned offset, next, curr;
+	unsigned offset, next, curr, nr_slots;
 	size_t nlen;
 	int tmp;
 
@@ -363,13 +363,12 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
 	     offset < AFS_DIR_SLOTS_PER_BLOCK;
 	     offset = next
 	     ) {
-		next = offset + 1;
-
 		/* skip entries marked unused in the bitmap */
 		if (!(block->hdr.bitmap[offset / 8] &
 		      (1 << (offset % 8)))) {
 			_debug("ENT[%zu.%u]: unused",
 			       blkoff / sizeof(union afs_xdr_dir_block), offset);
+			next = offset + 1;
 			if (offset >= curr)
 				ctx->pos = blkoff +
 					next * sizeof(union afs_xdr_dirent);
@@ -381,35 +380,39 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
 		nlen = strnlen(dire->u.name,
 			       sizeof(*block) -
 			       offset * sizeof(union afs_xdr_dirent));
+		if (nlen > AFSNAMEMAX - 1) {
+			_debug("ENT[%zu]: name too long (len %u/%zu)",
+			       blkoff / sizeof(union afs_xdr_dir_block),
+			       offset, nlen);
+			return afs_bad(dvnode, afs_file_error_dir_name_too_long);
+		}
 
 		_debug("ENT[%zu.%u]: %s %zu \"%s\"",
 		       blkoff / sizeof(union afs_xdr_dir_block), offset,
 		       (offset < curr ? "skip" : "fill"),
 		       nlen, dire->u.name);
 
-		/* work out where the next possible entry is */
-		for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_xdr_dirent)) {
-			if (next >= AFS_DIR_SLOTS_PER_BLOCK) {
-				_debug("ENT[%zu.%u]:"
-				       " %u travelled beyond end dir block"
-				       " (len %u/%zu)",
+		nr_slots = afs_dir_calc_slots(nlen);
+		next = offset + nr_slots;
+		if (next > AFS_DIR_SLOTS_PER_BLOCK) {
+			_debug("ENT[%zu.%u]:"
+			       " %u extends beyond end dir block"
+			       " (len %zu)",
+			       blkoff / sizeof(union afs_xdr_dir_block),
+			       offset, next, nlen);
+			return afs_bad(dvnode, afs_file_error_dir_over_end);
+		}
+
+		/* Check that the name-extension dirents are all allocated */
+		for (tmp = 1; tmp < nr_slots; tmp++) {
+			unsigned int ix = offset + tmp;
+			if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) {
+				_debug("ENT[%zu.u]:"
+				       " %u unmarked extension (%u/%u)",
 				       blkoff / sizeof(union afs_xdr_dir_block),
-				       offset, next, tmp, nlen);
-				return afs_bad(dvnode, afs_file_error_dir_over_end);
-			}
-			if (!(block->hdr.bitmap[next / 8] &
-			      (1 << (next % 8)))) {
-				_debug("ENT[%zu.%u]:"
-				       " %u unmarked extension (len %u/%zu)",
-				       blkoff / sizeof(union afs_xdr_dir_block),
-				       offset, next, tmp, nlen);
+				       offset, tmp, nr_slots);
 				return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
 			}
-
-			_debug("ENT[%zu.%u]: ext %u/%zu",
-			       blkoff / sizeof(union afs_xdr_dir_block),
-			       next, tmp, nlen);
-			next++;
 		}
 
 		/* skip if starts before the current position */
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index 2ffe09abae7f..f4600c1353ad 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -215,8 +215,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
 	}
 
 	/* Work out how many slots we're going to need. */
-	need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE);
-	need_slots /= AFS_DIR_DIRENT_SIZE;
+	need_slots = afs_dir_calc_slots(name->len);
 
 	meta_page = kmap(page0);
 	meta = &meta_page->blocks[0];
@@ -393,8 +392,7 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
 	}
 
 	/* Work out how many slots we're going to discard. */
-	need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE);
-	need_slots /= AFS_DIR_DIRENT_SIZE;
+	need_slots = afs_dir_calc_slots(name->len);
 
 	meta_page = kmap(page0);
 	meta = &meta_page->blocks[0];
diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h
index c926430fd08a..8ca868164507 100644
--- a/fs/afs/xdr_fs.h
+++ b/fs/afs/xdr_fs.h
@@ -58,7 +58,8 @@ union afs_xdr_dirent {
 		/* When determining the number of dirent slots needed to
 		 * represent a directory entry, name should be assumed to be 16
 		 * bytes, due to a now-standardised (mis)calculation, but it is
-		 * in fact 20 bytes in size.
+		 * in fact 20 bytes in size.  afs_dir_calc_slots() should be
+		 * used for this.
 		 *
 		 * For names longer than (16 or) 20 bytes, extra slots should
 		 * be annexed to this one using the extended_name format.
@@ -101,4 +102,15 @@ struct afs_xdr_dir_page {
 	union afs_xdr_dir_block	blocks[AFS_DIR_BLOCKS_PER_PAGE];
 };
 
+/*
+ * Calculate the number of dirent slots required for any given name length.
+ * The calculation is made assuming the part of the name in the first slot is
+ * 16 bytes, rather than 20, but this miscalculation is now standardised.
+ */
+static inline unsigned int afs_dir_calc_slots(size_t name_len)
+{
+	name_len++; /* NUL-terminated */
+	return 1 + ((name_len + 15) / AFS_DIR_DIRENT_SIZE);
+}
+
 #endif /* XDR_FS_H */
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 4eef374d4413..4a5cc8c64be3 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -231,6 +231,7 @@ enum afs_file_error {
 	afs_file_error_dir_bad_magic,
 	afs_file_error_dir_big,
 	afs_file_error_dir_missing_page,
+	afs_file_error_dir_name_too_long,
 	afs_file_error_dir_over_end,
 	afs_file_error_dir_small,
 	afs_file_error_dir_unmarked_ext,
@@ -488,6 +489,7 @@ enum afs_cb_break_reason {
 	EM(afs_file_error_dir_bad_magic,	"DIR_BAD_MAGIC")	\
 	EM(afs_file_error_dir_big,		"DIR_BIG")		\
 	EM(afs_file_error_dir_missing_page,	"DIR_MISSING_PAGE")	\
+	EM(afs_file_error_dir_name_too_long,	"DIR_NAME_TOO_LONG")	\
 	EM(afs_file_error_dir_over_end,		"DIR_ENT_OVER_END")	\
 	EM(afs_file_error_dir_small,		"DIR_SMALL")		\
 	EM(afs_file_error_dir_unmarked_ext,	"DIR_UNMARKED_EXT")	\

From 0bd1bf86ab79555425b9f0b63005e181defe4da6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 14:57:29 +0100
Subject: [PATCH 298/547] dmaengine: qcom: fix gpi undefined behavior

gcc points out an incorrect error handling loop:

drivers/dma/qcom/gpi.c: In function 'gpi_ch_init':
drivers/dma/qcom/gpi.c:1254:15: error: iteration 2 invokes undefined behavior [-Werror=aggressive-loop-optimizations]
 1254 |  struct gpii *gpii = gchan->gpii;
      |               ^~~~
drivers/dma/qcom/gpi.c:1951:2: note: within this loop
 1951 |  for (i = i - 1; i >= 0; i++) {
      |  ^~~

Change the loop to correctly walk backwards through the
initialized fields rather than off into the woods.

Fixes: 5d0c3533a19f ("dmaengine: qcom: Add GPI dma driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20210103135738.3741123-1-arnd@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/qcom/gpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index 556c070a514c..1a0bf6b0567a 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -1948,7 +1948,7 @@ static int gpi_ch_init(struct gchan *gchan)
 	return ret;
 
 error_start_chan:
-	for (i = i - 1; i >= 0; i++) {
+	for (i = i - 1; i >= 0; i--) {
 		gpi_stop_chan(&gpii->gchan[i]);
 		gpi_send_cmd(gpii, gchan, GPI_CH_CMD_RESET);
 	}

From 99974aedbd73523969afb09f33c6e3047cd0ddae Mon Sep 17 00:00:00 2001
From: Shravya Kumbham <shravya.kumbham@xilinx.com>
Date: Wed, 23 Dec 2020 16:51:00 +0530
Subject: [PATCH 299/547] dmaengine: xilinx_dma: check
 dma_async_device_register return value

dma_async_device_register() can return non-zero error code. Add
condition to check the return value of dma_async_device_register
function and handle the error path.

Addresses-Coverity: Event check_return.
Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support")
Signed-off-by: Shravya Kumbham <shravya.kumbham@xilinx.com>
Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
Link: https://lore.kernel.org/r/1608722462-29519-2-git-send-email-radhey.shyam.pandey@xilinx.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dma.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 22faea653ea8..091f1f80dcff 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -3112,7 +3112,11 @@ static int xilinx_dma_probe(struct platform_device *pdev)
 	}
 
 	/* Register the DMA engine with the core */
-	dma_async_device_register(&xdev->common);
+	err = dma_async_device_register(&xdev->common);
+	if (err) {
+		dev_err(xdev->dev, "failed to register the dma device\n");
+		goto error;
+	}
 
 	err = of_dma_controller_register(node, of_dma_xilinx_xlate,
 					 xdev);

From faeb0731be0a31e2246b21a85fa7dabbd750101d Mon Sep 17 00:00:00 2001
From: Shravya Kumbham <shravya.kumbham@xilinx.com>
Date: Wed, 23 Dec 2020 16:51:01 +0530
Subject: [PATCH 300/547] dmaengine: xilinx_dma: fix incompatible param warning
 in _child_probe()

In xilinx_dma_child_probe function, the nr_channels variable is
passed to of_property_read_u32() which expects an u32 return value
pointer. Modify the nr_channels variable type from int to u32 to
fix the incompatible parameter coverity warning.

Addresses-Coverity: Event incompatible_param.
Fixes: 1a9e7a03c761 ("dmaengine: vdma: Add support for mulit-channel dma mode")
Signed-off-by: Shravya Kumbham <shravya.kumbham@xilinx.com>
Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
Link: https://lore.kernel.org/r/1608722462-29519-3-git-send-email-radhey.shyam.pandey@xilinx.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 091f1f80dcff..3e374e8d2244 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -2900,7 +2900,8 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev,
 				    struct device_node *node)
 {
-	int ret, i, nr_channels = 1;
+	int ret, i;
+	u32 nr_channels = 1;
 
 	ret = of_property_read_u32(node, "dma-channels", &nr_channels);
 	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0)

From 2d5efea64472469117dc1a9a39530069e95b21e9 Mon Sep 17 00:00:00 2001
From: Shravya Kumbham <shravya.kumbham@xilinx.com>
Date: Wed, 23 Dec 2020 16:51:02 +0530
Subject: [PATCH 301/547] dmaengine: xilinx_dma: fix mixed_enum_type coverity
 warning

Typecast the fls(width -1) with (enum dmaengine_alignment) in
xilinx_dma_chan_probe function to fix the coverity warning.

Addresses-Coverity: Event mixed_enum_type.
Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support")
Signed-off-by: Shravya Kumbham <shravya.kumbham@xilinx.com>
Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
Link: https://lore.kernel.org/r/1608722462-29519-4-git-send-email-radhey.shyam.pandey@xilinx.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 3e374e8d2244..79777550a6ff 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -2781,7 +2781,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 		has_dre = false;
 
 	if (!has_dre)
-		xdev->common.copy_align = fls(width - 1);
+		xdev->common.copy_align = (enum dmaengine_alignment)fls(width - 1);
 
 	if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel") ||
 	    of_device_is_compatible(node, "xlnx,axi-dma-mm2s-channel") ||

From 98bf2d3f4970179c702ef64db658e0553bc6ef3a Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Tue, 22 Dec 2020 07:11:18 +0000
Subject: [PATCH 302/547] powerpc/32s: Fix RTAS machine check with VMAP stack

When we have VMAP stack, exception prolog 1 sets r1, not r11.

When it is not an RTAS machine check, don't trash r1 because it is
needed by prolog 1.

Fixes: da7bb43ab9da ("powerpc/32: Fix vmap stack - Properly set r1 before activating MMU")
Fixes: d2e006036082 ("powerpc/32: Use SPRN_SPRG_SCRATCH2 in exception prologs")
Cc: stable@vger.kernel.org # v5.10+
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
[mpe: Squash in fixup for RTAS machine check from Christophe]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/bc77d61d1c18940e456a2dee464f1e2eda65a3f0.1608621048.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_book3s_32.S | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 349bf3f0c3af..858fbc8b19f3 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -260,10 +260,19 @@ __secondary_hold_acknowledge:
 MachineCheck:
 	EXCEPTION_PROLOG_0
 #ifdef CONFIG_PPC_CHRP
+#ifdef CONFIG_VMAP_STACK
+	mtspr	SPRN_SPRG_SCRATCH2,r1
+	mfspr	r1, SPRN_SPRG_THREAD
+	lwz	r1, RTAS_SP(r1)
+	cmpwi	cr1, r1, 0
+	bne	cr1, 7f
+	mfspr	r1, SPRN_SPRG_SCRATCH2
+#else
 	mfspr	r11, SPRN_SPRG_THREAD
 	lwz	r11, RTAS_SP(r11)
 	cmpwi	cr1, r11, 0
 	bne	cr1, 7f
+#endif
 #endif /* CONFIG_PPC_CHRP */
 	EXCEPTION_PROLOG_1 for_rtas=1
 7:	EXCEPTION_PROLOG_2

From 6170d077bf92c5b3dfbe1021688d3c0404f7c9e9 Mon Sep 17 00:00:00 2001
From: Xu Yilun <yilun.xu@intel.com>
Date: Mon, 4 Jan 2021 09:29:09 +0800
Subject: [PATCH 303/547] spi: fix the divide by 0 error when calculating xfer
 waiting time

The xfer waiting time is the result of xfer->len / xfer->speed_hz. This
patch makes the assumption of 100khz xfer speed if the xfer->speed_hz is
not assigned and stays 0. This avoids the divide by 0 issue and ensures
a reasonable tolerant waiting time.

Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Link: https://lore.kernel.org/r/1609723749-3557-1-git-send-email-yilun.xu@intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index f59bf5094adb..720ab34784c1 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1108,6 +1108,7 @@ static int spi_transfer_wait(struct spi_controller *ctlr,
 {
 	struct spi_statistics *statm = &ctlr->statistics;
 	struct spi_statistics *stats = &msg->spi->statistics;
+	u32 speed_hz = xfer->speed_hz;
 	unsigned long long ms;
 
 	if (spi_controller_is_slave(ctlr)) {
@@ -1116,8 +1117,11 @@ static int spi_transfer_wait(struct spi_controller *ctlr,
 			return -EINTR;
 		}
 	} else {
+		if (!speed_hz)
+			speed_hz = 100000;
+
 		ms = 8LL * 1000LL * xfer->len;
-		do_div(ms, xfer->speed_hz);
+		do_div(ms, speed_hz);
 		ms += ms + 200; /* some tolerance */
 
 		if (ms > UINT_MAX)

From 2bf3a72b08e7f6356a2db9e1571ca65f683510bb Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 31 Dec 2020 15:23:45 +0300
Subject: [PATCH 304/547] dt-bindings: regulator: qcom,rpmh-regulator: add
 pm8009 revision

PMIC pm8009 has special revision (P=1) made for sm8250 platform. The
major difference is the S2 regulator which supplies 0.95 V instead of
2.848V. Add special compatibility string for this chip revision.
The datasheet calls the chip just pm8009-1, so use the same name.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Link: https://lore.kernel.org/r/20201231122348.637917-2-dmitry.baryshkov@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/qcom,rpmh-regulator.txt        | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
index b8f0b7809c02..7d462b899473 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
@@ -44,6 +44,7 @@ First Level Nodes - PMIC
 	Definition: Must be one of below:
 		    "qcom,pm8005-rpmh-regulators"
 		    "qcom,pm8009-rpmh-regulators"
+		    "qcom,pm8009-1-rpmh-regulators"
 		    "qcom,pm8150-rpmh-regulators"
 		    "qcom,pm8150l-rpmh-regulators"
 		    "qcom,pm8350-rpmh-regulators"

From df6b92fa40050e59ea89784294bf6d04c0c47705 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 31 Dec 2020 15:23:46 +0300
Subject: [PATCH 305/547] regulator: qcom-rpmh-regulator: correct hfsmps515
 definition

According to the datasheet pm8009's HFS515 regulators have 16mV
resolution rather than declared 1.6 mV. Correct the resolution.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Fixes: 06369bcc15a1 ("regulator: qcom-rpmh: Add support for SM8150")
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Link: https://lore.kernel.org/r/20201231122348.637917-3-dmitry.baryshkov@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom-rpmh-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index fe030ec4b7db..c395a8dda6f7 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -726,7 +726,7 @@ static const struct rpmh_vreg_hw_data pmic5_ftsmps510 = {
 static const struct rpmh_vreg_hw_data pmic5_hfsmps515 = {
 	.regulator_type = VRM,
 	.ops = &rpmh_regulator_vrm_ops,
-	.voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 1600),
+	.voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 16000),
 	.n_voltages = 5,
 	.pmic_mode_map = pmic_mode_map_pmic5_smps,
 	.of_map_mode = rpmh_regulator_pmic4_smps_of_map_mode,

From d957d1610c661e758426654de3b04bea6fb29f8b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 30 Dec 2020 15:56:23 +0100
Subject: [PATCH 306/547] regulator: qcom-rpmh: add QCOM_COMMAND_DB dependency

A built-in regulator driver cannot link against a modular cmd_db driver:

qcom-rpmh-regulator.c:(.text+0x174): undefined reference to `cmd_db_read_addr'

There is already a dependency for RPMh, so add another one of this
type for cmd_db.

Fixes: 34c5aa2666db ("regulator: Kconfig: Fix REGULATOR_QCOM_RPMH dependencies to avoid build error")
Fixes: 46fc033eba42 ("regulator: add QCOM RPMh regulator driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20201230145712.3133110-1-arnd@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 53fa84f4d1e1..5abdd29fb9f3 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -881,6 +881,7 @@ config REGULATOR_QCOM_RPM
 config REGULATOR_QCOM_RPMH
 	tristate "Qualcomm Technologies, Inc. RPMh regulator driver"
 	depends on QCOM_RPMH || (QCOM_RPMH=n && COMPILE_TEST)
+	depends on QCOM_COMMAND_DB || (QCOM_COMMAND_DB=n && COMPILE_TEST)
 	help
 	  This driver supports control of PMIC regulators via the RPMh hardware
 	  block found on Qualcomm Technologies Inc. SoCs.  RPMh regulator

From 4b1a60a1bb8f03d82c3f6da424adc96667b59f2a Mon Sep 17 00:00:00 2001
From: Georgi Djakov <georgi.djakov@linaro.org>
Date: Mon, 4 Jan 2021 15:50:43 +0200
Subject: [PATCH 307/547] MAINTAINERS: Update Georgi's email address

Use my kernel.org email as main address to make things a bit easier
for me to handle.

Link: https://lore.kernel.org/r/20210104135043.31262-1-georgi.djakov@linaro.org
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 546aa66428c9..a15e306123ef 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9240,7 +9240,7 @@ F:	tools/testing/selftests/sgx/*
 K:	\bSGX_
 
 INTERCONNECT API
-M:	Georgi Djakov <georgi.djakov@linaro.org>
+M:	Georgi Djakov <djakov@kernel.org>
 L:	linux-pm@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/interconnect/

From 0e2d6795e8dbe91c2f5473564c6b25d11df3778b Mon Sep 17 00:00:00 2001
From: Daniel Palmer <daniel@0x0f.com>
Date: Sun, 27 Dec 2020 12:17:16 +0900
Subject: [PATCH 308/547] USB: serial: option: add LongSung M5710 module
 support

Add a device-id entry for the LongSung M5710 module.

T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=480  MxCh= 0
D:  Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=2df3 ProdID=9d03 Rev= 1.00
S:  Manufacturer=Marvell
S:  Product=Mobile Composite Device Bus
S:  SerialNumber=<snip>
C:* #Ifs= 5 Cfg#= 1 Atr=c0 MxPwr=500mA
A:  FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=03
I:* If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host
E:  Ad=87(I) Atr=03(Int.) MxPS=  64 Ivl=4096ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host
E:  Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=0c(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=0b(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E:  Ad=89(I) Atr=03(Int.) MxPS=  64 Ivl=4096ms
E:  Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E:  Ad=88(I) Atr=03(Int.) MxPS=  64 Ivl=4096ms
E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=0a(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms

Signed-off-by: Daniel Palmer <daniel@0x0f.com>
https://lore.kernel.org/r/20201227031716.1343300-1-daniel@0x0f.com
[ johan: drop id defines, only bind to vendor class ]
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index ee726a106706..3fe959104311 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -2059,6 +2059,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff),			/* Fibocom NL678 series */
 	  .driver_info = RSVD(6) },
 	{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) },			/* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
+	{ USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) },			/* LongSung M5710 */
 	{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) },			/* GosunCn GM500 RNDIS */
 	{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) },			/* GosunCn GM500 MBIM */
 	{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) },			/* GosunCn GM500 ECM/NCM */

From 54d0a3ab80f49f19ee916def62fe067596833403 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 4 Jan 2021 15:50:07 +0100
Subject: [PATCH 309/547] USB: serial: iuu_phoenix: fix DMA from stack

Stack-allocated buffers cannot be used for DMA (on all architectures) so
allocate the flush command buffer using kmalloc().

Fixes: 60a8fc017103 ("USB: add iuu_phoenix driver")
Cc: stable <stable@vger.kernel.org>     # 2.6.25
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/iuu_phoenix.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index f1201d4de297..e8f06b41a503 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -532,23 +532,29 @@ static int iuu_uart_flush(struct usb_serial_port *port)
 	struct device *dev = &port->dev;
 	int i;
 	int status;
-	u8 rxcmd = IUU_UART_RX;
+	u8 *rxcmd;
 	struct iuu_private *priv = usb_get_serial_port_data(port);
 
 	if (iuu_led(port, 0xF000, 0, 0, 0xFF) < 0)
 		return -EIO;
 
+	rxcmd = kmalloc(1, GFP_KERNEL);
+	if (!rxcmd)
+		return -ENOMEM;
+
+	rxcmd[0] = IUU_UART_RX;
+
 	for (i = 0; i < 2; i++) {
-		status = bulk_immediate(port, &rxcmd, 1);
+		status = bulk_immediate(port, rxcmd, 1);
 		if (status != IUU_OPERATION_OK) {
 			dev_dbg(dev, "%s - uart_flush_write error\n", __func__);
-			return status;
+			goto out_free;
 		}
 
 		status = read_immediate(port, &priv->len, 1);
 		if (status != IUU_OPERATION_OK) {
 			dev_dbg(dev, "%s - uart_flush_read error\n", __func__);
-			return status;
+			goto out_free;
 		}
 
 		if (priv->len > 0) {
@@ -556,12 +562,16 @@ static int iuu_uart_flush(struct usb_serial_port *port)
 			status = read_immediate(port, priv->buf, priv->len);
 			if (status != IUU_OPERATION_OK) {
 				dev_dbg(dev, "%s - uart_flush_read error\n", __func__);
-				return status;
+				goto out_free;
 			}
 		}
 	}
 	dev_dbg(dev, "%s - uart_flush_read OK!\n", __func__);
 	iuu_led(port, 0, 0xF000, 0, 0xFF);
+
+out_free:
+	kfree(rxcmd);
+
 	return status;
 }
 

From 4bfd6247fa9164c8e193a55ef9c0ea3ee22f82d8 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 4 Jan 2021 16:30:46 +0100
Subject: [PATCH 310/547] ALSA: hda/via: Fix runtime PM for Clevo W35xSS

Clevo W35xSS_370SS with VIA codec has had the runtime PM problem that
looses the power state of some nodes after the runtime resume.  This
was worked around by disabling the default runtime PM via a denylist
entry.  Since 5.10.x made the runtime PM applied (casually) even
though it's disabled in the denylist, this problem was revisited.  The
result was that disabling power_save_node feature suffices for the
runtime PM problem.

This patch implements the disablement of power_save_node feature in
VIA codec for the device.  It also drops the former denylist entry,
too, as the runtime PM should work in the codec side properly now.

Fixes: b529ef2464ad ("ALSA: hda: Add Clevo W35xSS_370SS to the power_save blacklist")
Reported-by: Christian Labisch <clnetbox@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210104153046.19993-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c |  2 --
 sound/pci/hda/patch_via.c | 13 +++++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 6852668f1bcb..770ad25f1907 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2220,8 +2220,6 @@ static const struct snd_pci_quirk power_save_denylist[] = {
 	SND_PCI_QUIRK(0x1849, 0x7662, "Asrock H81M-HDS", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
 	SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
-	/* https://bugzilla.redhat.com/show_bug.cgi?id=1581607 */
-	SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
 	SND_PCI_QUIRK(0x1558, 0x6504, "Clevo W65_67SB", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index 7ef8f3105cdb..0ab40a8a68fb 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -1002,6 +1002,7 @@ static const struct hda_verb vt1802_init_verbs[] = {
 enum {
 	VIA_FIXUP_INTMIC_BOOST,
 	VIA_FIXUP_ASUS_G75,
+	VIA_FIXUP_POWER_SAVE,
 };
 
 static void via_fixup_intmic_boost(struct hda_codec *codec,
@@ -1011,6 +1012,13 @@ static void via_fixup_intmic_boost(struct hda_codec *codec,
 		override_mic_boost(codec, 0x30, 0, 2, 40);
 }
 
+static void via_fixup_power_save(struct hda_codec *codec,
+				 const struct hda_fixup *fix, int action)
+{
+	if (action == HDA_FIXUP_ACT_PRE_PROBE)
+		codec->power_save_node = 0;
+}
+
 static const struct hda_fixup via_fixups[] = {
 	[VIA_FIXUP_INTMIC_BOOST] = {
 		.type = HDA_FIXUP_FUNC,
@@ -1025,11 +1033,16 @@ static const struct hda_fixup via_fixups[] = {
 			{ }
 		}
 	},
+	[VIA_FIXUP_POWER_SAVE] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = via_fixup_power_save,
+	},
 };
 
 static const struct snd_pci_quirk vt2002p_fixups[] = {
 	SND_PCI_QUIRK(0x1043, 0x1487, "Asus G75", VIA_FIXUP_ASUS_G75),
 	SND_PCI_QUIRK(0x1043, 0x8532, "Asus X202E", VIA_FIXUP_INTMIC_BOOST),
+	SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", VIA_FIXUP_POWER_SAVE),
 	{}
 };
 

From 020a1f453449294926ca548d8d5ca970926e8dfd Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 4 Jan 2021 15:53:02 +0100
Subject: [PATCH 311/547] USB: usblp: fix DMA to stack

Stack-allocated buffers cannot be used for DMA (on all architectures).

Replace the HP-channel macro with a helper function that allocates a
dedicated transfer buffer so that it can continue to be used with
arguments from the stack.

Note that the buffer is cleared on allocation as usblp_ctrl_msg()
returns success also on short transfers (the buffer is only used for
debugging).

Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://lore.kernel.org/r/20210104145302.2087-1-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usblp.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index 67cbd42421be..134dc2005ce9 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -274,8 +274,25 @@ static int usblp_ctrl_msg(struct usblp *usblp, int request, int type, int dir, i
 #define usblp_reset(usblp)\
 	usblp_ctrl_msg(usblp, USBLP_REQ_RESET, USB_TYPE_CLASS, USB_DIR_OUT, USB_RECIP_OTHER, 0, NULL, 0)
 
-#define usblp_hp_channel_change_request(usblp, channel, buffer) \
-	usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, channel, buffer, 1)
+static int usblp_hp_channel_change_request(struct usblp *usblp, int channel, u8 *new_channel)
+{
+	u8 *buf;
+	int ret;
+
+	buf = kzalloc(1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST,
+			USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE,
+			channel, buf, 1);
+	if (ret == 0)
+		*new_channel = buf[0];
+
+	kfree(buf);
+
+	return ret;
+}
 
 /*
  * See the description for usblp_select_alts() below for the usage

From 718bf42b119de652ebcc93655a1f33a9c0d04b3c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 28 Dec 2020 23:13:09 -0800
Subject: [PATCH 312/547] usb: usbip: vhci_hcd: protect shift size

Fix shift out-of-bounds in vhci_hcd.c:

  UBSAN: shift-out-of-bounds in ../drivers/usb/usbip/vhci_hcd.c:399:41
  shift exponent 768 is too large for 32-bit type 'int'

Fixes: 03cd00d538a6 ("usbip: vhci-hcd: Set the vhci structure up to work")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: syzbot+297d20e437b79283bf6d@syzkaller.appspotmail.com
Cc: Yuyang Du <yuyang.du@intel.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-usb@vger.kernel.org
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20201229071309.18418-1-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/vhci_hcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 66cde5e5f796..3209b5ddd30c 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -396,6 +396,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 		default:
 			usbip_dbg_vhci_rh(" ClearPortFeature: default %x\n",
 					  wValue);
+			if (wValue >= 32)
+				goto error;
 			vhci_hcd->port_status[rhport] &= ~(1 << wValue);
 			break;
 		}

From a1383b3537a7bea1c213baa7878ccc4ecf4413b5 Mon Sep 17 00:00:00 2001
From: Wesley Cheng <wcheng@codeaurora.org>
Date: Tue, 29 Dec 2020 15:00:37 -0800
Subject: [PATCH 313/547] usb: dwc3: gadget: Restart DWC3 gadget when enabling
 pullup

usb_gadget_deactivate/usb_gadget_activate does not execute the UDC start
operation, which may leave EP0 disabled and event IRQs disabled when
re-activating the function. Move the enabling/disabling of USB EP0 and
device event IRQs to be performed in the pullup routine.

Fixes: ae7e86108b12 ("usb: dwc3: Stop active transfers before halting the controller")
Tested-by: Michael Tretter <m.tretter@pengutronix.de>
Cc: stable <stable@vger.kernel.org>
Reported-by: Michael Tretter <m.tretter@pengutronix.de>
Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
Link: https://lore.kernel.org/r/1609282837-21666-1-git-send-email-wcheng@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 78cb4db8a6e4..25f654b79e48 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2083,6 +2083,7 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
 
 static void dwc3_gadget_disable_irq(struct dwc3 *dwc);
 static void __dwc3_gadget_stop(struct dwc3 *dwc);
+static int __dwc3_gadget_start(struct dwc3 *dwc);
 
 static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
 {
@@ -2145,6 +2146,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
 			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
 						dwc->ev_buf->length;
 		}
+	} else {
+		__dwc3_gadget_start(dwc);
 	}
 
 	ret = dwc3_gadget_run_stop(dwc, is_on, false);
@@ -2319,10 +2322,6 @@ static int dwc3_gadget_start(struct usb_gadget *g,
 	}
 
 	dwc->gadget_driver	= driver;
-
-	if (pm_runtime_active(dwc->dev))
-		__dwc3_gadget_start(dwc);
-
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	return 0;
@@ -2348,13 +2347,6 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
 	unsigned long		flags;
 
 	spin_lock_irqsave(&dwc->lock, flags);
-
-	if (pm_runtime_suspended(dwc->dev))
-		goto out;
-
-	__dwc3_gadget_stop(dwc);
-
-out:
 	dwc->gadget_driver	= NULL;
 	spin_unlock_irqrestore(&dwc->lock, flags);
 

From 64e6bbfff52db4bf6785fab9cffab850b2de6870 Mon Sep 17 00:00:00 2001
From: Eddie Hung <eddie.hung@mediatek.com>
Date: Tue, 29 Dec 2020 18:53:35 +0800
Subject: [PATCH 314/547] usb: gadget: configfs: Fix use-after-free issue with
 udc_name

There is a use-after-free issue, if access udc_name
in function gadget_dev_desc_UDC_store after another context
free udc_name in function unregister_gadget.

Context 1:
gadget_dev_desc_UDC_store()->unregister_gadget()->
free udc_name->set udc_name to NULL

Context 2:
gadget_dev_desc_UDC_show()-> access udc_name

Call trace:
dump_backtrace+0x0/0x340
show_stack+0x14/0x1c
dump_stack+0xe4/0x134
print_address_description+0x78/0x478
__kasan_report+0x270/0x2ec
kasan_report+0x10/0x18
__asan_report_load1_noabort+0x18/0x20
string+0xf4/0x138
vsnprintf+0x428/0x14d0
sprintf+0xe4/0x12c
gadget_dev_desc_UDC_show+0x54/0x64
configfs_read_file+0x210/0x3a0
__vfs_read+0xf0/0x49c
vfs_read+0x130/0x2b4
SyS_read+0x114/0x208
el0_svc_naked+0x34/0x38

Add mutex_lock to protect this kind of scenario.

Signed-off-by: Eddie Hung <eddie.hung@mediatek.com>
Signed-off-by: Macpaul Lin <macpaul.lin@mediatek.com>
Reviewed-by: Peter Chen <peter.chen@nxp.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/1609239215-21819-1-git-send-email-macpaul.lin@mediatek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/configfs.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 56051bb97349..d9743f4b56c3 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -221,9 +221,16 @@ static ssize_t gadget_dev_desc_bcdUSB_store(struct config_item *item,
 
 static ssize_t gadget_dev_desc_UDC_show(struct config_item *item, char *page)
 {
-	char *udc_name = to_gadget_info(item)->composite.gadget_driver.udc_name;
+	struct gadget_info *gi = to_gadget_info(item);
+	char *udc_name;
+	int ret;
 
-	return sprintf(page, "%s\n", udc_name ?: "");
+	mutex_lock(&gi->lock);
+	udc_name = gi->composite.gadget_driver.udc_name;
+	ret = sprintf(page, "%s\n", udc_name ?: "");
+	mutex_unlock(&gi->lock);
+
+	return ret;
 }
 
 static int unregister_gadget(struct gadget_info *gi)

From 7043e311a57625467b6fdb032dec8a6dea878208 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Wed, 30 Dec 2020 13:11:14 +0800
Subject: [PATCH 315/547] usb: gadget: core: change the comment for
 usb_gadget_connect

The pullup does not need to be enabled at below situations:
- For platforms which the hardware pullup starts after setting
register even they do not see the VBUS. If the pullup is always
enabled for these platforms, it will consume more power and
break the USB IF compliance tests [1].
- For platforms which need to do BC 1.2 charger detection after
seeing the VBUS. Pullup D+ will break the charger detection
flow.
- For platforms which the system suspend is allowed when the
connection with host is there but the bus is not at suspend.
For these platforms, it is better to disable pullup when
entering the system suspend otherwise the host may confuse
the device behavior after controller is in low power mode.
Disable pullup is considered as a disconnection event from
host side.

[1] USB-IF Full and Low Speed Compliance Test Procedure
F Back-voltage Testing
Section 7.2.1 of the USB specification requires that no device
shall supply (source) current on VBUS at its upstream facing
port at any time. From VBUS on its upstream facing port,
a device may only draw (sink) current. They may not provide power
to the pull-up resistor on D+/D- unless VBUS is present (see
Section 7.1.5).

Signed-off-by: Peter Chen <peter.chen@nxp.com>
Link: https://lore.kernel.org/r/20201230051114.21370-1-peter.chen@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 5b5cfeb6c14a..6a62bbd01324 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -659,8 +659,7 @@ EXPORT_SYMBOL_GPL(usb_gadget_vbus_disconnect);
  *
  * Enables the D+ (or potentially D-) pullup.  The host will start
  * enumerating this gadget when the pullup is active and a VBUS session
- * is active (the link is powered).  This pullup is always enabled unless
- * usb_gadget_disconnect() has been used to disable it.
+ * is active (the link is powered).
  *
  * Returns zero on success, else negative errno.
  */

From d7889c2020e08caab0d7e36e947f642d91015bd0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:42:17 +0100
Subject: [PATCH 316/547] usb: gadget: select CONFIG_CRC32

Without crc32 support, this driver fails to link:

arm-linux-gnueabi-ld: drivers/usb/gadget/function/f_eem.o: in function `eem_unwrap':
f_eem.c:(.text+0x11cc): undefined reference to `crc32_le'
arm-linux-gnueabi-ld: drivers/usb/gadget/function/f_ncm.o:f_ncm.c:(.text+0x1e40):
more undefined references to `crc32_le' follow

Fixes: 6d3865f9d41f ("usb: gadget: NCM: Add transmit multi-frame.")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210103214224.1996535-1-arnd@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 7e47e6223089..2d152571a7de 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -265,6 +265,7 @@ config USB_CONFIGFS_NCM
 	depends on NET
 	select USB_U_ETHER
 	select USB_F_NCM
+	select CRC32
 	help
 	  NCM is an advanced protocol for Ethernet encapsulation, allows
 	  grouping of several ethernet frames into one USB transfer and
@@ -314,6 +315,7 @@ config USB_CONFIGFS_EEM
 	depends on NET
 	select USB_U_ETHER
 	select USB_F_EEM
+	select CRC32
 	help
 	  CDC EEM is a newer USB standard that is somewhat simpler than CDC ECM
 	  and therefore can be supported by more hardware.  Technically ECM and

From 6cd0fe91387917be48e91385a572a69dfac2f3f7 Mon Sep 17 00:00:00 2001
From: Chandana Kishori Chiluveru <cchiluve@codeaurora.org>
Date: Tue, 29 Dec 2020 14:44:43 -0800
Subject: [PATCH 317/547] usb: gadget: configfs: Preserve function ordering
 after bind failure

When binding the ConfigFS gadget to a UDC, the functions in each
configuration are added in list order. However, if usb_add_function()
fails, the failed function is put back on its configuration's
func_list and purge_configs_funcs() is called to further clean up.

purge_configs_funcs() iterates over the configurations and functions
in forward order, calling unbind() on each of the previously added
functions. But after doing so, each function gets moved to the
tail of the configuration's func_list. This results in reshuffling
the original order of the functions within a configuration such
that the failed function now appears first even though it may have
originally appeared in the middle or even end of the list. At this
point if the ConfigFS gadget is attempted to re-bind to the UDC,
the functions will be added in a different order than intended,
with the only recourse being to remove and relink the functions all
over again.

An example of this as follows:

ln -s functions/mass_storage.0 configs/c.1
ln -s functions/ncm.0 configs/c.1
ln -s functions/ffs.adb configs/c.1	# oops, forgot to start adbd
echo "<udc device>" > UDC		# fails
start adbd
echo "<udc device>" > UDC		# now succeeds, but...
					# bind order is
					# "ADB", mass_storage, ncm

[30133.118289] configfs-gadget gadget: adding 'Mass Storage Function'/ffffff810af87200 to config 'c'/ffffff817d6a2520
[30133.119875] configfs-gadget gadget: adding 'cdc_network'/ffffff80f48d1a00 to config 'c'/ffffff817d6a2520
[30133.119974] using random self ethernet address
[30133.120002] using random host ethernet address
[30133.139604] usb0: HOST MAC 3e:27:46:ba:3e:26
[30133.140015] usb0: MAC 6e:28:7e:42:66:6a
[30133.140062] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 to config 'c'/ffffff817d6a2520
[30133.140081] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 --> -19
[30133.140098] configfs-gadget gadget: unbind function 'Mass Storage Function'/ffffff810af87200
[30133.140119] configfs-gadget gadget: unbind function 'cdc_network'/ffffff80f48d1a00
[30133.173201] configfs-gadget a600000.dwc3: failed to start g1: -19
[30136.661933] init: starting service 'adbd'...
[30136.700126] read descriptors
[30136.700413] read strings
[30138.574484] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 to config 'c'/ffffff817d6a2520
[30138.575497] configfs-gadget gadget: adding 'Mass Storage Function'/ffffff810af87200 to config 'c'/ffffff817d6a2520
[30138.575554] configfs-gadget gadget: adding 'cdc_network'/ffffff80f48d1a00 to config 'c'/ffffff817d6a2520
[30138.575631] using random self ethernet address
[30138.575660] using random host ethernet address
[30138.595338] usb0: HOST MAC 2e:cf:43:cd:ca:c8
[30138.597160] usb0: MAC 6a:f0:9f:ee:82:a0
[30138.791490] configfs-gadget gadget: super-speed config #1: c

Fix this by reversing the iteration order of the functions in
purge_config_funcs() when unbinding them, and adding them back to
the config's func_list at the head instead of the tail. This
ensures that we unbind and unwind back to the original list order.

Fixes: 88af8bbe4ef7 ("usb: gadget: the start of the configfs interface")
Signed-off-by: Chandana Kishori Chiluveru <cchiluve@codeaurora.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Reviewed-by: Peter Chen <peter.chen@nxp.com>
Link: https://lore.kernel.org/r/20201229224443.31623-1-jackp@codeaurora.org
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/configfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index d9743f4b56c3..408a5332a975 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1255,9 +1255,9 @@ static void purge_configs_funcs(struct gadget_info *gi)
 
 		cfg = container_of(c, struct config_usb_cfg, c);
 
-		list_for_each_entry_safe(f, tmp, &c->functions, list) {
+		list_for_each_entry_safe_reverse(f, tmp, &c->functions, list) {
 
-			list_move_tail(&f->list, &cfg->func_list);
+			list_move(&f->list, &cfg->func_list);
 			if (f->unbind) {
 				dev_dbg(&gi->cdev.gadget->dev,
 					"unbind function '%s'/%p\n",

From e1263f9277bad198c2acc8092a41aea1edbea0e4 Mon Sep 17 00:00:00 2001
From: Amelie Delaunay <amelie.delaunay@foss.st.com>
Date: Mon, 4 Jan 2021 15:20:45 +0100
Subject: [PATCH 318/547] dmaengine: stm32-mdma: fix
 STM32_MDMA_VERY_HIGH_PRIORITY value

STM32_MDMA_VERY_HIGH_PRIORITY is b11 not 0x11, so fix it with 0x3.

Signed-off-by: Amelie Delaunay <amelie.delaunay@foss.st.com>
Link: https://lore.kernel.org/r/20210104142045.25583-1-amelie.delaunay@foss.st.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/stm32-mdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index e4637ec786d3..36ba8b43e78d 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -199,7 +199,7 @@
 #define STM32_MDMA_MAX_CHANNELS		63
 #define STM32_MDMA_MAX_REQUESTS		256
 #define STM32_MDMA_MAX_BURST		128
-#define STM32_MDMA_VERY_HIGH_PRIORITY	0x11
+#define STM32_MDMA_VERY_HIGH_PRIORITY	0x3
 
 enum stm32_mdma_trigger_mode {
 	STM32_MDMA_BUFFER,

From 65a4e5299739abe0888cda0938d21f8ea3b5c606 Mon Sep 17 00:00:00 2001
From: David Gow <davidgow@google.com>
Date: Mon, 21 Dec 2020 23:39:00 -0800
Subject: [PATCH 319/547] kunit: tool: Force the use of the 'tty' console for
 UML

kunit_tool relies on the UML console outputting printk() output to the
tty in order to get results. Since the default console driver could
change, pass 'console=tty' to the kernel.

This is triggered by a change[1] to use ttynull as a fallback console
driver which -- by chance or by design -- seems to have changed the
default console output on UML, breaking kunit_tool. While this may be
fixed, we should be less fragile to such changes in the default.

[1]:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=757055ae8dedf5333af17b3b5b4b70ba9bc9da4e

Signed-off-by: David Gow <davidgow@google.com>
Fixes: 757055ae8ded ("init/console: Use ttynull as a fallback when there is no console")
Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_kernel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 57c1724b7e5d..698358c9c0d6 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -198,7 +198,7 @@ class LinuxSourceTree(object):
 		return self.validate_config(build_dir)
 
 	def run_kernel(self, args=[], build_dir='', timeout=None):
-		args.extend(['mem=1G'])
+		args.extend(['mem=1G', 'console=tty'])
 		self._ops.linux_bin(args, timeout, build_dir)
 		outfile = get_outfile_path(build_dir)
 		subprocess.call(['stty', 'sane'])

From 3b4cf848dad5dad4bf239ba664c809c8cf29f1ed Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Thu, 17 Dec 2020 17:31:40 +0100
Subject: [PATCH 320/547] selftests/vDSO: add additional binaries to .gitignore

Add the test binaries introduced by commit 693f5ca08ca0 ("kselftest:
Extend vDSO selftest"), commit 03f55c7952c9 ("kselftest: Extend vDSO
selftest to clock_getres") and commit c7e5789b24d3 ("kselftest: Move
test_vdso to the vDSO test suite") to .gitignore.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/.gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
index 5eb64d41e541..a8dc51af5a9c 100644
--- a/tools/testing/selftests/vDSO/.gitignore
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 vdso_test
+vdso_test_abi
+vdso_test_clock_getres
+vdso_test_correctness
 vdso_test_gettimeofday
 vdso_test_getcpu
 vdso_standalone_test_x86

From df00d02989024d193a6efd1a85513a5658c6a10f Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Thu, 17 Dec 2020 17:32:35 +0100
Subject: [PATCH 321/547] selftests/vDSO: fix -Wformat warning in
 vdso_test_correctness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following -Wformat warnings in vdso_test_correctness.c:

vdso_test_correctness.c: In function ‘test_one_clock_gettime64’:
vdso_test_correctness.c:352:21: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 3 has type ‘long long int’ [-Wformat=]
  352 |  printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
      |                 ~~~~^
      |                     |
      |                     long int
      |                 %09lld
  353 |         (unsigned long long)start.tv_sec, start.tv_nsec,
      |                                           ~~~~~~~~~~~~~
      |                                                |
      |                                                long long int
vdso_test_correctness.c:352:32: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 5 has type ‘long long int’ [-Wformat=]
  352 |  printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
      |                            ~~~~^
      |                                |
      |                                long int
      |                            %09lld
  353 |         (unsigned long long)start.tv_sec, start.tv_nsec,
  354 |         (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
      |                                          ~~~~~~~~~~~~
      |                                              |
      |                                              long long int
vdso_test_correctness.c:352:43: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 7 has type ‘long long int’ [-Wformat=]

The tv_sec member of __kernel_timespec is long long, both in
uapi/linux/time_types.h and locally in vdso_test_correctness.c.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/vdso_test_correctness.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index 5029ef9b228c..c4aea794725a 100644
--- a/tools/testing/selftests/vDSO/vdso_test_correctness.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -349,7 +349,7 @@ static void test_one_clock_gettime64(int clock, const char *name)
 		return;
 	}
 
-	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+	printf("\t%llu.%09lld %llu.%09lld %llu.%09lld\n",
 	       (unsigned long long)start.tv_sec, start.tv_nsec,
 	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
 	       (unsigned long long)end.tv_sec, end.tv_nsec);

From 8cbebc4118b5933b3ae6351ceb433f75ac6b7c6b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 4 Jan 2021 16:50:16 +0000
Subject: [PATCH 322/547] KVM: arm64: Replace KVM_ARM_PMU with HW_PERF_EVENTS

KVM_ARM_PMU only existed for the benefit of 32bit ARM hosts,
and makes no sense now that we are 64bit only. Get rid of it.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/Kconfig  | 8 --------
 arch/arm64/kvm/Makefile | 2 +-
 include/kvm/arm_pmu.h   | 2 +-
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 043756db8f6e..3964acf5451e 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -49,14 +49,6 @@ if KVM
 
 source "virt/kvm/Kconfig"
 
-config KVM_ARM_PMU
-	bool "Virtual Performance Monitoring Unit (PMU) support"
-	depends on HW_PERF_EVENTS
-	default y
-	help
-	  Adds support for a virtual Performance Monitoring Unit (PMU) in
-	  virtual machines.
-
 endif # KVM
 
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 60fd181df624..13b017284bf9 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -24,4 +24,4 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
 	 vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
 	 vgic/vgic-its.o vgic/vgic-debug.o
 
-kvm-$(CONFIG_KVM_ARM_PMU)  += pmu-emul.o
+kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index fc85f50fa0e9..8dcb3e1477bc 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -13,7 +13,7 @@
 #define ARMV8_PMU_CYCLE_IDX		(ARMV8_PMU_MAX_COUNTERS - 1)
 #define ARMV8_PMU_MAX_COUNTER_PAIRS	((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
 
-#ifdef CONFIG_KVM_ARM_PMU
+#ifdef CONFIG_HW_PERF_EVENTS
 
 struct kvm_pmc {
 	u8 idx;	/* index into the pmu->pmc array */

From 0b884fe71f9ee6a5df35e677154256ea2099ebb8 Mon Sep 17 00:00:00 2001
From: Chunyan Zhang <chunyan.zhang@unisoc.com>
Date: Mon, 14 Dec 2020 12:58:50 +0800
Subject: [PATCH 323/547] i2c: sprd: use a specific timeout to avoid system
 hang up issue

If the i2c device SCL bus being pulled up due to some exception before
message transfer done, the system cannot receive the completing interrupt
signal any more, it would not exit waiting loop until MAX_SCHEDULE_TIMEOUT
jiffies eclipse, that would make the system seemed hang up. To avoid that
happen, this patch adds a specific timeout for message transfer.

Fixes: 8b9ec0719834 ("i2c: Add Spreadtrum I2C controller driver")
Signed-off-by: Linhua Xu <linhua.xu@unisoc.com>
Signed-off-by: Chunyan Zhang <chunyan.zhang@unisoc.com>
[wsa: changed errno to ETIMEDOUT]
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-sprd.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c
index 19cda6742423..2917fecf6c80 100644
--- a/drivers/i2c/busses/i2c-sprd.c
+++ b/drivers/i2c/busses/i2c-sprd.c
@@ -72,6 +72,8 @@
 
 /* timeout (ms) for pm runtime autosuspend */
 #define SPRD_I2C_PM_TIMEOUT	1000
+/* timeout (ms) for transfer message */
+#define I2C_XFER_TIMEOUT	1000
 
 /* SPRD i2c data structure */
 struct sprd_i2c {
@@ -244,6 +246,7 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap,
 			       struct i2c_msg *msg, bool is_last_msg)
 {
 	struct sprd_i2c *i2c_dev = i2c_adap->algo_data;
+	unsigned long time_left;
 
 	i2c_dev->msg = msg;
 	i2c_dev->buf = msg->buf;
@@ -273,7 +276,10 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap,
 
 	sprd_i2c_opt_start(i2c_dev);
 
-	wait_for_completion(&i2c_dev->complete);
+	time_left = wait_for_completion_timeout(&i2c_dev->complete,
+				msecs_to_jiffies(I2C_XFER_TIMEOUT));
+	if (!time_left)
+		return -ETIMEDOUT;
 
 	return i2c_dev->err;
 }

From 0b3ea2a06de1f52ea30865e227e109a5fd3b6214 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 21 Dec 2020 14:42:25 +0100
Subject: [PATCH 324/547] i2c: i801: Fix the i2c-mux gpiod_lookup_table not
 being properly terminated

gpiod_add_lookup_table() expects the gpiod_lookup_table->table passed to
it to be terminated with a zero-ed out entry.

So we need to allocate one more entry then we will use.

Fixes: d308dfbf62ef ("i2c: mux/i801: Switch to use descriptor passing")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Jean Delvare <jdelvare@suse.de>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-i801.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index ae90713443fa..877fe3733a42 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1449,7 +1449,7 @@ static int i801_add_mux(struct i801_priv *priv)
 
 	/* Register GPIO descriptor lookup table */
 	lookup = devm_kzalloc(dev,
-			      struct_size(lookup, table, mux_config->n_gpios),
+			      struct_size(lookup, table, mux_config->n_gpios + 1),
 			      GFP_KERNEL);
 	if (!lookup)
 		return -ENOMEM;

From d9e44981739a96f1a468c13bbbd54ace378caf1c Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 28 Dec 2020 15:21:36 +0000
Subject: [PATCH 325/547] bareudp: set NETIF_F_LLTX flag

Like other tunneling interfaces, the bareudp doesn't need TXLOCK.
So, It is good to set the NETIF_F_LLTX flag to improve performance and
to avoid lockdep's false-positive warning.

Test commands:
    ip netns add A
    ip netns add B
    ip link add veth0 netns A type veth peer name veth1 netns B
    ip netns exec A ip link set veth0 up
    ip netns exec A ip a a 10.0.0.1/24 dev veth0
    ip netns exec B ip link set veth1 up
    ip netns exec B ip a a 10.0.0.2/24 dev veth1

    for i in {2..1}
    do
            let A=$i-1
            ip netns exec A ip link add bareudp$i type bareudp \
		    dstport $i ethertype ip
            ip netns exec A ip link set bareudp$i up
            ip netns exec A ip a a 10.0.$i.1/24 dev bareudp$i
            ip netns exec A ip r a 10.0.$i.2 encap ip src 10.0.$A.1 \
		    dst 10.0.$A.2 via 10.0.$i.2 dev bareudp$i

            ip netns exec B ip link add bareudp$i type bareudp \
		    dstport $i ethertype ip
            ip netns exec B ip link set bareudp$i up
            ip netns exec B ip a a 10.0.$i.2/24 dev bareudp$i
            ip netns exec B ip r a 10.0.$i.1 encap ip src 10.0.$A.2 \
		    dst 10.0.$A.1 via 10.0.$i.1 dev bareudp$i
    done
    ip netns exec A ping 10.0.2.2

Splat looks like:
[   96.992803][  T822] ============================================
[   96.993954][  T822] WARNING: possible recursive locking detected
[   96.995102][  T822] 5.10.0+ #819 Not tainted
[   96.995927][  T822] --------------------------------------------
[   96.997091][  T822] ping/822 is trying to acquire lock:
[   96.998083][  T822] ffff88810f753898 (_xmit_NONE#2){+.-.}-{2:2}, at: __dev_queue_xmit+0x1f52/0x2960
[   96.999813][  T822]
[   96.999813][  T822] but task is already holding lock:
[   97.001192][  T822] ffff88810c385498 (_xmit_NONE#2){+.-.}-{2:2}, at: __dev_queue_xmit+0x1f52/0x2960
[   97.002908][  T822]
[   97.002908][  T822] other info that might help us debug this:
[   97.004401][  T822]  Possible unsafe locking scenario:
[   97.004401][  T822]
[   97.005784][  T822]        CPU0
[   97.006407][  T822]        ----
[   97.007010][  T822]   lock(_xmit_NONE#2);
[   97.007779][  T822]   lock(_xmit_NONE#2);
[   97.008550][  T822]
[   97.008550][  T822]  *** DEADLOCK ***
[   97.008550][  T822]
[   97.010057][  T822]  May be due to missing lock nesting notation
[   97.010057][  T822]
[   97.011594][  T822] 7 locks held by ping/822:
[   97.012426][  T822]  #0: ffff888109a144f0 (sk_lock-AF_INET){+.+.}-{0:0}, at: raw_sendmsg+0x12f7/0x2b00
[   97.014191][  T822]  #1: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: ip_finish_output2+0x249/0x2020
[   97.016045][  T822]  #2: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x1fd/0x2960
[   97.017897][  T822]  #3: ffff88810c385498 (_xmit_NONE#2){+.-.}-{2:2}, at: __dev_queue_xmit+0x1f52/0x2960
[   97.019684][  T822]  #4: ffffffffbce2f600 (rcu_read_lock){....}-{1:2}, at: bareudp_xmit+0x31b/0x3690 [bareudp]
[   97.021573][  T822]  #5: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: ip_finish_output2+0x249/0x2020
[   97.023424][  T822]  #6: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x1fd/0x2960
[   97.025259][  T822]
[   97.025259][  T822] stack backtrace:
[   97.026349][  T822] CPU: 3 PID: 822 Comm: ping Not tainted 5.10.0+ #819
[   97.027609][  T822] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
[   97.029407][  T822] Call Trace:
[   97.030015][  T822]  dump_stack+0x99/0xcb
[   97.030783][  T822]  __lock_acquire.cold.77+0x149/0x3a9
[   97.031773][  T822]  ? stack_trace_save+0x81/0xa0
[   97.032661][  T822]  ? register_lock_class+0x1910/0x1910
[   97.033673][  T822]  ? register_lock_class+0x1910/0x1910
[   97.034679][  T822]  ? rcu_read_lock_sched_held+0x91/0xc0
[   97.035697][  T822]  ? rcu_read_lock_bh_held+0xa0/0xa0
[   97.036690][  T822]  lock_acquire+0x1b2/0x730
[   97.037515][  T822]  ? __dev_queue_xmit+0x1f52/0x2960
[   97.038466][  T822]  ? check_flags+0x50/0x50
[   97.039277][  T822]  ? netif_skb_features+0x296/0x9c0
[   97.040226][  T822]  ? validate_xmit_skb+0x29/0xb10
[   97.041151][  T822]  _raw_spin_lock+0x30/0x70
[   97.041977][  T822]  ? __dev_queue_xmit+0x1f52/0x2960
[   97.042927][  T822]  __dev_queue_xmit+0x1f52/0x2960
[   97.043852][  T822]  ? netdev_core_pick_tx+0x290/0x290
[   97.044824][  T822]  ? mark_held_locks+0xb7/0x120
[   97.045712][  T822]  ? lockdep_hardirqs_on_prepare+0x12c/0x3e0
[   97.046824][  T822]  ? __local_bh_enable_ip+0xa5/0xf0
[   97.047771][  T822]  ? ___neigh_create+0x12a8/0x1eb0
[   97.048710][  T822]  ? trace_hardirqs_on+0x41/0x120
[   97.049626][  T822]  ? ___neigh_create+0x12a8/0x1eb0
[   97.050556][  T822]  ? __local_bh_enable_ip+0xa5/0xf0
[   97.051509][  T822]  ? ___neigh_create+0x12a8/0x1eb0
[   97.052443][  T822]  ? check_chain_key+0x244/0x5f0
[   97.053352][  T822]  ? rcu_read_lock_bh_held+0x56/0xa0
[   97.054317][  T822]  ? ip_finish_output2+0x6ea/0x2020
[   97.055263][  T822]  ? pneigh_lookup+0x410/0x410
[   97.056135][  T822]  ip_finish_output2+0x6ea/0x2020
[ ... ]

Acked-by: Guillaume Nault <gnault@redhat.com>
Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Link: https://lore.kernel.org/r/20201228152136.24215-1-ap420073@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/bareudp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index 85ebd2b7e446..aea10196c222 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -534,6 +534,7 @@ static void bareudp_setup(struct net_device *dev)
 	SET_NETDEV_DEVTYPE(dev, &bareudp_type);
 	dev->features    |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	dev->features    |= NETIF_F_RXCSUM;
+	dev->features    |= NETIF_F_LLTX;
 	dev->features    |= NETIF_F_GSO_SOFTWARE;
 	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;

From 10ad3e998fa0c25315f27cf3002ff8b02dc31c38 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 28 Dec 2020 15:21:46 +0000
Subject: [PATCH 326/547] bareudp: Fix use of incorrect min_headroom size

In the bareudp6_xmit_skb(), it calculates min_headroom.
At that point, it uses struct iphdr, but it's not correct.
So panic could occur.
The struct ipv6hdr should be used.

Test commands:
    ip netns add A
    ip netns add B
    ip link add veth0 netns A type veth peer name veth1 netns B
    ip netns exec A ip link set veth0 up
    ip netns exec A ip a a 2001:db8:0::1/64 dev veth0
    ip netns exec B ip link set veth1 up
    ip netns exec B ip a a 2001:db8:0::2/64 dev veth1

    for i in {10..1}
    do
            let A=$i-1
            ip netns exec A ip link add bareudp$i type bareudp dstport $i \
		    ethertype 0x86dd
            ip netns exec A ip link set bareudp$i up
            ip netns exec A ip -6 a a 2001:db8:$i::1/64 dev bareudp$i
            ip netns exec A ip -6 r a 2001:db8:$i::2 encap ip6 src \
		    2001:db8:$A::1 dst 2001:db8:$A::2 via 2001:db8:$i::2 \
		    dev bareudp$i

            ip netns exec B ip link add bareudp$i type bareudp dstport $i \
		    ethertype 0x86dd
            ip netns exec B ip link set bareudp$i up
            ip netns exec B ip -6 a a 2001:db8:$i::2/64 dev bareudp$i
            ip netns exec B ip -6 r a 2001:db8:$i::1 encap ip6 src \
		    2001:db8:$A::2 dst 2001:db8:$A::1 via 2001:db8:$i::1 \
		    dev bareudp$i
    done
    ip netns exec A ping 2001:db8:7::2

Splat looks like:
[   66.436679][    C2] skbuff: skb_under_panic: text:ffffffff928614c8 len:454 put:14 head:ffff88810abb4000 data:ffff88810abb3ffa tail:0x1c0 end:0x3ec0 dev:veth0
[   66.441626][    C2] ------------[ cut here ]------------
[   66.443458][    C2] kernel BUG at net/core/skbuff.c:109!
[   66.445313][    C2] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[   66.447606][    C2] CPU: 2 PID: 913 Comm: ping Not tainted 5.10.0+ #819
[   66.450251][    C2] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
[   66.453713][    C2] RIP: 0010:skb_panic+0x15d/0x15f
[   66.455345][    C2] Code: 98 fe 4c 8b 4c 24 10 53 8b 4d 70 45 89 e0 48 c7 c7 60 8b 78 93 41 57 41 56 41 55 48 8b 54 24 20 48 8b 74 24 28 e8 b5 40 f9 ff <0f> 0b 48 8b 6c 24 20 89 34 24 e8 08 c9 98 fe 8b 34 24 48 c7 c1 80
[   66.462314][    C2] RSP: 0018:ffff888119209648 EFLAGS: 00010286
[   66.464281][    C2] RAX: 0000000000000089 RBX: ffff888003159000 RCX: 0000000000000000
[   66.467216][    C2] RDX: 0000000000000089 RSI: 0000000000000008 RDI: ffffed10232412c0
[   66.469768][    C2] RBP: ffff88810a53d440 R08: ffffed102328018d R09: ffffed102328018d
[   66.472297][    C2] R10: ffff888119400c67 R11: ffffed102328018c R12: 000000000000000e
[   66.474833][    C2] R13: ffff88810abb3ffa R14: 00000000000001c0 R15: 0000000000003ec0
[   66.477361][    C2] FS:  00007f37c0c72f00(0000) GS:ffff888119200000(0000) knlGS:0000000000000000
[   66.480214][    C2] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   66.482296][    C2] CR2: 000055a058808570 CR3: 000000011039e002 CR4: 00000000003706e0
[   66.484811][    C2] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   66.487793][    C2] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   66.490424][    C2] Call Trace:
[   66.491469][    C2]  <IRQ>
[   66.492374][    C2]  ? eth_header+0x28/0x190
[   66.494054][    C2]  ? eth_header+0x28/0x190
[   66.495401][    C2]  skb_push.cold.99+0x22/0x22
[   66.496700][    C2]  eth_header+0x28/0x190
[   66.497867][    C2]  neigh_resolve_output+0x3de/0x720
[   66.499615][    C2]  ? __neigh_update+0x7e8/0x20a0
[   66.501176][    C2]  __neigh_update+0x8bd/0x20a0
[   66.502749][    C2]  ndisc_update+0x34/0xc0
[   66.504010][    C2]  ndisc_recv_na+0x8da/0xb80
[   66.505041][    C2]  ? pndisc_redo+0x20/0x20
[   66.505888][    C2]  ? rcu_read_lock_sched_held+0xc0/0xc0
[   66.506965][    C2]  ndisc_rcv+0x3a0/0x470
[   66.507797][    C2]  icmpv6_rcv+0xad9/0x1b00
[   66.508645][    C2]  ip6_protocol_deliver_rcu+0xcd6/0x1560
[   66.509719][    C2]  ip6_input_finish+0x5b/0xf0
[   66.510615][    C2]  ip6_input+0xcd/0x2d0
[   66.511406][    C2]  ? ip6_input_finish+0xf0/0xf0
[   66.512327][    C2]  ? rcu_read_lock_held+0x91/0xa0
[   66.513279][    C2]  ? ip6_protocol_deliver_rcu+0x1560/0x1560
[   66.514414][    C2]  ipv6_rcv+0xe8/0x300
[ ... ]

Acked-by: Guillaume Nault <gnault@redhat.com>
Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Link: https://lore.kernel.org/r/20201228152146.24270-1-ap420073@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/bareudp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index aea10196c222..708171c0d628 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -380,7 +380,7 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 		goto free_dst;
 
 	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
-		BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr);
+		BAREUDP_BASE_HLEN + info->options_len + sizeof(struct ipv6hdr);
 
 	err = skb_cow_head(skb, min_headroom);
 	if (unlikely(err))

From 01e31bea7e622f1890c274f4aaaaf8bccd296aa5 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Tue, 29 Dec 2020 10:01:48 +0800
Subject: [PATCH 327/547] vhost_net: fix ubuf refcount incorrectly when sendmsg
 fails

Currently the vhost_zerocopy_callback() maybe be called to decrease
the refcount when sendmsg fails in tun. The error handling in vhost
handle_tx_zerocopy() will try to decrease the same refcount again.
This is wrong. To fix this issue, we only call vhost_net_ubuf_put()
when vq->heads[nvq->desc].len == VHOST_DMA_IN_PROGRESS.

Fixes: bab632d69ee4 ("vhost: vhost TX zero-copy support")
Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/1609207308-20544-1-git-send-email-wangyunjian@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/vhost/net.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 531a00d703cd..c8784dfafdd7 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -863,6 +863,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 	size_t len, total_len = 0;
 	int err;
 	struct vhost_net_ubuf_ref *ubufs;
+	struct ubuf_info *ubuf;
 	bool zcopy_used;
 	int sent_pkts = 0;
 
@@ -895,9 +896,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 
 		/* use msg_control to pass vhost zerocopy ubuf info to skb */
 		if (zcopy_used) {
-			struct ubuf_info *ubuf;
 			ubuf = nvq->ubuf_info + nvq->upend_idx;
-
 			vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
 			vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
 			ubuf->callback = vhost_zerocopy_callback;
@@ -927,7 +926,8 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 		err = sock->ops->sendmsg(sock, &msg, len);
 		if (unlikely(err < 0)) {
 			if (zcopy_used) {
-				vhost_net_ubuf_put(ubufs);
+				if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS)
+					vhost_net_ubuf_put(ubufs);
 				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
 					% UIO_MAXIOV;
 			}

From 17e94567c57df3d9609e6bacaed9247c4f2629e2 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Tue, 29 Dec 2020 11:08:38 +0200
Subject: [PATCH 328/547] docs: networking: packet_mmap: fix formatting for C
 macros

The citation of macro definitions should appear in a code block.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Link: https://lore.kernel.org/r/5cb47005e7a59b64299e038827e295822193384c.1609232919.git.baruch@tkos.co.il
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/packet_mmap.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst
index 6c009ceb1183..f3646c80b019 100644
--- a/Documentation/networking/packet_mmap.rst
+++ b/Documentation/networking/packet_mmap.rst
@@ -437,7 +437,7 @@ and the following flags apply:
 Capture process
 ^^^^^^^^^^^^^^^
 
-     from include/linux/if_packet.h
+From include/linux/if_packet.h::
 
      #define TP_STATUS_COPY          (1 << 1)
      #define TP_STATUS_LOSING        (1 << 2)

From e4da63cda51f17fa1e86a10e84d47d692932530d Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Tue, 29 Dec 2020 11:08:39 +0200
Subject: [PATCH 329/547] docs: networking: packet_mmap: fix old config
 reference

Before commit 889b8f964f2f ("packet: Kill CONFIG_PACKET_MMAP.") there
used to be a CONFIG_PACKET_MMAP config symbol that depended on
CONFIG_PACKET. The text still implies that PACKET_MMAP can be disabled.
Remove that from the text, as well as reference to old kernel versions.

Also, drop reference to broken link to information for pre 2.6.5
kernels.

Make a slight working improvement (s/In/On/) while at it.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Link: https://lore.kernel.org/r/80089f3783372c8fd7833f28ce774a171b2ef252.1609232919.git.baruch@tkos.co.il
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/packet_mmap.rst | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst
index f3646c80b019..500ef60b1b82 100644
--- a/Documentation/networking/packet_mmap.rst
+++ b/Documentation/networking/packet_mmap.rst
@@ -8,7 +8,7 @@ Abstract
 ========
 
 This file documents the mmap() facility available with the PACKET
-socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for
+socket interface. This type of sockets is used for
 
 i) capture network traffic with utilities like tcpdump,
 ii) transmit network traffic, or any other that needs raw
@@ -25,12 +25,12 @@ Please send your comments to
 Why use PACKET_MMAP
 ===================
 
-In Linux 2.4/2.6/3.x if PACKET_MMAP is not enabled, the capture process is very
+Non PACKET_MMAP capture process (plain AF_PACKET) is very
 inefficient. It uses very limited buffers and requires one system call to
 capture each packet, it requires two if you want to get packet's timestamp
 (like libpcap always does).
 
-In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size
+On the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size
 configurable circular buffer mapped in user space that can be used to either
 send or receive packets. This way reading packets just needs to wait for them,
 most of the time there is no need to issue a single system call. Concerning
@@ -252,8 +252,7 @@ PACKET_MMAP setting constraints
 
 In kernel versions prior to 2.4.26 (for the 2.4 branch) and 2.6.5 (2.6 branch),
 the PACKET_MMAP buffer could hold only 32768 frames in a 32 bit architecture or
-16384 in a 64 bit architecture. For information on these kernel versions
-see http://pusa.uv.es/~ulisses/packet_mmap/packet_mmap.pre-2.4.26_2.6.5.txt
+16384 in a 64 bit architecture.
 
 Block size limit
 ----------------

From 862aecbd9569e563b979c0e23a908b43cda4b0b9 Mon Sep 17 00:00:00 2001
From: YANG LI <abaci-bugfix@linux.alibaba.com>
Date: Wed, 30 Dec 2020 15:23:14 +0800
Subject: [PATCH 330/547] ibmvnic: fix: NULL pointer dereference.

The error is due to dereference a null pointer in function
reset_one_sub_crq_queue():

if (!scrq) {
    netdev_dbg(adapter->netdev,
               "Invalid scrq reset. irq (%d) or msgs(%p).\n",
		scrq->irq, scrq->msgs);
		return -EINVAL;
}

If the expression is true, scrq must be a null pointer and cannot
dereference.

Fixes: 9281cf2d5840 ("ibmvnic: avoid memset null scrq msgs")
Signed-off-by: YANG LI <abaci-bugfix@linux.alibaba.com>
Reported-by: Abaci <abaci@linux.alibaba.com>
Acked-by: Lijun Pan <ljp@linux.ibm.com>
Link: https://lore.kernel.org/r/1609312994-121032-1-git-send-email-abaci-bugfix@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 91ebcde30292..9778c83150f1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2981,9 +2981,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter,
 	int rc;
 
 	if (!scrq) {
-		netdev_dbg(adapter->netdev,
-			   "Invalid scrq reset. irq (%d) or msgs (%p).\n",
-			   scrq->irq, scrq->msgs);
+		netdev_dbg(adapter->netdev, "Invalid scrq reset.\n");
 		return -EINVAL;
 	}
 

From 1d0d561ad1d7606bb745c1ed9478e7206860e56e Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Mon, 4 Jan 2021 10:38:02 +0000
Subject: [PATCH 331/547] net: macb: Correct usage of MACB_CAPS_CLK_HW_CHG flag

A new flag MACB_CAPS_CLK_HW_CHG was added and all callers of
macb_set_tx_clk were gated on the presence of this flag.

-   if (!clk)
+ if (!bp->tx_clk || !(bp->caps & MACB_CAPS_CLK_HW_CHG))

However the flag was not added to anything other than the new
sama7g5_gem, turning that function call into a no op for all other
systems. This breaks the networking on Zynq.

The commit message adding this states: a new capability so that
macb_set_tx_clock() to not be called for IPs having this
capability

This strongly implies that present of the flag was intended to skip
the function not absence of the flag. Update the if statement to
this effect, which repairs the existing users.

Fixes: daafa1d33cc9 ("net: macb: add capability to not set the clock rate")
Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Reviewed-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20210104103802.13091-1-ckeepax@opensource.cirrus.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/cadence/macb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index d5d910916c2e..814a5b10141d 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -467,7 +467,7 @@ static void macb_set_tx_clk(struct macb *bp, int speed)
 {
 	long ferr, rate, rate_rounded;
 
-	if (!bp->tx_clk || !(bp->caps & MACB_CAPS_CLK_HW_CHG))
+	if (!bp->tx_clk || (bp->caps & MACB_CAPS_CLK_HW_CHG))
 		return;
 
 	switch (speed) {

From 2ff2c7e274392871bfdee00ff2adbb8ebae5d240 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 30 Dec 2020 13:42:51 +0200
Subject: [PATCH 332/547] selftests: mlxsw: Set headroom size of correct port

The test was setting the headroom size of the wrong port. This was not
visible because of a firmware bug that canceled this bug.

Set the headroom size of the correct port, so that the test will pass
with both old and new firmware versions.

Fixes: bfa804784e32 ("selftests: mlxsw: Add a PFC test")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://lore.kernel.org/r/20201230114251.394009-1-idosch@idosch.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index 4d900bc1f76c..5c7700212f75 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -230,7 +230,7 @@ switch_create()
 	__mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
 	# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
 	# is (-2*MTU) about 80K of delay provision.
-	__mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
+	__mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
 
 	# bridges
 	# -------

From cfd82dfc9799c53ef109343a23af006a0f6860a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Wed, 30 Dec 2020 16:24:51 +0100
Subject: [PATCH 333/547] net: usb: qmi_wwan: add Quectel EM160R-GL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New modem using ff/ff/30 for QCDM, ff/00/00 for  AT and NMEA,
and ff/ff/ff for RMNET/QMI.

T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0
D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1
P: Vendor=2c7c ProdID=0620 Rev= 4.09
S: Manufacturer=Quectel
S: Product=EM160R-GL
S: SerialNumber=e31cedc1
C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA
I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none)
E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none)
E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms
E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms
I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none)
E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms
E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms
I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none)
E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms
E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms
I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)
E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms
E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms
E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Link: https://lore.kernel.org/r/20201230152451.245271-1-bjorn@mork.no
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index d166c321ee9b..af19513a9f75 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1013,6 +1013,7 @@ static const struct usb_device_id products[] = {
 	{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)},	/* Quectel EC25, EC20 R2.0  Mini PCIe */
 	{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)},	/* Quectel EP06/EG06/EM06 */
 	{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)},	/* Quectel EG12/EM12 */
+	{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0620)},	/* Quectel EM160R-GL */
 	{QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)},	/* Quectel RM500Q-GL */
 
 	/* 3. Combined interface devices matching on interface number */

From e80bd76fbf563cc7ed8c9e9f3bbcdf59b0897f69 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 30 Dec 2020 19:33:34 +0100
Subject: [PATCH 334/547] r8169: work around power-saving bug on some chip
 versions

A user reported failing network with RTL8168dp (a quite rare chip
version). Realtek confirmed that few chip versions suffer from a PLL
power-down hw bug.

Fixes: 07df5bd874f0 ("r8169: power down chip in probe")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://lore.kernel.org/r/a1c39460-d533-7f9e-fa9d-2b8990b02426@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/realtek/r8169_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 46d8510b2fe2..a569abe7f5ef 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2207,7 +2207,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp)
 	}
 
 	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
+	case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26:
+	case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33:
 	case RTL_GIGA_MAC_VER_37:
 	case RTL_GIGA_MAC_VER_39:
 	case RTL_GIGA_MAC_VER_43:
@@ -2233,7 +2234,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp)
 static void rtl_pll_power_up(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
+	case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26:
+	case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33:
 	case RTL_GIGA_MAC_VER_37:
 	case RTL_GIGA_MAC_VER_39:
 	case RTL_GIGA_MAC_VER_43:

From b40f97b91a3b167ab22c9e9f1ef00b1615ff01e9 Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Thu, 31 Dec 2020 09:43:31 -0800
Subject: [PATCH 335/547] net: lapb: Decrease the refcount of "struct lapb_cb"
 in lapb_device_event

In lapb_device_event, lapb_devtostruct is called to get a reference to
an object of "struct lapb_cb". lapb_devtostruct increases the refcount
of the object and returns a pointer to it. However, we didn't decrease
the refcount after we finished using the pointer. This patch fixes this
problem.

Fixes: a4989fa91110 ("net/lapb: support netdev events")
Cc: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: Xie He <xie.he.0141@gmail.com>
Link: https://lore.kernel.org/r/20201231174331.64539-1-xie.he.0141@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/lapb/lapb_iface.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 213ea7abc9ab..40961889e9c0 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -489,6 +489,7 @@ static int lapb_device_event(struct notifier_block *this, unsigned long event,
 		break;
 	}
 
+	lapb_put(lapb);
 	return NOTIFY_DONE;
 }
 

From c1a9ec7e5d577a9391660800c806c53287fca991 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sun, 3 Jan 2021 02:25:43 +0100
Subject: [PATCH 336/547] net: dsa: lantiq_gswip: Enable GSWIP_MII_CFG_EN also
 for internal PHYs

Enable GSWIP_MII_CFG_EN also for internal PHYs to make traffic flow.
Without this the PHY link is detected properly and ethtool statistics
for TX are increasing but there's no RX traffic coming in.

Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200")
Suggested-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/lantiq_gswip.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 09701c17f3f6..5d378c8026f0 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1541,9 +1541,7 @@ static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port,
 {
 	struct gswip_priv *priv = ds->priv;
 
-	/* Enable the xMII interface only for the external PHY */
-	if (interface != PHY_INTERFACE_MODE_INTERNAL)
-		gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port);
+	gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port);
 }
 
 static void gswip_get_strings(struct dsa_switch *ds, int port, u32 stringset,

From 709a3c9dff2a639966ae7d8ba6239d2b8aba036d Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sun, 3 Jan 2021 02:25:44 +0100
Subject: [PATCH 337/547] net: dsa: lantiq_gswip: Fix GSWIP_MII_CFG(p) register
 access

There is one GSWIP_MII_CFG register for each switch-port except the CPU
port. The register offset for the first port is 0x0, 0x02 for the
second, 0x04 for the third and so on.

Update the driver to not only restrict the GSWIP_MII_CFG registers to
ports 0, 1 and 5. Handle ports 0..5 instead but skip the CPU port. This
means we are not overwriting the configuration for the third port (port
two since we start counting from zero) with the settings for the sixth
port (with number five) anymore.

The GSWIP_MII_PCDU(p) registers are not updated because there's really
only three (one for each of the following ports: 0, 1, 5).

Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200")
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/lantiq_gswip.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 5d378c8026f0..4b36d89bec06 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -92,9 +92,7 @@
 					 GSWIP_MDIO_PHY_FDUP_MASK)
 
 /* GSWIP MII Registers */
-#define GSWIP_MII_CFG0			0x00
-#define GSWIP_MII_CFG1			0x02
-#define GSWIP_MII_CFG5			0x04
+#define GSWIP_MII_CFGp(p)		(0x2 * (p))
 #define  GSWIP_MII_CFG_EN		BIT(14)
 #define  GSWIP_MII_CFG_LDCLKDIS		BIT(12)
 #define  GSWIP_MII_CFG_MODE_MIIP	0x0
@@ -392,17 +390,9 @@ static void gswip_mii_mask(struct gswip_priv *priv, u32 clear, u32 set,
 static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set,
 			       int port)
 {
-	switch (port) {
-	case 0:
-		gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG0);
-		break;
-	case 1:
-		gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG1);
-		break;
-	case 5:
-		gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG5);
-		break;
-	}
+	/* There's no MII_CFG register for the CPU port */
+	if (!dsa_is_cpu_port(priv->ds, port))
+		gswip_mii_mask(priv, clear, set, GSWIP_MII_CFGp(port));
 }
 
 static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set,
@@ -822,9 +812,8 @@ static int gswip_setup(struct dsa_switch *ds)
 	gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1);
 
 	/* Disable the xMII link */
-	gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 0);
-	gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 1);
-	gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 5);
+	for (i = 0; i < priv->hw_info->max_ports; i++)
+		gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i);
 
 	/* enable special tag insertion on cpu port */
 	gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN,

From 81b6d05ccad4f3d8a9dfb091fb46ad6978ee40e4 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 4 Jan 2021 20:36:35 +0000
Subject: [PATCH 338/547] io_uring: synchronise IOPOLL on task_submit fail

io_req_task_submit() might be called for IOPOLL, do the fail path under
uring_lock to comply with IOPOLL synchronisation based solely on it.

Cc: stable@vger.kernel.org # 5.5+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ca46f314640b..5be33fd8b6bc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2126,15 +2126,16 @@ static void io_req_task_cancel(struct callback_head *cb)
 static void __io_req_task_submit(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	bool fail;
 
-	if (!__io_sq_thread_acquire_mm(ctx) &&
-	    !__io_sq_thread_acquire_files(ctx)) {
-		mutex_lock(&ctx->uring_lock);
+	fail = __io_sq_thread_acquire_mm(ctx) ||
+		__io_sq_thread_acquire_files(ctx);
+	mutex_lock(&ctx->uring_lock);
+	if (!fail)
 		__io_queue_sqe(req, NULL);
-		mutex_unlock(&ctx->uring_lock);
-	} else {
+	else
 		__io_req_task_cancel(req, -EFAULT);
-	}
+	mutex_unlock(&ctx->uring_lock);
 }
 
 static void io_req_task_submit(struct callback_head *cb)

From 6c503150ae33ee19036255cfda0998463613352c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 4 Jan 2021 20:36:36 +0000
Subject: [PATCH 339/547] io_uring: patch up IOPOLL overflow_flush sync

IOPOLL skips completion locking but keeps it under uring_lock, thus
io_cqring_overflow_flush() and so io_cqring_events() need additional
locking with uring_lock in some cases for IOPOLL.

Remove __io_cqring_overflow_flush() from io_cqring_events(), introduce a
wrapper around flush doing needed synchronisation and call it by hand.

Cc: stable@vger.kernel.org # 5.5+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 78 +++++++++++++++++++++++++++------------------------
 1 file changed, 41 insertions(+), 37 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5be33fd8b6bc..445035b24a50 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1713,9 +1713,9 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 }
 
 /* Returns true if there are no backlogged entries after the flush */
-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
-				     struct task_struct *tsk,
-				     struct files_struct *files)
+static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+				       struct task_struct *tsk,
+				       struct files_struct *files)
 {
 	struct io_rings *rings = ctx->rings;
 	struct io_kiocb *req, *tmp;
@@ -1768,6 +1768,20 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
 	return all_flushed;
 }
 
+static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+				     struct task_struct *tsk,
+				     struct files_struct *files)
+{
+	if (test_bit(0, &ctx->cq_check_overflow)) {
+		/* iopoll syncs against uring_lock, not completion_lock */
+		if (ctx->flags & IORING_SETUP_IOPOLL)
+			mutex_lock(&ctx->uring_lock);
+		__io_cqring_overflow_flush(ctx, force, tsk, files);
+		if (ctx->flags & IORING_SETUP_IOPOLL)
+			mutex_unlock(&ctx->uring_lock);
+	}
+}
+
 static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -2314,20 +2328,8 @@ static void io_double_put_req(struct io_kiocb *req)
 		io_free_req(req);
 }
 
-static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
+static unsigned io_cqring_events(struct io_ring_ctx *ctx)
 {
-	if (test_bit(0, &ctx->cq_check_overflow)) {
-		/*
-		 * noflush == true is from the waitqueue handler, just ensure
-		 * we wake up the task, and the next invocation will flush the
-		 * entries. We cannot safely to it from here.
-		 */
-		if (noflush)
-			return -1U;
-
-		io_cqring_overflow_flush(ctx, false, NULL, NULL);
-	}
-
 	/* See comment at the top of this file */
 	smp_rmb();
 	return __io_cqring_events(ctx);
@@ -2552,7 +2554,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 		 * If we do, we can potentially be spinning for commands that
 		 * already triggered a CQE (eg in error).
 		 */
-		if (io_cqring_events(ctx, false))
+		if (test_bit(0, &ctx->cq_check_overflow))
+			__io_cqring_overflow_flush(ctx, false, NULL, NULL);
+		if (io_cqring_events(ctx))
 			break;
 
 		/*
@@ -6827,7 +6831,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 
 	/* if we have a backlog and couldn't flush it all, return BUSY */
 	if (test_bit(0, &ctx->sq_check_overflow)) {
-		if (!io_cqring_overflow_flush(ctx, false, NULL, NULL))
+		if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL))
 			return -EBUSY;
 	}
 
@@ -7090,7 +7094,7 @@ struct io_wait_queue {
 	unsigned nr_timeouts;
 };
 
-static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush)
+static inline bool io_should_wake(struct io_wait_queue *iowq)
 {
 	struct io_ring_ctx *ctx = iowq->ctx;
 
@@ -7099,7 +7103,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush)
 	 * started waiting. For timeouts, we always want to return to userspace,
 	 * regardless of event count.
 	 */
-	return io_cqring_events(ctx, noflush) >= iowq->to_wait ||
+	return io_cqring_events(ctx) >= iowq->to_wait ||
 			atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
 }
 
@@ -7109,11 +7113,13 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
 	struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
 							wq);
 
-	/* use noflush == true, as we can't safely rely on locking context */
-	if (!io_should_wake(iowq, true))
-		return -1;
-
-	return autoremove_wake_function(curr, mode, wake_flags, key);
+	/*
+	 * Cannot safely flush overflowed CQEs from here, ensure we wake up
+	 * the task, and the next invocation will do it.
+	 */
+	if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow))
+		return autoremove_wake_function(curr, mode, wake_flags, key);
+	return -1;
 }
 
 static int io_run_task_work_sig(void)
@@ -7150,7 +7156,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	int ret = 0;
 
 	do {
-		if (io_cqring_events(ctx, false) >= min_events)
+		io_cqring_overflow_flush(ctx, false, NULL, NULL);
+		if (io_cqring_events(ctx) >= min_events)
 			return 0;
 		if (!io_run_task_work())
 			break;
@@ -7178,6 +7185,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
 	trace_io_uring_cqring_wait(ctx, min_events);
 	do {
+		io_cqring_overflow_flush(ctx, false, NULL, NULL);
 		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
 						TASK_INTERRUPTIBLE);
 		/* make sure we run task_work before checking for signals */
@@ -7186,8 +7194,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			continue;
 		else if (ret < 0)
 			break;
-		if (io_should_wake(&iowq, false))
+		if (io_should_wake(&iowq))
 			break;
+		if (test_bit(0, &ctx->cq_check_overflow))
+			continue;
 		if (uts) {
 			timeout = schedule_timeout(timeout);
 			if (timeout == 0) {
@@ -8625,7 +8635,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	smp_rmb();
 	if (!io_sqring_full(ctx))
 		mask |= EPOLLOUT | EPOLLWRNORM;
-	if (io_cqring_events(ctx, false))
+	io_cqring_overflow_flush(ctx, false, NULL, NULL);
+	if (io_cqring_events(ctx))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	return mask;
@@ -8683,7 +8694,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	/* if force is set, the ring is going away. always drop after that */
 	ctx->cq_overflow_flushed = 1;
 	if (ctx->rings)
-		io_cqring_overflow_flush(ctx, true, NULL, NULL);
+		__io_cqring_overflow_flush(ctx, true, NULL, NULL);
 	mutex_unlock(&ctx->uring_lock);
 
 	io_kill_timeouts(ctx, NULL, NULL);
@@ -8857,9 +8868,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 	}
 
 	io_cancel_defer_files(ctx, task, files);
-	io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
 	io_cqring_overflow_flush(ctx, true, task, files);
-	io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
 
 	if (!files)
 		__io_uring_cancel_task_requests(ctx, task);
@@ -9195,13 +9204,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	 */
 	ret = 0;
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
-		if (!list_empty_careful(&ctx->cq_overflow_list)) {
-			bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL;
+		io_cqring_overflow_flush(ctx, false, NULL, NULL);
 
-			io_ring_submit_lock(ctx, needs_lock);
-			io_cqring_overflow_flush(ctx, false, NULL, NULL);
-			io_ring_submit_unlock(ctx, needs_lock);
-		}
 		if (flags & IORING_ENTER_SQ_WAKEUP)
 			wake_up(&ctx->sq_data->wait);
 		if (flags & IORING_ENTER_SQ_WAIT)

From de7f1d9e99d8b99e4e494ad8fcd91f0c4c5c9357 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 4 Jan 2021 20:43:29 +0000
Subject: [PATCH 340/547] io_uring: drop file refs after task cancel

io_uring fds marked O_CLOEXEC and we explicitly cancel all requests
before going through exec, so we don't want to leave task's file
references to not our anymore io_uring instances.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 445035b24a50..85de42c42433 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8958,6 +8958,15 @@ static void io_uring_attempt_task_drop(struct file *file)
 		io_uring_del_task_file(file);
 }
 
+static void io_uring_remove_task_files(struct io_uring_task *tctx)
+{
+	struct file *file;
+	unsigned long index;
+
+	xa_for_each(&tctx->xa, index, file)
+		io_uring_del_task_file(file);
+}
+
 void __io_uring_files_cancel(struct files_struct *files)
 {
 	struct io_uring_task *tctx = current->io_uring;
@@ -8966,16 +8975,12 @@ void __io_uring_files_cancel(struct files_struct *files)
 
 	/* make sure overflow events are dropped */
 	atomic_inc(&tctx->in_idle);
-
-	xa_for_each(&tctx->xa, index, file) {
-		struct io_ring_ctx *ctx = file->private_data;
-
-		io_uring_cancel_task_requests(ctx, files);
-		if (files)
-			io_uring_del_task_file(file);
-	}
-
+	xa_for_each(&tctx->xa, index, file)
+		io_uring_cancel_task_requests(file->private_data, files);
 	atomic_dec(&tctx->in_idle);
+
+	if (files)
+		io_uring_remove_task_files(tctx);
 }
 
 static s64 tctx_inflight(struct io_uring_task *tctx)
@@ -9038,6 +9043,8 @@ void __io_uring_task_cancel(void)
 	} while (1);
 
 	atomic_dec(&tctx->in_idle);
+
+	io_uring_remove_task_files(tctx);
 }
 
 static int io_uring_flush(struct file *file, void *data)

From 90df08538c07b7135703358a0c8c08d97889a704 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 4 Jan 2021 20:43:30 +0000
Subject: [PATCH 341/547] io_uring: cancel more aggressively in exit_work

While io_ring_exit_work() is running new requests of all sorts may be
issued, so it should do a bit more to cancel them, otherwise they may
just get stuck. e.g. in io-wq, in poll lists, etc.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 85de42c42433..5bccb235271f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -992,6 +992,9 @@ enum io_mem_account {
 	ACCT_PINNED,
 };
 
+static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
+					    struct task_struct *task);
+
 static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node);
 static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
 			struct io_ring_ctx *ctx);
@@ -8675,7 +8678,7 @@ static void io_ring_exit_work(struct work_struct *work)
 	 * as nobody else will be looking for them.
 	 */
 	do {
-		io_iopoll_try_reap_events(ctx);
+		__io_uring_cancel_task_requests(ctx, NULL);
 	} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
 	io_ring_ctx_free(ctx);
 }
@@ -8830,9 +8833,11 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 		enum io_wq_cancel cret;
 		bool ret = false;
 
-		cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
-		if (cret != IO_WQ_CANCEL_NOTFOUND)
-			ret = true;
+		if (ctx->io_wq) {
+			cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb,
+					       &cancel, true);
+			ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
+		}
 
 		/* SQPOLL thread does its own polling */
 		if (!(ctx->flags & IORING_SETUP_SQPOLL)) {

From 75353bcd2184010f08a3ed2f0da019bd9d604e1e Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 24 Dec 2020 15:13:57 +0000
Subject: [PATCH 342/547] drm/i915: clear the shadow batch

The shadow batch is an internal object, which doesn't have any page
clearing, and since the batch_len can be smaller than the object, we
should take care to clear it.

Testcase: igt/gen9_exec_parse/shadow-peek
Fixes: 4f7af1948abc ("drm/i915: Support ro ppgtt mapped cmdparser shadow buffers")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20201224151358.401345-1-matthew.auld@intel.com
Cc: stable@vger.kernel.org
(cherry picked from commit eeb52ee6c4a429ec301faf1dc48988744960786e)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 27 +++++++++-----------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 93265951fdbb..b0899b665e85 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1166,7 +1166,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		}
 	}
 	if (IS_ERR(src)) {
-		unsigned long x, n;
+		unsigned long x, n, remain;
 		void *ptr;
 
 		/*
@@ -1177,14 +1177,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		 * We don't care about copying too much here as we only
 		 * validate up to the end of the batch.
 		 */
+		remain = length;
 		if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
-			length = round_up(length,
+			remain = round_up(remain,
 					  boot_cpu_data.x86_clflush_size);
 
 		ptr = dst;
 		x = offset_in_page(offset);
-		for (n = offset >> PAGE_SHIFT; length; n++) {
-			int len = min(length, PAGE_SIZE - x);
+		for (n = offset >> PAGE_SHIFT; remain; n++) {
+			int len = min(remain, PAGE_SIZE - x);
 
 			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
 			if (needs_clflush)
@@ -1193,13 +1194,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 			kunmap_atomic(src);
 
 			ptr += len;
-			length -= len;
+			remain -= len;
 			x = 0;
 		}
 	}
 
 	i915_gem_object_unpin_pages(src_obj);
 
+	memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));
+
 	/* dst_obj is returned with vmap pinned */
 	return dst;
 }
@@ -1392,11 +1395,6 @@ static unsigned long *alloc_whitelist(u32 batch_length)
 
 #define LENGTH_BIAS 2
 
-static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
-}
-
 /**
  * intel_engine_cmd_parser() - parse a batch buffer for privilege violations
  * @engine: the engine on which the batch is to execute
@@ -1538,16 +1536,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 				ret = 0; /* allow execution */
 			}
 		}
-
-		if (shadow_needs_clflush(shadow->obj))
-			drm_clflush_virt_range(batch_end, 8);
 	}
 
-	if (shadow_needs_clflush(shadow->obj)) {
-		void *ptr = page_mask_bits(shadow->obj->mm.mapping);
-
-		drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
-	}
+	i915_gem_object_flush_map(shadow->obj);
 
 	if (!IS_ERR_OR_NULL(jump_whitelist))
 		kfree(jump_whitelist);

From 641382e9b44fba81a0778e1914ee35b8471121f9 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 24 Dec 2020 15:13:58 +0000
Subject: [PATCH 343/547] drm/i915: clear the gpu reloc batch

The reloc batch is short lived but can exist in the user visible ppGTT,
and since it's backed by an internal object, which lacks page clearing,
we should take care to clear it upfront.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20201224151358.401345-2-matthew.auld@intel.com
Cc: stable@vger.kernel.org
(cherry picked from commit 26ebc511e799f621357982ccc37a7987a56a00f4)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index bcc80f428172..bd3046e5a934 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1046,7 +1046,7 @@ static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cach
 	GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
 	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
 
-	__i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
+	i915_gem_object_flush_map(obj);
 	i915_gem_object_unpin_map(obj);
 
 	intel_gt_chipset_flush(cache->rq->engine->gt);
@@ -1296,6 +1296,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_pool;
 	}
 
+	memset32(cmd, 0, pool->obj->base.size / sizeof(u32));
+
 	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);

From 557862535c2cad6de6f6fb12312b7a6d09c06407 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 29 Dec 2020 12:08:28 +0000
Subject: [PATCH 344/547] drm/i915/gt: Define guc firmware blob for older
 Cometlakes

When splitting the Coffeelake define to also identify Cometlakes, I
missed the double fw_def for Coffeelake. That is only newer Cometlakes
use the cml specific guc firmware, older Cometlakes should use kbl
firmware.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2859
Fixes: 5f4ae2704d59 ("drm/i915: Identify Cometlake platform")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: <stable@vger.kernel.org> # v5.9+
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201229120828.29931-1-chris@chris-wilson.co.uk
(cherry picked from commit 70960ab27542d8dc322f909f516391f331fbd3f1)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 180c23e2e25e..602f1a0bc587 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
 	fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl,  9, 0, 0)) \
 	fw_def(ICELAKE,     0, guc_def(icl, 49, 0, 1), huc_def(icl,  9, 0, 0)) \
 	fw_def(COMETLAKE,   5, guc_def(cml, 49, 0, 1), huc_def(cml,  4, 0, 0)) \
+	fw_def(COMETLAKE,   0, guc_def(kbl, 49, 0, 1), huc_def(kbl,  4, 0, 0)) \
 	fw_def(COFFEELAKE,  0, guc_def(kbl, 49, 0, 1), huc_def(kbl,  4, 0, 0)) \
 	fw_def(GEMINILAKE,  0, guc_def(glk, 49, 0, 1), huc_def(glk,  4, 0, 0)) \
 	fw_def(KABYLAKE,    0, guc_def(kbl, 49, 0, 1), huc_def(kbl,  4, 0, 0)) \

From 9397d66212cdf7a21c66523f1583e5d63a609e84 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 30 Dec 2020 20:23:09 +0000
Subject: [PATCH 345/547] drm/i915/dp: Track pm_qos per connector
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since multiple connectors may run intel_dp_aux_xfer conncurrently, a
single global pm_qos does not suffice. (One connector may disable the
dma-latency boost prematurely while the second is still depending on
it.) Instead of a single global pm_qos, track the pm_qos request for
each intel_dp.

v2: Move the pm_qos setup/teardown to intel_dp_aux_init/fini

Fixes: 9ee32fea5fe8 ("drm/i915: irq-drive the dp aux communication")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201230202309.23982-1-chris@chris-wilson.co.uk
(cherry picked from commit b3304591f14b437b6bccd8dbff06006c11837031)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++
 drivers/gpu/drm/i915/display/intel_dp.c            | 8 ++++++--
 drivers/gpu/drm/i915/i915_drv.c                    | 5 -----
 drivers/gpu/drm/i915/i915_drv.h                    | 3 ---
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index ce82d654d0f2..34d78c654df3 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1436,6 +1436,9 @@ struct intel_dp {
 		bool ycbcr_444_to_420;
 	} dfp;
 
+	/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
+	struct pm_qos_request pm_qos;
+
 	/* Display stream compression testing */
 	bool force_dsc_en;
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 2165398d2c7c..37f1a10fd021 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1489,7 +1489,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 	 * lowest possible wakeup latency and so prevent the cpu from going into
 	 * deep sleep states.
 	 */
-	cpu_latency_qos_update_request(&i915->pm_qos, 0);
+	cpu_latency_qos_update_request(&intel_dp->pm_qos, 0);
 
 	intel_dp_check_edp(intel_dp);
 
@@ -1622,7 +1622,7 @@ done:
 
 	ret = recv_bytes;
 out:
-	cpu_latency_qos_update_request(&i915->pm_qos, PM_QOS_DEFAULT_VALUE);
+	cpu_latency_qos_update_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE);
 
 	if (vdd)
 		edp_panel_vdd_off(intel_dp, false);
@@ -1898,6 +1898,9 @@ static i915_reg_t tgl_aux_data_reg(struct intel_dp *intel_dp, int index)
 static void
 intel_dp_aux_fini(struct intel_dp *intel_dp)
 {
+	if (cpu_latency_qos_request_active(&intel_dp->pm_qos))
+		cpu_latency_qos_remove_request(&intel_dp->pm_qos);
+
 	kfree(intel_dp->aux.name);
 }
 
@@ -1950,6 +1953,7 @@ intel_dp_aux_init(struct intel_dp *intel_dp)
 					       encoder->base.name);
 
 	intel_dp->aux.transfer = intel_dp_aux_transfer;
+	cpu_latency_qos_add_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE);
 }
 
 bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 320856b665a1..88ad754962af 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -578,8 +578,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 
 	pci_set_master(pdev);
 
-	cpu_latency_qos_add_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
-
 	intel_gt_init_workarounds(dev_priv);
 
 	/* On the 945G/GM, the chipset reports the MSI capability on the
@@ -626,7 +624,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 err_msi:
 	if (pdev->msi_enabled)
 		pci_disable_msi(pdev);
-	cpu_latency_qos_remove_request(&dev_priv->pm_qos);
 err_mem_regions:
 	intel_memory_regions_driver_release(dev_priv);
 err_ggtt:
@@ -648,8 +645,6 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
 
 	if (pdev->msi_enabled)
 		pci_disable_msi(pdev);
-
-	cpu_latency_qos_remove_request(&dev_priv->pm_qos);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0a3ee4f9dc0a..632c713227dc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -891,9 +891,6 @@ struct drm_i915_private {
 
 	bool display_irqs_enabled;
 
-	/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
-	struct pm_qos_request pm_qos;
-
 	/* Sideband mailbox protection */
 	struct mutex sb_lock;
 	struct pm_qos_request sb_qos;

From 05f6f7271a38c482c5021967433f7b698e102c45 Mon Sep 17 00:00:00 2001
From: Qii Wang <qii.wang@mediatek.com>
Date: Thu, 24 Dec 2020 20:26:07 +0800
Subject: [PATCH 346/547] i2c: mediatek: Fix apdma and i2c hand-shake timeout

With the apdma remove hand-shake signal, it requirs special
operation timing to reset i2c manually, otherwise the interrupt
will not be triggered, i2c transmission will be timeout.

Fixes: 8426fe70cfa4("i2c: mediatek: Add apdma sync in i2c driver")
Signed-off-by: Qii Wang <qii.wang@mediatek.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-mt65xx.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index 33de99b7bc20..0818d3e50734 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -38,6 +38,7 @@
 #define I2C_IO_CONFIG_OPEN_DRAIN	0x0003
 #define I2C_IO_CONFIG_PUSH_PULL		0x0000
 #define I2C_SOFT_RST			0x0001
+#define I2C_HANDSHAKE_RST		0x0020
 #define I2C_FIFO_ADDR_CLR		0x0001
 #define I2C_DELAY_LEN			0x0002
 #define I2C_TIME_CLR_VALUE		0x0000
@@ -45,6 +46,7 @@
 #define I2C_WRRD_TRANAC_VALUE		0x0002
 #define I2C_RD_TRANAC_VALUE		0x0001
 #define I2C_SCL_MIS_COMP_VALUE		0x0000
+#define I2C_CHN_CLR_FLAG		0x0000
 
 #define I2C_DMA_CON_TX			0x0000
 #define I2C_DMA_CON_RX			0x0001
@@ -54,7 +56,9 @@
 #define I2C_DMA_START_EN		0x0001
 #define I2C_DMA_INT_FLAG_NONE		0x0000
 #define I2C_DMA_CLR_FLAG		0x0000
+#define I2C_DMA_WARM_RST		0x0001
 #define I2C_DMA_HARD_RST		0x0002
+#define I2C_DMA_HANDSHAKE_RST		0x0004
 
 #define MAX_SAMPLE_CNT_DIV		8
 #define MAX_STEP_CNT_DIV		64
@@ -475,11 +479,24 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c)
 {
 	u16 control_reg;
 
-	writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST);
-	udelay(50);
-	writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST);
-
-	mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET);
+	if (i2c->dev_comp->dma_sync) {
+		writel(I2C_DMA_WARM_RST, i2c->pdmabase + OFFSET_RST);
+		udelay(10);
+		writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST);
+		udelay(10);
+		writel(I2C_DMA_HANDSHAKE_RST | I2C_DMA_HARD_RST,
+		       i2c->pdmabase + OFFSET_RST);
+		mtk_i2c_writew(i2c, I2C_HANDSHAKE_RST | I2C_SOFT_RST,
+			       OFFSET_SOFTRESET);
+		udelay(10);
+		writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST);
+		mtk_i2c_writew(i2c, I2C_CHN_CLR_FLAG, OFFSET_SOFTRESET);
+	} else {
+		writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST);
+		udelay(50);
+		writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST);
+		mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET);
+	}
 
 	/* Set ioconfig */
 	if (i2c->use_push_pull)

From d1c5246e08eb64991001d97a3bd119c93edbc79a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 2 Dec 2020 22:28:12 -0800
Subject: [PATCH 347/547] x86/mm: Fix leak of pmd ptlock

Commit

  28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")

introduced a new location where a pmd was released, but neglected to
run the pmd page destructor. In fact, this happened previously for a
different pmd release path and was fixed by commit:

  c283610e44ec ("x86, mm: do not leak page->ptl for pmd page tables").

This issue was hidden until recently because the failure mode is silent,
but commit:

  b2b29d6d0119 ("mm: account PMD tables like PTE tables")

turns the failure mode into this signature:

 BUG: Bad page state in process lt-pmem-ns  pfn:15943d
 page:000000007262ed7b refcount:0 mapcount:-1024 mapping:0000000000000000 index:0x0 pfn:0x15943d
 flags: 0xaffff800000000()
 raw: 00affff800000000 dead000000000100 0000000000000000 0000000000000000
 raw: 0000000000000000 ffff913a029bcc08 00000000fffffbff 0000000000000000
 page dumped because: nonzero mapcount
 [..]
  dump_stack+0x8b/0xb0
  bad_page.cold+0x63/0x94
  free_pcp_prepare+0x224/0x270
  free_unref_page+0x18/0xd0
  pud_free_pmd_page+0x146/0x160
  ioremap_pud_range+0xe3/0x350
  ioremap_page_range+0x108/0x160
  __ioremap_caller.constprop.0+0x174/0x2b0
  ? memremap+0x7a/0x110
  memremap+0x7a/0x110
  devm_memremap+0x53/0xa0
  pmem_attach_disk+0x4ed/0x530 [nd_pmem]
  ? __devm_release_region+0x52/0x80
  nvdimm_bus_probe+0x85/0x210 [libnvdimm]

Given this is a repeat occurrence it seemed prudent to look for other
places where this destructor might be missing and whether a better
helper is needed. try_to_free_pmd_page() looks like a candidate, but
testing with setting up and tearing down pmd mappings via the dax unit
tests is thus far not triggering the failure.

As for a better helper pmd_free() is close, but it is a messy fit
due to requiring an @mm arg. Also, ___pmd_free_tlb() wants to call
paravirt_tlb_remove_table() instead of free_page(), so open-coded
pgtable_pmd_page_dtor() seems the best way forward for now.

Debugged together with Matthew Wilcox <willy@infradead.org>.

Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/160697689204.605323.17629854984697045602.stgit@dwillia2-desk3.amr.corp.intel.com
---
 arch/x86/mm/pgtable.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index dfd82f51ba66..f6a9e2e36642 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 	}
 
 	free_page((unsigned long)pmd_sv);
+
+	pgtable_pmd_page_dtor(virt_to_page(pmd));
 	free_page((unsigned long)pmd);
 
 	return 1;

From 311bea3cb9ee20ef150ca76fc60a592bf6b159f5 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 17 Dec 2020 16:24:32 -0800
Subject: [PATCH 348/547] arm64: link with -z norelro for LLD or aarch64-elf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With GNU binutils 2.35+, linking with BFD produces warnings for vmlinux:
aarch64-linux-gnu-ld: warning: -z norelro ignored

BFD can produce this warning when the target emulation mode does not
support RELRO program headers, and -z relro or -z norelro is passed.

Alan Modra clarifies:
  The default linker emulation for an aarch64-linux ld.bfd is
  -maarch64linux, the default for an aarch64-elf linker is
  -maarch64elf.  They are not equivalent.  If you choose -maarch64elf
  you get an emulation that doesn't support -z relro.

The ARCH=arm64 kernel prefers -maarch64elf, but may fall back to
-maarch64linux based on the toolchain configuration.

LLD will always create RELRO program header regardless of target
emulation.

To avoid the above warning when linking with BFD, pass -z norelro only
when linking with LLD or with -maarch64linux.

Fixes: 3b92fa7485eb ("arm64: link with -z norelro regardless of CONFIG_RELOCATABLE")
Fixes: 3bbd3db86470 ("arm64: relocatable: fix inconsistencies in linker script and options")
Cc: <stable@vger.kernel.org> # 5.0.x-
Reported-by: kernelci.org bot <bot@kernelci.org>
Reported-by: Quentin Perret <qperret@google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Cc: Alan Modra <amodra@gmail.com>
Cc: Fāng-ruì Sòng <maskray@google.com>
Link: https://lore.kernel.org/r/20201218002432.788499-1-ndesaulniers@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 6be9b3750250..90309208bb28 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -10,7 +10,7 @@
 #
 # Copyright (C) 1995-2001 by Russell King
 
-LDFLAGS_vmlinux	:=--no-undefined -X -z norelro
+LDFLAGS_vmlinux	:=--no-undefined -X
 
 ifeq ($(CONFIG_RELOCATABLE), y)
 # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
@@ -115,16 +115,20 @@ KBUILD_CPPFLAGS	+= -mbig-endian
 CHECKFLAGS	+= -D__AARCH64EB__
 # Prefer the baremetal ELF build target, but not all toolchains include
 # it so fall back to the standard linux version if needed.
-KBUILD_LDFLAGS	+= -EB $(call ld-option, -maarch64elfb, -maarch64linuxb)
+KBUILD_LDFLAGS	+= -EB $(call ld-option, -maarch64elfb, -maarch64linuxb -z norelro)
 UTS_MACHINE	:= aarch64_be
 else
 KBUILD_CPPFLAGS	+= -mlittle-endian
 CHECKFLAGS	+= -D__AARCH64EL__
 # Same as above, prefer ELF but fall back to linux target if needed.
-KBUILD_LDFLAGS	+= -EL $(call ld-option, -maarch64elf, -maarch64linux)
+KBUILD_LDFLAGS	+= -EL $(call ld-option, -maarch64elf, -maarch64linux -z norelro)
 UTS_MACHINE	:= aarch64
 endif
 
+ifeq ($(CONFIG_LD_IS_LLD), y)
+KBUILD_LDFLAGS	+= -z norelro
+endif
+
 CHECKFLAGS	+= -D__aarch64__
 
 ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)

From 96ebc9c871d8a28fb22aa758dd9188a4732df482 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Mon, 4 Jan 2021 20:07:15 -0800
Subject: [PATCH 349/547] usb: uas: Add PNY USB Portable SSD to unusual_uas

Here's another variant PNY Pro Elite USB 3.1 Gen 2 portable SSD that
hangs and doesn't respond to ATA_1x pass-through commands. If it doesn't
support these commands, it should respond properly to the host. Add it
to the unusual uas list to be able to move forward with other
operations.

Cc: stable@vger.kernel.org
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://lore.kernel.org/r/2edc7af892d0913bf06f5b35e49ec463f03d5ed8.1609819418.git.Thinh.Nguyen@synopsys.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_uas.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index 870e9cf3d5dc..f9677a5ec31b 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -90,6 +90,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_BROKEN_FUA),
 
+/* Reported-by: Thinh Nguyen <thinhn@synopsys.com> */
+UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999,
+		"PNY",
+		"Pro Elite SSD",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_NO_ATA_1X),
+
 /* Reported-by: Thinh Nguyen <thinhn@synopsys.com> */
 UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999,
 		"PNY",

From 45ba7b195a369f35cb39094fdb32efe5908b34ad Mon Sep 17 00:00:00 2001
From: Shannon Zhao <shannon.zhao@linux.alibaba.com>
Date: Mon, 4 Jan 2021 19:38:44 +0800
Subject: [PATCH 350/547] arm64: cpufeature: remove non-exist
 CONFIG_KVM_ARM_HOST

Commit d82755b2e781 ("KVM: arm64: Kill off CONFIG_KVM_ARM_HOST") deletes
CONFIG_KVM_ARM_HOST option, it should use CONFIG_KVM instead.

Just remove CONFIG_KVM_ARM_HOST here.

Fixes: d82755b2e781 ("KVM: arm64: Kill off CONFIG_KVM_ARM_HOST")
Signed-off-by: Shannon Zhao <shannon.zhao@linux.alibaba.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/1609760324-92271-1-git-send-email-shannon.zhao@linux.alibaba.com
---
 arch/arm64/kernel/cpufeature.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d96f4554282d..bc3549663957 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2558,7 +2558,7 @@ static void verify_hyp_capabilities(void)
 	int parange, ipa_max;
 	unsigned int safe_vmid_bits, vmid_bits;
 
-	if (!IS_ENABLED(CONFIG_KVM) || !IS_ENABLED(CONFIG_KVM_ARM_HOST))
+	if (!IS_ENABLED(CONFIG_KVM))
 		return;
 
 	safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);

From c9c48bb701ba78df7d4652146b12bcf3ad716507 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Tue, 22 Dec 2020 02:47:56 +0100
Subject: [PATCH 351/547] speakup: Add github repository URL and bug tracker

We have set up a repository for users to try newer releases more easily, and
keep records of known bugs.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20201222014756.ov5vi6fywylbp5n6@function
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 546aa66428c9..62934e770646 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16710,6 +16710,8 @@ M:	Samuel Thibault <samuel.thibault@ens-lyon.org>
 L:	speakup@linux-speakup.org
 S:	Odd Fixes
 W:	http://www.linux-speakup.org/
+W:	https://github.com/linux-speakup/speakup
+B:	https://github.com/linux-speakup/speakup/issues
 F:	drivers/accessibility/speakup/
 
 SPEAR CLOCK FRAMEWORK SUPPORT

From f6bcb4c7f366905b66ce8ffca7190118244bb642 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 5 Jan 2021 14:42:29 +0300
Subject: [PATCH 352/547] regmap: debugfs: Fix a reversed if statement in
 regmap_debugfs_init()

This code will leak "map->debugfs_name" because the if statement is
reversed so it only frees NULL pointers instead of non-NULL.  In
fact the if statement is not required and should just be removed
because kfree() accepts NULL pointers.

Fixes: cffa4b2122f5 ("regmap: debugfs: Fix a memory leak when calling regmap_attach_dev")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/X/RQpfAwRdLg0GqQ@mwanda
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-debugfs.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index bf03cd343be2..ff2ee87987c7 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -594,9 +594,7 @@ void regmap_debugfs_init(struct regmap *map)
 	}
 
 	if (!strcmp(name, "dummy")) {
-		if (!map->debugfs_name)
-			kfree(map->debugfs_name);
-
+		kfree(map->debugfs_name);
 		map->debugfs_name = kasprintf(GFP_KERNEL, "dummy%d",
 						dummy_index);
 		if (!map->debugfs_name)

From 3fb6819f411b5a89afb5726afafacf0c4b62844f Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Mon, 28 Dec 2020 15:05:08 +0800
Subject: [PATCH 353/547] arm64: traps: remove duplicate include statement

asm/exception.h is included more than once. Remove the one that isn't
necessary.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Link: https://lore.kernel.org/r/1609139108-10819-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/traps.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 08156be75569..6895ce777e7f 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -42,7 +42,6 @@
 #include <asm/smp.h>
 #include <asm/stack_pointer.h>
 #include <asm/stacktrace.h>
-#include <asm/exception.h>
 #include <asm/system_misc.h>
 #include <asm/sysreg.h>
 

From e2bba5f92354488c331b7821d873db7c388e31aa Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Wed, 30 Dec 2020 14:19:54 -0800
Subject: [PATCH 354/547] arm64: vdso: disable .eh_frame_hdr via /DISCARD/
 instead of --no-eh-frame-hdr

Currently with ld.lld we emit an empty .eh_frame_hdr section (and a
corresponding program header) into the vDSO. With ld.bfd the section
is not emitted but the program header is, with p_vaddr set to 0. This
can lead to unwinders attempting to interpret the data at whichever
location the program header happens to point to as an unwind info
header. This happens to be mostly harmless as long as the byte at
that location (interpreted as a version number) has a value other
than 1, causing both libgcc and LLVM libunwind to ignore the section
(in libunwind's case, after printing an error message to stderr),
but it could lead to worse problems if the byte happened to be 1 or
the program header points to non-readable memory (e.g. if the empty
section was placed at a page boundary).

Instead of disabling .eh_frame_hdr via --no-eh-frame-hdr (which
also has the downside of being unsupported by older versions of GNU
binutils), disable it by discarding the section, and stop emitting
the program header that points to it.

I understand that we intend to emit valid unwind info for the vDSO
at some point. Once that happens this patch can be reverted.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Link: https://linux-review.googlesource.com/id/If745fd9cadcb31b4010acbf5693727fe111b0863
Link: https://lore.kernel.org/r/20201230221954.2007257-1-pcc@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/vdso/Makefile   | 3 +--
 arch/arm64/kernel/vdso/vdso.lds.S | 5 +----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index a8f8e409e2bf..cd9c3fa25902 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -24,8 +24,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
 # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
 # preparation in build-time C")).
 ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv	\
-	     -Bsymbolic $(call ld-option, --no-eh-frame-hdr) --build-id=sha1 -n	\
-	     $(btildflags-y) -T
+	     -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T
 
 ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index d808ad31e01f..61dbb4c838ef 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -40,9 +40,6 @@ SECTIONS
 	PROVIDE (_etext = .);
 	PROVIDE (etext = .);
 
-	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
-	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
-
 	.dynamic	: { *(.dynamic) }		:text	:dynamic
 
 	.rodata		: { *(.rodata*) }		:text
@@ -54,6 +51,7 @@ SECTIONS
 		*(.note.GNU-stack)
 		*(.data .data.* .gnu.linkonce.d.* .sdata*)
 		*(.bss .sbss .dynbss .dynsbss)
+		*(.eh_frame .eh_frame_hdr)
 	}
 }
 
@@ -66,7 +64,6 @@ PHDRS
 	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
 	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
 	note		PT_NOTE		FLAGS(4);		/* PF_R */
-	eh_frame_hdr	PT_GNU_EH_FRAME;
 }
 
 /*

From f34d93f30d6a72f6b15ba24b6994b746df0c30de Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 4 Jan 2021 12:15:41 +0000
Subject: [PATCH 355/547] arm64: kasan: Set TCR_EL1.TBID1 when KASAN_HW_TAGS is
 enabled

Commit 49b3cf035edc ("kasan: arm64: set TCR_EL1.TBID1 when enabled") set
the TBID1 bit for the KASAN_SW_TAGS configuration, freeing up 8 bits to
be used by PAC. With in-kernel MTE now in mainline, also set this bit
for the KASAN_HW_TAGS configuration.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Peter Collingbourne <pcc@google.com>
Acked-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Andrey Konovalov <andreyknvl@google.com>
---
 arch/arm64/mm/proc.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 37a54b57178a..1f7ee8c8b7b8 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -46,7 +46,7 @@
 #endif
 
 #ifdef CONFIG_KASAN_HW_TAGS
-#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1
+#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
 #else
 #define TCR_KASAN_HW_FLAGS 0
 #endif

From a8f7e08a81708920a928664a865208fdf451c49f Mon Sep 17 00:00:00 2001
From: Peter Gonda <pgonda@google.com>
Date: Tue, 5 Jan 2021 08:33:11 -0800
Subject: [PATCH 356/547] x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc
 handling

The IN and OUT instructions with port address as an immediate operand
only use an 8-bit immediate (imm8). The current VC handler uses the
entire 32-bit immediate value but these instructions only set the first
bytes.

Cast the operand to an u8 for that.

 [ bp: Massage commit message. ]

Fixes: 25189d08e5168 ("x86/sev-es: Add support for handling IOIO exceptions")
Signed-off-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: David Rientjes <rientjes@google.com>
Link: https://lkml.kernel.org/r/20210105163311.221490-1-pgonda@google.com
---
 arch/x86/kernel/sev-es-shared.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
index 7d04b356d44d..cdc04d091242 100644
--- a/arch/x86/kernel/sev-es-shared.c
+++ b/arch/x86/kernel/sev-es-shared.c
@@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
 	case 0xe4:
 	case 0xe5:
 		*exitinfo |= IOIO_TYPE_IN;
-		*exitinfo |= (u64)insn->immediate.value << 16;
+		*exitinfo |= (u8)insn->immediate.value << 16;
 		break;
 
 	/* OUT immediate opcodes */
 	case 0xe6:
 	case 0xe7:
 		*exitinfo |= IOIO_TYPE_OUT;
-		*exitinfo |= (u64)insn->immediate.value << 16;
+		*exitinfo |= (u8)insn->immediate.value << 16;
 		break;
 
 	/* IN register opcodes */

From d16baa3f1453c14d680c5fee01cd122a22d0e0ce Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Jan 2021 12:37:23 -0500
Subject: [PATCH 357/547] blk-iocost: fix NULL iocg deref from racing against
 initialization

When initializing iocost for a queue, its rqos should be registered before
the blkcg policy is activated to allow policy data initiailization to lookup
the associated ioc. This unfortunately means that the rqos methods can be
called on bios before iocgs are attached to all existing blkgs.

While the race is theoretically possible on ioc_rqos_throttle(), it mostly
happened in ioc_rqos_merge() due to the difference in how they lookup ioc.
The former determines it from the passed in @rqos and then bails before
dereferencing iocg if the looked up ioc is disabled, which most likely is
the case if initialization is still in progress. The latter looked up ioc by
dereferencing the possibly NULL iocg making it a lot more prone to actually
triggering the bug.

* Make ioc_rqos_merge() use the same method as ioc_rqos_throttle() to look
  up ioc for consistency.

* Make ioc_rqos_throttle() and ioc_rqos_merge() test for NULL iocg before
  dereferencing it.

* Explain the danger of NULL iocgs in blk_iocost_init().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jonathan Lemon <bsd@fb.com>
Cc: stable@vger.kernel.org # v5.4+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-iocost.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index ac6078a34939..98d656bdb42b 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2551,8 +2551,8 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 	bool use_debt, ioc_locked;
 	unsigned long flags;
 
-	/* bypass IOs if disabled or for root cgroup */
-	if (!ioc->enabled || !iocg->level)
+	/* bypass IOs if disabled, still initializing, or for root cgroup */
+	if (!ioc->enabled || !iocg || !iocg->level)
 		return;
 
 	/* calculate the absolute vtime cost */
@@ -2679,14 +2679,14 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
 			   struct bio *bio)
 {
 	struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg);
-	struct ioc *ioc = iocg->ioc;
+	struct ioc *ioc = rqos_to_ioc(rqos);
 	sector_t bio_end = bio_end_sector(bio);
 	struct ioc_now now;
 	u64 vtime, abs_cost, cost;
 	unsigned long flags;
 
-	/* bypass if disabled or for root cgroup */
-	if (!ioc->enabled || !iocg->level)
+	/* bypass if disabled, still initializing, or for root cgroup */
+	if (!ioc->enabled || !iocg || !iocg->level)
 		return;
 
 	abs_cost = calc_vtime_cost(bio, iocg, true);
@@ -2863,6 +2863,12 @@ static int blk_iocost_init(struct request_queue *q)
 	ioc_refresh_params(ioc, true);
 	spin_unlock_irq(&ioc->lock);
 
+	/*
+	 * rqos must be added before activation to allow iocg_pd_init() to
+	 * lookup the ioc from q. This means that the rqos methods may get
+	 * called before policy activation completion, can't assume that the
+	 * target bio has an iocg associated and need to test for NULL iocg.
+	 */
 	rq_qos_add(q, rqos);
 	ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
 	if (ret) {

From 6d4d273588378c65915acaf7b2ee74e9dd9c130a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 10 Dec 2020 10:44:33 +0100
Subject: [PATCH 358/547] bfq: Fix computation of shallow depth

BFQ computes number of tags it allows to be allocated for each request type
based on tag bitmap. However it uses 1 << bitmap.shift as number of
available tags which is wrong. 'shift' is just an internal bitmap value
containing logarithm of how many bits bitmap uses in each bitmap word.
Thus number of tags allowed for some request types can be far to low.
Use proper bitmap.depth which has the number of tags instead.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 9e81d1052091..9e4eb0fc1c16 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6332,13 +6332,13 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd,
 	 * limit 'something'.
 	 */
 	/* no more than 50% of tags for async I/O */
-	bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U);
+	bfqd->word_depths[0][0] = max(bt->sb.depth >> 1, 1U);
 	/*
 	 * no more than 75% of tags for sync writes (25% extra tags
 	 * w.r.t. async I/O, to prevent async I/O from starving sync
 	 * writes)
 	 */
-	bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U);
+	bfqd->word_depths[0][1] = max((bt->sb.depth * 3) >> 2, 1U);
 
 	/*
 	 * In-word depths in case some bfq_queue is being weight-
@@ -6348,9 +6348,9 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd,
 	 * shortage.
 	 */
 	/* no more than ~18% of tags for async I/O */
-	bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U);
+	bfqd->word_depths[1][0] = max((bt->sb.depth * 3) >> 4, 1U);
 	/* no more than ~37% of tags for sync writes (~20% extra tags) */
-	bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U);
+	bfqd->word_depths[1][1] = max((bt->sb.depth * 6) >> 4, 1U);
 
 	for (i = 0; i < 2; i++)
 		for (j = 0; j < 2; j++)

From 170b3bbda08852277b97f4f0516df0785c939764 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Tue, 5 Jan 2021 21:53:40 +0800
Subject: [PATCH 359/547] =?UTF-8?q?io=5Furing:=20Delete=20useless=20variab?=
 =?UTF-8?q?le=20=E2=80=98id=E2=80=99=20in=20io=5Fprep=5Fasync=5Fwork?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix follow warning:
fs/io_uring.c:1523:22: warning: variable ‘id’ set but not used
[-Wunused-but-set-variable]
  struct io_identity *id;
                        ^~
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Ye Bin <yebin10@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5bccb235271f..dc92ca5090a3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1523,10 +1523,8 @@ static void io_prep_async_work(struct io_kiocb *req)
 {
 	const struct io_op_def *def = &io_op_defs[req->opcode];
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_identity *id;
 
 	io_req_init_async(req);
-	id = req->work.identity;
 
 	if (req->flags & REQ_F_FORCE_ASYNC)
 		req->work.flags |= IO_WQ_WORK_CONCURRENT;

From aebf5db917055b38f4945ed6d621d9f07a44ff30 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 21 Dec 2020 12:33:35 +0800
Subject: [PATCH 360/547] block: fix use-after-free in disk_part_iter_next

Make sure that bdgrab() is done on the 'block_device' instance before
referring to it for avoiding use-after-free.

Cc: <stable@vger.kernel.org>
Reported-by: syzbot+825f0f9657d4e528046e@syzkaller.appspotmail.com
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 73faec438e49..419548e92d82 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -246,15 +246,18 @@ struct block_device *disk_part_iter_next(struct disk_part_iter *piter)
 		part = rcu_dereference(ptbl->part[piter->idx]);
 		if (!part)
 			continue;
-		if (!bdev_nr_sectors(part) &&
-		    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
-		    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
-		      piter->idx == 0))
-			continue;
-
 		piter->part = bdgrab(part);
 		if (!piter->part)
 			continue;
+		if (!bdev_nr_sectors(part) &&
+		    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
+		    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
+		      piter->idx == 0)) {
+			bdput(piter->part);
+			piter->part = NULL;
+			continue;
+		}
+
 		piter->idx += inc;
 		break;
 	}

From 6775ae901ffd130d0be9c32837f88d1f9d560189 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 22 Dec 2020 17:42:32 +0100
Subject: [PATCH 361/547] iommu/iova: fix 'domain' typos

Replace misspelled 'doamin' with 'domain' in several comments.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201222164232.88795-1-sgarzare@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/iova.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 4bb3293ae4d7..d20b8b333d30 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -358,7 +358,7 @@ static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
  * @iovad: - iova domain in question.
  * @pfn: - page frame number
  * This function finds and returns an iova belonging to the
- * given doamin which matches the given pfn.
+ * given domain which matches the given pfn.
  */
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
 {
@@ -601,7 +601,7 @@ void queue_iova(struct iova_domain *iovad,
 EXPORT_SYMBOL_GPL(queue_iova);
 
 /**
- * put_iova_domain - destroys the iova doamin
+ * put_iova_domain - destroys the iova domain
  * @iovad: - iova domain in question.
  * All the iova's in that domain are destroyed.
  */
@@ -712,9 +712,9 @@ EXPORT_SYMBOL_GPL(reserve_iova);
 
 /**
  * copy_reserved_iova - copies the reserved between domains
- * @from: - source doamin from where to copy
+ * @from: - source domain from where to copy
  * @to: - destination domin where to copy
- * This function copies reserved iova's from one doamin to
+ * This function copies reserved iova's from one domain to
  * other.
  */
 void

From ff2b46d7cff80d27d82f7f3252711f4ca1666129 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Tue, 5 Jan 2021 13:18:37 +0800
Subject: [PATCH 362/547] iommu/intel: Fix memleak in intel_irq_remapping_alloc

When irq_domain_get_irq_data() or irqd_cfg() fails
at i == 0, data allocated by kzalloc() has not been
freed before returning, which leads to memleak.

Fixes: b106ee63abcc ("irq_remapping/vt-d: Enhance Intel IR driver to support hierarchical irqdomains")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20210105051837.32118-1-dinghao.liu@zju.edu.cn
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/intel/irq_remapping.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index aeffda92b10b..685200a5cff0 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1353,6 +1353,8 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,
 		irq_data = irq_domain_get_irq_data(domain, virq + i);
 		irq_cfg = irqd_cfg(irq_data);
 		if (!irq_data || !irq_cfg) {
+			if (!i)
+				kfree(data);
 			ret = -EINVAL;
 			goto out_free_data;
 		}

From 12bc4570c14e24e6244d66466aeda994f805634b Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Tue, 5 Jan 2021 01:32:51 +0000
Subject: [PATCH 363/547] iommu/amd: Set iommu->int_enabled consistently when
 interrupts are set up

When I made the INTCAPXT support stop gratuitously pretending to be MSI,
I missed the fact that iommu_setup_msi() also sets the ->int_enabled
flag. I missed this in the iommu_setup_intcapxt() code path, which means
that a resume from suspend will try to allocate the IRQ domains again,
accidentally re-enabling interrupts as it does, resulting in much sadness.

Lift out the bit which sets iommu->int_enabled into the iommu_init_irq()
function which is also where it gets checked.

Link: https://lore.kernel.org/r/20210104132250.GE32151@zn.tnic/
Fixes: d1adcfbb520c ("iommu/amd: Fix IOMMU interrupt generation in X2APIC mode")
Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Tested-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/50cd5f55be8ead0937ac315cd2f5b89364f6a9a5.camel@infradead.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/amd/init.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index f54cd79b43e4..6a1f7048dacc 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1973,8 +1973,6 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 		return r;
 	}
 
-	iommu->int_enabled = true;
-
 	return 0;
 }
 
@@ -2169,6 +2167,7 @@ static int iommu_init_irq(struct amd_iommu *iommu)
 	if (ret)
 		return ret;
 
+	iommu->int_enabled = true;
 enable_faults:
 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
 

From b34f10c2dc5961021850c3c15f46a84b56a0c0e8 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Tue, 5 Jan 2021 01:36:13 +0000
Subject: [PATCH 364/547] iommu/amd: Stop irq_remapping_select() matching when
 remapping is disabled

The AMD IOMMU initialisation registers the IRQ remapping domain for
each IOMMU before doing the final sanity check that every I/OAPIC is
covered.

This means that the AMD irq_remapping_select() function gets invoked
even when IRQ remapping has been disabled, eventually leading to a NULL
pointer dereference in alloc_irq_table().

Unfortunately, the IVRS isn't fully parsed early enough that the sanity
check can be done in time to registering the IRQ domain altogether.
Doing that would be nice, but is a larger and more error-prone task. The
simple fix is just for irq_remapping_select() to refuse to report a
match when IRQ remapping has disabled.

Link: https://lore.kernel.org/lkml/ed4be9b4-24ac-7128-c522-7ef359e8185d@gmx.at
Fixes: a1a785b57242 ("iommu/amd: Implement select() method on remapping irqdomain")
Reported-by: Johnathan Smithinovic <johnathan.smithinovic@gmx.at>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Link: https://lore.kernel.org/r/04bbe8bca87f81a3cfa93ec4299e53f47e00e5b3.camel@infradead.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/amd/iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 7e2c445a1fae..f0adbc48fd17 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3854,6 +3854,9 @@ static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec,
 	struct amd_iommu *iommu;
 	int devid = -1;
 
+	if (!amd_iommu_irq_remap)
+		return 0;
+
 	if (x86_fwspec_is_ioapic(fwspec))
 		devid = get_ioapic_devid(fwspec->param[0]);
 	else if (x86_fwspec_is_hpet(fwspec))

From c2407cf7d22d0c0d94cf20342b3b8f06f1d904e7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 5 Jan 2021 11:33:00 -0800
Subject: [PATCH 365/547] mm: make wait_on_page_writeback() wait for multiple
 pending writebacks

Ever since commit 2a9127fcf229 ("mm: rewrite wait_on_page_bit_common()
logic") we've had some very occasional reports of BUG_ON(PageWriteback)
in write_cache_pages(), which we thought we already fixed in commit
073861ed77b6 ("mm: fix VM_BUG_ON(PageTail) and BUG_ON(PageWriteback)").

But syzbot just reported another one, even with that commit in place.

And it turns out that there's a simpler way to trigger the BUG_ON() than
the one Hugh found with page re-use.  It all boils down to the fact that
the page writeback is ostensibly serialized by the page lock, but that
isn't actually really true.

Yes, the people _setting_ writeback all do so under the page lock, but
the actual clearing of the bit - and waking up any waiters - happens
without any page lock.

This gives us this fairly simple race condition:

  CPU1 = end previous writeback
  CPU2 = start new writeback under page lock
  CPU3 = write_cache_pages()

  CPU1          CPU2            CPU3
  ----          ----            ----

  end_page_writeback()
    test_clear_page_writeback(page)
    ... delayed...

                lock_page();
                set_page_writeback()
                unlock_page()

                                lock_page()
                                wait_on_page_writeback();

    wake_up_page(page, PG_writeback);
    .. wakes up CPU3 ..

                                BUG_ON(PageWriteback(page));

where the BUG_ON() happens because we woke up the PG_writeback bit
becasue of the _previous_ writeback, but a new one had already been
started because the clearing of the bit wasn't actually atomic wrt the
actual wakeup or serialized by the page lock.

The reason this didn't use to happen was that the old logic in waiting
on a page bit would just loop if it ever saw the bit set again.

The nice proper fix would probably be to get rid of the whole "wait for
writeback to clear, and then set it" logic in the writeback path, and
replace it with an atomic "wait-to-set" (ie the same as we have for page
locking: we set the page lock bit with a single "lock_page()", not with
"wait for lock bit to clear and then set it").

However, out current model for writeback is that the waiting for the
writeback bit is done by the generic VFS code (ie write_cache_pages()),
but the actual setting of the writeback bit is done much later by the
filesystem ".writepages()" function.

IOW, to make the writeback bit have that same kind of "wait-to-set"
behavior as we have for page locking, we'd have to change our roughly
~50 different writeback functions.  Painful.

Instead, just make "wait_on_page_writeback()" loop on the very unlikely
situation that the PG_writeback bit is still set, basically re-instating
the old behavior.  This is very non-optimal in case of contention, but
since we only ever set the bit under the page lock, that situation is
controlled.

Reported-by: syzbot+2fc0712f8f8b8b8fa0ef@syzkaller.appspotmail.com
Fixes: 2a9127fcf229 ("mm: rewrite wait_on_page_bit_common() logic")
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 586042472ac9..eb34d204d4ee 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2826,7 +2826,7 @@ EXPORT_SYMBOL(__test_set_page_writeback);
  */
 void wait_on_page_writeback(struct page *page)
 {
-	if (PageWriteback(page)) {
+	while (PageWriteback(page)) {
 		trace_wait_on_page_writeback(page, page_mapping(page));
 		wait_on_page_bit(page, PG_writeback);
 	}

From 8a48c0a3360bf2bf4f40c980d0ec216e770e58ee Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 4 Jan 2021 19:44:53 -0800
Subject: [PATCH 366/547] arch/arc: add copy_user_page() to <asm/page.h> to fix
 build error on ARC

fs/dax.c uses copy_user_page() but ARC does not provide that interface,
resulting in a build error.

Provide copy_user_page() in <asm/page.h>.

../fs/dax.c: In function 'copy_cow_page_dax':
../fs/dax.c:702:2: error: implicit declaration of function 'copy_user_page'; did you mean 'copy_to_user_page'? [-Werror=implicit-function-declaration]

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: linux-snps-arc@lists.infradead.org
Cc: Dan Williams <dan.j.williams@intel.com>
#Acked-by: Vineet Gupta <vgupta@synopsys.com> # v1
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-nvdimm@lists.01.org
#Reviewed-by: Ira Weiny <ira.weiny@intel.com> # v2
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/page.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 23e41e890eda..ad9b7fe4dba3 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -10,6 +10,7 @@
 #ifndef __ASSEMBLY__
 
 #define clear_page(paddr)		memset((paddr), 0, PAGE_SIZE)
+#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 #define copy_page(to, from)		memcpy((to), (from), PAGE_SIZE)
 
 struct vm_area_struct;

From f4d9359de8ac0fb64a5ecc9c34833705eb53327b Mon Sep 17 00:00:00 2001
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Date: Thu, 5 Nov 2020 13:22:10 -0800
Subject: [PATCH 367/547] include/soc: remove headers for EZChip NPS

NPS platform has been removed from ARC port and there are no in-tree
user of it now . So RIP !

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 include/soc/nps/common.h | 172 ---------------------------------------
 include/soc/nps/mtm.h    |  59 --------------
 2 files changed, 231 deletions(-)
 delete mode 100644 include/soc/nps/common.h
 delete mode 100644 include/soc/nps/mtm.h

diff --git a/include/soc/nps/common.h b/include/soc/nps/common.h
deleted file mode 100644
index 8c18dc6d3fde..000000000000
--- a/include/soc/nps/common.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef SOC_NPS_COMMON_H
-#define SOC_NPS_COMMON_H
-
-#ifdef CONFIG_SMP
-#define NPS_IPI_IRQ					5
-#endif
-
-#define NPS_HOST_REG_BASE			0xF6000000
-
-#define NPS_MSU_BLKID				0x018
-
-#define CTOP_INST_RSPI_GIC_0_R12		0x3C56117E
-#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST	0x5B60
-#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM	0x00010422
-
-#ifndef AUX_IENABLE
-#define AUX_IENABLE				0x40c
-#endif
-
-#define CTOP_AUX_IACK				(0xFFFFF800 + 0x088)
-
-#ifndef __ASSEMBLY__
-
-/* In order to increase compilation test coverage */
-#ifdef CONFIG_ARC
-static inline void nps_ack_gic(void)
-{
-	__asm__ __volatile__ (
-	"       .word %0\n"
-	:
-	: "i"(CTOP_INST_RSPI_GIC_0_R12)
-	: "memory");
-}
-#else
-static inline void nps_ack_gic(void) { }
-#define write_aux_reg(r, v)
-#define read_aux_reg(r) 0
-#endif
-
-/* CPU global ID */
-struct global_id {
-	union {
-		struct {
-#ifdef CONFIG_EZNPS_MTM_EXT
-			u32 __reserved:20, cluster:4, core:4, thread:4;
-#else
-			u32 __reserved:24, cluster:4, core:4;
-#endif
-		};
-		u32 value;
-	};
-};
-
-/*
- * Convert logical to physical CPU IDs
- *
- * The conversion swap bits 1 and 2 of cluster id (out of 4 bits)
- * Now quad of logical clusters id's are adjacent physically,
- * and not like the id's physically came with each cluster.
- * Below table is 4x4 mesh of core clusters as it layout on chip.
- * Cluster ids are in format: logical (physical)
- *
- *    -----------------   ------------------
- * 3 |  5 (3)   7 (7)  | | 13 (11)   15 (15)|
- *
- * 2 |  4 (2)   6 (6)  | | 12 (10)   14 (14)|
- *    -----------------   ------------------
- * 1 |  1 (1)   3 (5)  | |  9  (9)   11 (13)|
- *
- * 0 |  0 (0)   2 (4)  | |  8  (8)   10 (12)|
- *    -----------------   ------------------
- *       0       1            2        3
- */
-static inline int nps_cluster_logic_to_phys(int cluster)
-{
-#ifdef __arc__
-	 __asm__ __volatile__(
-	"       mov r3,%0\n"
-	"       .short %1\n"
-	"       .word %2\n"
-	"       mov %0,r3\n"
-	: "+r"(cluster)
-	: "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST),
-	  "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM)
-	: "r3");
-#endif
-
-	return cluster;
-}
-
-#define NPS_CPU_TO_CLUSTER_NUM(cpu) \
-	({ struct global_id gid; gid.value = cpu; \
-		nps_cluster_logic_to_phys(gid.cluster); })
-
-struct nps_host_reg_address {
-	union {
-		struct {
-			u32 base:8, cl_x:4, cl_y:4,
-			blkid:6, reg:8, __reserved:2;
-		};
-		u32 value;
-	};
-};
-
-struct nps_host_reg_address_non_cl {
-	union {
-		struct {
-			u32 base:7, blkid:11, reg:12, __reserved:2;
-		};
-		u32 value;
-	};
-};
-
-static inline void *nps_host_reg_non_cl(u32 blkid, u32 reg)
-{
-	struct nps_host_reg_address_non_cl reg_address;
-
-	reg_address.value = NPS_HOST_REG_BASE;
-	reg_address.blkid = blkid;
-	reg_address.reg = reg;
-
-	return (void *)reg_address.value;
-}
-
-static inline void *nps_host_reg(u32 cpu, u32 blkid, u32 reg)
-{
-	struct nps_host_reg_address reg_address;
-	u32 cl = NPS_CPU_TO_CLUSTER_NUM(cpu);
-
-	reg_address.value = NPS_HOST_REG_BASE;
-	reg_address.cl_x  = (cl >> 2) & 0x3;
-	reg_address.cl_y  = cl & 0x3;
-	reg_address.blkid = blkid;
-	reg_address.reg   = reg;
-
-	return (void *)reg_address.value;
-}
-#endif /* __ASSEMBLY__ */
-
-#endif /* SOC_NPS_COMMON_H */
diff --git a/include/soc/nps/mtm.h b/include/soc/nps/mtm.h
deleted file mode 100644
index d2f5e7e3703e..000000000000
--- a/include/soc/nps/mtm.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef SOC_NPS_MTM_H
-#define SOC_NPS_MTM_H
-
-#define CTOP_INST_HWSCHD_OFF_R3                 0x3B6F00BF
-#define CTOP_INST_HWSCHD_RESTORE_R3             0x3E6F70C3
-
-static inline void hw_schd_save(unsigned int *flags)
-{
-	__asm__ __volatile__(
-	"       .word %1\n"
-	"       st r3,[%0]\n"
-	:
-	: "r"(flags), "i"(CTOP_INST_HWSCHD_OFF_R3)
-	: "r3", "memory");
-}
-
-static inline void hw_schd_restore(unsigned int flags)
-{
-	__asm__ __volatile__(
-	"       mov r3, %0\n"
-	"       .word %1\n"
-	:
-	: "r"(flags), "i"(CTOP_INST_HWSCHD_RESTORE_R3)
-	: "r3");
-}
-
-#endif /* SOC_NPS_MTM_H */

From 2860d45a589818dd8ffd90cdc4bcf77f36a5a6be Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:36:17 +0100
Subject: [PATCH 368/547] qed: select CONFIG_CRC32

Without this, the driver fails to link:

lpc_eth.c:(.text+0x1934): undefined reference to `crc32_le'
arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_grc_dump':
qed_debug.c:(.text+0x4068): undefined reference to `crc32_le'
arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_idle_chk_dump':
qed_debug.c:(.text+0x51fc): undefined reference to `crc32_le'
arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_mcp_trace_dump':
qed_debug.c:(.text+0x6000): undefined reference to `crc32_le'
arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_dbg_reg_fifo_dump':
qed_debug.c:(.text+0x66cc): undefined reference to `crc32_le'
arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o:qed_debug.c:(.text+0x6aa4): more undefined references to `crc32_le' follow

Fixes: 7a4b21b7d1f0 ("qed: Add nvram selftest")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 4366c7a8de95..6b5ddb07ee83 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -78,6 +78,7 @@ config QED
 	depends on PCI
 	select ZLIB_INFLATE
 	select CRC8
+	select CRC32
 	select NET_DEVLINK
 	help
 	  This enables the support for Marvell FastLinQ adapters family.

From f9d6f94132f01d2a552dcbab54fa56496638186d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:36:18 +0100
Subject: [PATCH 369/547] phy: dp83640: select CONFIG_CRC32

Without crc32, this driver fails to link:

arm-linux-gnueabi-ld: drivers/net/phy/dp83640.o: in function `match':
dp83640.c:(.text+0x476c): undefined reference to `crc32_le'

Fixes: 539e44d26855 ("dp83640: Include hash in timestamp/packet matching")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index 476d7c7fe70a..d2bf05ccbbe2 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -64,6 +64,7 @@ config DP83640_PHY
 	depends on NETWORK_PHY_TIMESTAMPING
 	depends on PHYLIB
 	depends on PTP_1588_CLOCK
+	select CRC32
 	help
 	  Supports the DP83640 PHYTER with IEEE 1588 features.
 

From 1d48595c786b1b9dc6be301e8d7f6fc74e9882aa Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:36:19 +0100
Subject: [PATCH 370/547] can: kvaser_pciefd: select CONFIG_CRC32

Without crc32, this driver fails to link:

arm-linux-gnueabi-ld: drivers/net/can/kvaser_pciefd.o: in function `kvaser_pciefd_probe':
kvaser_pciefd.c:(.text+0x2b0): undefined reference to `crc32_be'

Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 424970939fd4..1c28eade6bec 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -123,6 +123,7 @@ config CAN_JANZ_ICAN3
 config CAN_KVASER_PCIEFD
 	depends on PCI
 	tristate "Kvaser PCIe FD cards"
+	select CRC32
 	  help
 	  This is a driver for the Kvaser PCI Express CAN FD family.
 

From e186620d7bf11b274b985b839c38266d7918cc05 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:36:20 +0100
Subject: [PATCH 371/547] wil6210: select CONFIG_CRC32

Without crc32, the driver fails to link:

arm-linux-gnueabi-ld: drivers/net/wireless/ath/wil6210/fw.o: in function `wil_fw_verify':
fw.c:(.text+0x74c): undefined reference to `crc32_le'
arm-linux-gnueabi-ld: drivers/net/wireless/ath/wil6210/fw.o:fw.c:(.text+0x758): more undefined references to `crc32_le' follow

Fixes: 151a9706503f ("wil6210: firmware download")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireless/ath/wil6210/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ath/wil6210/Kconfig b/drivers/net/wireless/ath/wil6210/Kconfig
index 6a95b199bf62..f074e9c31aa2 100644
--- a/drivers/net/wireless/ath/wil6210/Kconfig
+++ b/drivers/net/wireless/ath/wil6210/Kconfig
@@ -2,6 +2,7 @@
 config WIL6210
 	tristate "Wilocity 60g WiFi card wil6210 support"
 	select WANT_DEV_COREDUMP
+	select CRC32
 	depends on CFG80211
 	depends on PCI
 	default n

From 152a8a6c017bfdeda7f6d052fbc6e151891bd9b6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:36:21 +0100
Subject: [PATCH 372/547] cfg80211: select CONFIG_CRC32

Without crc32 support, this fails to link:

arm-linux-gnueabi-ld: net/wireless/scan.o: in function `cfg80211_scan_6ghz':
scan.c:(.text+0x928): undefined reference to `crc32_le'

Fixes: c8cb5b854b40 ("nl80211/cfg80211: support 6 GHz scanning")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 27026f587fa6..f620acd2a0f5 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -21,6 +21,7 @@ config CFG80211
 	tristate "cfg80211 - wireless configuration API"
 	depends on RFKILL || !RFKILL
 	select FW_LOADER
+	select CRC32
 	# may need to update this when certificates are changed and are
 	# using a different algorithm, though right now they shouldn't
 	# (this is here rather than below to allow it to be a module)

From 51049bd903a81307f751babe15a1df8d197884e8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:36:22 +0100
Subject: [PATCH 373/547] misdn: dsp: select CONFIG_BITREVERSE

Without this, we run into a link error

arm-linux-gnueabi-ld: drivers/isdn/mISDN/dsp_audio.o: in function `dsp_audio_generate_law_tables':
(.text+0x30c): undefined reference to `byte_rev_table'
arm-linux-gnueabi-ld: drivers/isdn/mISDN/dsp_audio.o:(.text+0x5e4): more undefined references to `byte_rev_table' follow

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/mISDN/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/isdn/mISDN/Kconfig b/drivers/isdn/mISDN/Kconfig
index 26cf0ac9c4ad..c9a53c222472 100644
--- a/drivers/isdn/mISDN/Kconfig
+++ b/drivers/isdn/mISDN/Kconfig
@@ -13,6 +13,7 @@ if MISDN != n
 config MISDN_DSP
 	tristate "Digital Audio Processing of transparent data"
 	depends on MISDN
+	select BITREVERSE
 	help
 	  Enable support for digital audio processing capability.
 

From 69931e11288520c250152180ecf9b6ac5e6e40ed Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 22:36:23 +0100
Subject: [PATCH 374/547] wan: ds26522: select CONFIG_BITREVERSE

Without this, the driver runs into a link failure

arm-linux-gnueabi-ld: drivers/net/wan/slic_ds26522.o: in function `slic_ds26522_probe':
slic_ds26522.c:(.text+0x100c): undefined reference to `byte_rev_table'
arm-linux-gnueabi-ld: slic_ds26522.c:(.text+0x1cdc): undefined reference to `byte_rev_table'
arm-linux-gnueabi-ld: drivers/net/wan/slic_ds26522.o: in function `slic_write':
slic_ds26522.c:(.text+0x1e4c): undefined reference to `byte_rev_table'

Fixes: c37d4a0085c5 ("Maxim/driver: Add driver for maxim ds26522")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index 4029fde71a9e..83c9481995dd 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -282,6 +282,7 @@ config SLIC_DS26522
 	tristate "Slic Maxim ds26522 card support"
 	depends on SPI
 	depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE || COMPILE_TEST
+	select BITREVERSE
 	help
 	  This module initializes and configures the slic maxim card
 	  in T1 or E1 mode.

From 0f7ba7bc46fa0b574ccacf5672991b321e028492 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 3 Jan 2021 11:26:26 +1100
Subject: [PATCH 375/547] net/sonic: Fix some resource leaks in error handling
 paths

A call to dma_alloc_coherent() is wrapped by sonic_alloc_descriptors().

This is correctly freed in the remove function, but not in the error
handling path of the probe function. Fix this by adding the missing
dma_free_coherent() call.

While at it, rename a label in order to be slightly more informative.

Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Chris Zankel <chris@zankel.net>
References: commit 10e3cc180e64 ("net/sonic: Fix a resource leak in an error handling path in 'jazz_sonic_probe()'")
Fixes: 74f2a5f0ef64 ("xtensa: Add support for the Sonic Ethernet device for the XT2000 board.")
Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/natsemi/macsonic.c | 12 ++++++++++--
 drivers/net/ethernet/natsemi/xtsonic.c  |  7 +++++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index 776b7d264dc3..2289e1fe3741 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -506,10 +506,14 @@ static int mac_sonic_platform_probe(struct platform_device *pdev)
 
 	err = register_netdev(dev);
 	if (err)
-		goto out;
+		goto undo_probe;
 
 	return 0;
 
+undo_probe:
+	dma_free_coherent(lp->device,
+			  SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+			  lp->descriptors, lp->descriptors_laddr);
 out:
 	free_netdev(dev);
 
@@ -584,12 +588,16 @@ static int mac_sonic_nubus_probe(struct nubus_board *board)
 
 	err = register_netdev(ndev);
 	if (err)
-		goto out;
+		goto undo_probe;
 
 	nubus_set_drvdata(board, ndev);
 
 	return 0;
 
+undo_probe:
+	dma_free_coherent(lp->device,
+			  SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+			  lp->descriptors, lp->descriptors_laddr);
 out:
 	free_netdev(ndev);
 	return err;
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index afa166ff7aef..28d9e98db81a 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -229,11 +229,14 @@ int xtsonic_probe(struct platform_device *pdev)
 	sonic_msg_init(dev);
 
 	if ((err = register_netdev(dev)))
-		goto out1;
+		goto undo_probe1;
 
 	return 0;
 
-out1:
+undo_probe1:
+	dma_free_coherent(lp->device,
+			  SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+			  lp->descriptors, lp->descriptors_laddr);
 	release_region(dev->base_addr, SONIC_MEM_SIZE);
 out:
 	free_netdev(dev);

From cf0720697143f3eaa0779cca5a6602d8557d1c6f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 30 Dec 2020 19:37:53 -0800
Subject: [PATCH 376/547] net: suggest L2 discards be counted towards
 rx_dropped

From the existing definitions it's unclear which stat to
use to report filtering based on L2 dst addr in old
broadcast-medium Ethernet.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 874cc12a34d9..82708c6db432 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -75,8 +75,9 @@ struct rtnl_link_stats {
  *
  * @rx_dropped: Number of packets received but not processed,
  *   e.g. due to lack of resources or unsupported protocol.
- *   For hardware interfaces this counter should not include packets
- *   dropped by the device which are counted separately in
+ *   For hardware interfaces this counter may include packets discarded
+ *   due to L2 address filtering but should not include packets dropped
+ *   by the device due to buffer exhaustion which are counted separately in
  *   @rx_missed_errors (since procfs folds those two counters together).
  *
  * @tx_dropped: Number of packets dropped on their way to transmission,

From 55b7ab1178cbf41f979ff83236d3321ad35ed2ad Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 30 Dec 2020 19:40:27 -0800
Subject: [PATCH 377/547] net: vlan: avoid leaks on register_vlan_dev()
 failures

VLAN checks for NETREG_UNINITIALIZED to distinguish between
registration failure and unregistration in progress.

Since commit cb626bf566eb ("net-sysfs: Fix reference count leak")
registration failure may, however, result in NETREG_UNREGISTERED
as well as NETREG_UNINITIALIZED.

This fix is similer to cebb69754f37 ("rtnetlink: Fix
memory(net_device) leak when ->newlink fails")

Fixes: cb626bf566eb ("net-sysfs: Fix reference count leak")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index f292e0267bb9..15bbfaf943fd 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -284,7 +284,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	return 0;
 
 out_free_newdev:
-	if (new_dev->reg_state == NETREG_UNINITIALIZED)
+	if (new_dev->reg_state == NETREG_UNINITIALIZED ||
+	    new_dev->reg_state == NETREG_UNREGISTERED)
 		free_netdev(new_dev);
 	return err;
 }

From 7eeecc4b1f480c7ba1932cb9a7693f8c452640f2 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 3 Jan 2021 05:17:41 -0600
Subject: [PATCH 378/547] net: stmmac: dwmac-sun8i: Fix probe error handling

stmmac_pltfr_remove does three things in one function, making it
inapproprate for unwinding the steps in the probe function. Currently,
a failure before the call to stmmac_dvr_probe would leak OF node
references due to missing a call to stmmac_remove_config_dt. And an
error in stmmac_dvr_probe would cause the driver to attempt to remove a
netdevice that was never added. Fix these by reordering the init and
splitting out the error handling steps.

Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i")
Fixes: 40a1dcee2d18 ("net: ethernet: dwmac-sun8i: Use the correct function in exit path")
Signed-off-by: Samuel Holland <samuel@sholland.org>
Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 58e0511badba..b20f261fce5b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1134,10 +1134,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
-	if (IS_ERR(plat_dat))
-		return PTR_ERR(plat_dat);
-
 	gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL);
 	if (!gmac)
 		return -ENOMEM;
@@ -1201,11 +1197,15 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	ret = of_get_phy_mode(dev->of_node, &interface);
 	if (ret)
 		return -EINVAL;
-	plat_dat->interface = interface;
+
+	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	if (IS_ERR(plat_dat))
+		return PTR_ERR(plat_dat);
 
 	/* platform data specifying hardware features and callbacks.
 	 * hardware features were copied from Allwinner drivers.
 	 */
+	plat_dat->interface = interface;
 	plat_dat->rx_coe = STMMAC_RX_COE_TYPE2;
 	plat_dat->tx_coe = 1;
 	plat_dat->has_sun8i = true;
@@ -1216,7 +1216,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 
 	ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv);
 	if (ret)
-		return ret;
+		goto dwmac_deconfig;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
@@ -1230,7 +1230,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	if (gmac->variant->soc_has_internal_phy) {
 		ret = get_ephy_nodes(priv);
 		if (ret)
-			goto dwmac_exit;
+			goto dwmac_remove;
 		ret = sun8i_dwmac_register_mdio_mux(priv);
 		if (ret) {
 			dev_err(&pdev->dev, "Failed to register mux\n");
@@ -1239,15 +1239,20 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	} else {
 		ret = sun8i_dwmac_reset(priv);
 		if (ret)
-			goto dwmac_exit;
+			goto dwmac_remove;
 	}
 
 	return ret;
 dwmac_mux:
 	sun8i_dwmac_unset_syscon(gmac);
+dwmac_remove:
+	stmmac_dvr_remove(&pdev->dev);
 dwmac_exit:
-	stmmac_pltfr_remove(pdev);
-return ret;
+	sun8i_dwmac_exit(pdev, gmac);
+dwmac_deconfig:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
 }
 
 static const struct of_device_id sun8i_dwmac_match[] = {

From 529254216773acd5039c07aa18cf06fd1f9fccdd Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 3 Jan 2021 05:17:42 -0600
Subject: [PATCH 379/547] net: stmmac: dwmac-sun8i: Balance internal PHY
 resource references

While stmmac_pltfr_remove calls sun8i_dwmac_exit, the sun8i_dwmac_init
and sun8i_dwmac_exit functions are also called by the stmmac_platform
suspend/resume callbacks. They may be called many times during the
device's lifetime and should not release resources used by the driver.

Furthermore, there was no error handling in case registering the MDIO
mux failed during probe, and the EPHY clock was never released at all.

Fix all of these issues by moving the deinitialization code to a driver
removal callback. Also ensure the EPHY is powered down before removal.

Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs")
Signed-off-by: Samuel Holland <samuel@sholland.org>
Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 27 ++++++++++++++-----
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index b20f261fce5b..a05dee5d4584 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1004,17 +1004,12 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
 	struct sunxi_priv_data *gmac = priv;
 
 	if (gmac->variant->soc_has_internal_phy) {
-		/* sun8i_dwmac_exit could be called with mdiomux uninit */
-		if (gmac->mux_handle)
-			mdio_mux_uninit(gmac->mux_handle);
 		if (gmac->internal_phy_powered)
 			sun8i_dwmac_unpower_internal_phy(gmac);
 	}
 
 	sun8i_dwmac_unset_syscon(gmac);
 
-	reset_control_put(gmac->rst_ephy);
-
 	clk_disable_unprepare(gmac->tx_clk);
 
 	if (gmac->regulator)
@@ -1244,6 +1239,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 
 	return ret;
 dwmac_mux:
+	reset_control_put(gmac->rst_ephy);
+	clk_put(gmac->ephy_clk);
 	sun8i_dwmac_unset_syscon(gmac);
 dwmac_remove:
 	stmmac_dvr_remove(&pdev->dev);
@@ -1255,6 +1252,24 @@ dwmac_deconfig:
 	return ret;
 }
 
+static int sun8i_dwmac_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
+
+	if (gmac->variant->soc_has_internal_phy) {
+		mdio_mux_uninit(gmac->mux_handle);
+		sun8i_dwmac_unpower_internal_phy(gmac);
+		reset_control_put(gmac->rst_ephy);
+		clk_put(gmac->ephy_clk);
+	}
+
+	stmmac_pltfr_remove(pdev);
+
+	return 0;
+}
+
 static const struct of_device_id sun8i_dwmac_match[] = {
 	{ .compatible = "allwinner,sun8i-h3-emac",
 		.data = &emac_variant_h3 },
@@ -1274,7 +1289,7 @@ MODULE_DEVICE_TABLE(of, sun8i_dwmac_match);
 
 static struct platform_driver sun8i_dwmac_driver = {
 	.probe  = sun8i_dwmac_probe,
-	.remove = stmmac_pltfr_remove,
+	.remove = sun8i_dwmac_remove,
 	.driver = {
 		.name           = "dwmac-sun8i",
 		.pm		= &stmmac_pltfr_pm_ops,

From b8239638853e3e37b287e4bd4d57b41f14c78550 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 3 Jan 2021 05:17:43 -0600
Subject: [PATCH 380/547] net: stmmac: dwmac-sun8i: Balance internal PHY power

sun8i_dwmac_exit calls sun8i_dwmac_unpower_internal_phy, but
sun8i_dwmac_init did not call sun8i_dwmac_power_internal_phy. This
caused PHY power to remain off after a suspend/resume cycle. Fix this by
recording if PHY power should be restored, and if so, restoring it.

Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs")
Signed-off-by: Samuel Holland <samuel@sholland.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 31 ++++++++++++++-----
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index a05dee5d4584..e2c25c1c702a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -64,6 +64,7 @@ struct emac_variant {
  * @variant:	reference to the current board variant
  * @regmap:	regmap for using the syscon
  * @internal_phy_powered: Does the internal PHY is enabled
+ * @use_internal_phy: Is the internal PHY selected for use
  * @mux_handle:	Internal pointer used by mdio-mux lib
  */
 struct sunxi_priv_data {
@@ -74,6 +75,7 @@ struct sunxi_priv_data {
 	const struct emac_variant *variant;
 	struct regmap_field *regmap_field;
 	bool internal_phy_powered;
+	bool use_internal_phy;
 	void *mux_handle;
 };
 
@@ -539,8 +541,11 @@ static const struct stmmac_dma_ops sun8i_dwmac_dma_ops = {
 	.dma_interrupt = sun8i_dwmac_dma_interrupt,
 };
 
+static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv);
+
 static int sun8i_dwmac_init(struct platform_device *pdev, void *priv)
 {
+	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct sunxi_priv_data *gmac = priv;
 	int ret;
 
@@ -554,13 +559,25 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv)
 
 	ret = clk_prepare_enable(gmac->tx_clk);
 	if (ret) {
-		if (gmac->regulator)
-			regulator_disable(gmac->regulator);
 		dev_err(&pdev->dev, "Could not enable AHB clock\n");
-		return ret;
+		goto err_disable_regulator;
+	}
+
+	if (gmac->use_internal_phy) {
+		ret = sun8i_dwmac_power_internal_phy(netdev_priv(ndev));
+		if (ret)
+			goto err_disable_clk;
 	}
 
 	return 0;
+
+err_disable_clk:
+	clk_disable_unprepare(gmac->tx_clk);
+err_disable_regulator:
+	if (gmac->regulator)
+		regulator_disable(gmac->regulator);
+
+	return ret;
 }
 
 static void sun8i_dwmac_core_init(struct mac_device_info *hw,
@@ -831,7 +848,6 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child,
 	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
 	u32 reg, val;
 	int ret = 0;
-	bool need_power_ephy = false;
 
 	if (current_child ^ desired_child) {
 		regmap_field_read(gmac->regmap_field, &reg);
@@ -839,13 +855,12 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child,
 		case DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID:
 			dev_info(priv->device, "Switch mux to internal PHY");
 			val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SELECT;
-
-			need_power_ephy = true;
+			gmac->use_internal_phy = true;
 			break;
 		case DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID:
 			dev_info(priv->device, "Switch mux to external PHY");
 			val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SHUTDOWN;
-			need_power_ephy = false;
+			gmac->use_internal_phy = false;
 			break;
 		default:
 			dev_err(priv->device, "Invalid child ID %x\n",
@@ -853,7 +868,7 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child,
 			return -EINVAL;
 		}
 		regmap_field_write(gmac->regmap_field, val);
-		if (need_power_ephy) {
+		if (gmac->use_internal_phy) {
 			ret = sun8i_dwmac_power_internal_phy(priv);
 			if (ret)
 				return ret;

From 9b1e39cf5dd81f33186cdb950fcf75a121f1a9a7 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 3 Jan 2021 05:17:44 -0600
Subject: [PATCH 381/547] net: stmmac: dwmac-sun8i: Balance syscon
 (de)initialization

Previously, sun8i_dwmac_set_syscon was called from a chain of functions
in several different files:
    sun8i_dwmac_probe
      stmmac_dvr_probe
        stmmac_hw_init
          stmmac_hwif_init
            sun8i_dwmac_setup
              sun8i_dwmac_set_syscon
which made the lifetime of the syscon values hard to reason about. Part
of the problem is that there is no similar platform driver callback from
stmmac_dvr_remove. As a result, the driver unset the syscon value in
sun8i_dwmac_exit, but this leaves it uninitialized after a suspend/
resume cycle. It was also unset a second time (outside sun8i_dwmac_exit)
in the probe error path.

Move the init to the earliest available place in sun8i_dwmac_probe
(after stmmac_probe_config_dt, which initializes plat_dat), and the
deinit to the corresponding position in the cleanup order.

Since priv is not filled in until stmmac_dvr_probe, this requires
changing the sun8i_dwmac_set_syscon parameters to priv's two relevant
members.

Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i")
Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs")
Signed-off-by: Samuel Holland <samuel@sholland.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 50 +++++++++----------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index e2c25c1c702a..a5e0eff4a387 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -898,22 +898,23 @@ static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv)
 	return ret;
 }
 
-static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
+static int sun8i_dwmac_set_syscon(struct device *dev,
+				  struct plat_stmmacenet_data *plat)
 {
-	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
-	struct device_node *node = priv->device->of_node;
+	struct sunxi_priv_data *gmac = plat->bsp_priv;
+	struct device_node *node = dev->of_node;
 	int ret;
 	u32 reg, val;
 
 	ret = regmap_field_read(gmac->regmap_field, &val);
 	if (ret) {
-		dev_err(priv->device, "Fail to read from regmap field.\n");
+		dev_err(dev, "Fail to read from regmap field.\n");
 		return ret;
 	}
 
 	reg = gmac->variant->default_syscon_value;
 	if (reg != val)
-		dev_warn(priv->device,
+		dev_warn(dev,
 			 "Current syscon value is not the default %x (expect %x)\n",
 			 val, reg);
 
@@ -926,9 +927,9 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 		/* Force EPHY xtal frequency to 24MHz. */
 		reg |= H3_EPHY_CLK_SEL;
 
-		ret = of_mdio_parse_addr(priv->device, priv->plat->phy_node);
+		ret = of_mdio_parse_addr(dev, plat->phy_node);
 		if (ret < 0) {
-			dev_err(priv->device, "Could not parse MDIO addr\n");
+			dev_err(dev, "Could not parse MDIO addr\n");
 			return ret;
 		}
 		/* of_mdio_parse_addr returns a valid (0 ~ 31) PHY
@@ -944,17 +945,17 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 
 	if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) {
 		if (val % 100) {
-			dev_err(priv->device, "tx-delay must be a multiple of 100\n");
+			dev_err(dev, "tx-delay must be a multiple of 100\n");
 			return -EINVAL;
 		}
 		val /= 100;
-		dev_dbg(priv->device, "set tx-delay to %x\n", val);
+		dev_dbg(dev, "set tx-delay to %x\n", val);
 		if (val <= gmac->variant->tx_delay_max) {
 			reg &= ~(gmac->variant->tx_delay_max <<
 				 SYSCON_ETXDC_SHIFT);
 			reg |= (val << SYSCON_ETXDC_SHIFT);
 		} else {
-			dev_err(priv->device, "Invalid TX clock delay: %d\n",
+			dev_err(dev, "Invalid TX clock delay: %d\n",
 				val);
 			return -EINVAL;
 		}
@@ -962,17 +963,17 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 
 	if (!of_property_read_u32(node, "allwinner,rx-delay-ps", &val)) {
 		if (val % 100) {
-			dev_err(priv->device, "rx-delay must be a multiple of 100\n");
+			dev_err(dev, "rx-delay must be a multiple of 100\n");
 			return -EINVAL;
 		}
 		val /= 100;
-		dev_dbg(priv->device, "set rx-delay to %x\n", val);
+		dev_dbg(dev, "set rx-delay to %x\n", val);
 		if (val <= gmac->variant->rx_delay_max) {
 			reg &= ~(gmac->variant->rx_delay_max <<
 				 SYSCON_ERXDC_SHIFT);
 			reg |= (val << SYSCON_ERXDC_SHIFT);
 		} else {
-			dev_err(priv->device, "Invalid RX clock delay: %d\n",
+			dev_err(dev, "Invalid RX clock delay: %d\n",
 				val);
 			return -EINVAL;
 		}
@@ -983,7 +984,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 	if (gmac->variant->support_rmii)
 		reg &= ~SYSCON_RMII_EN;
 
-	switch (priv->plat->interface) {
+	switch (plat->interface) {
 	case PHY_INTERFACE_MODE_MII:
 		/* default */
 		break;
@@ -997,8 +998,8 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 		reg |= SYSCON_RMII_EN | SYSCON_ETCS_EXT_GMII;
 		break;
 	default:
-		dev_err(priv->device, "Unsupported interface mode: %s",
-			phy_modes(priv->plat->interface));
+		dev_err(dev, "Unsupported interface mode: %s",
+			phy_modes(plat->interface));
 		return -EINVAL;
 	}
 
@@ -1023,8 +1024,6 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
 			sun8i_dwmac_unpower_internal_phy(gmac);
 	}
 
-	sun8i_dwmac_unset_syscon(gmac);
-
 	clk_disable_unprepare(gmac->tx_clk);
 
 	if (gmac->regulator)
@@ -1059,16 +1058,11 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
 {
 	struct mac_device_info *mac;
 	struct stmmac_priv *priv = ppriv;
-	int ret;
 
 	mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL);
 	if (!mac)
 		return NULL;
 
-	ret = sun8i_dwmac_set_syscon(priv);
-	if (ret)
-		return NULL;
-
 	mac->pcsr = priv->ioaddr;
 	mac->mac = &sun8i_dwmac_ops;
 	mac->dma = &sun8i_dwmac_dma_ops;
@@ -1224,10 +1218,14 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	plat_dat->exit = sun8i_dwmac_exit;
 	plat_dat->setup = sun8i_dwmac_setup;
 
-	ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv);
+	ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat);
 	if (ret)
 		goto dwmac_deconfig;
 
+	ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv);
+	if (ret)
+		goto dwmac_syscon;
+
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
 		goto dwmac_exit;
@@ -1256,11 +1254,12 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 dwmac_mux:
 	reset_control_put(gmac->rst_ephy);
 	clk_put(gmac->ephy_clk);
-	sun8i_dwmac_unset_syscon(gmac);
 dwmac_remove:
 	stmmac_dvr_remove(&pdev->dev);
 dwmac_exit:
 	sun8i_dwmac_exit(pdev, gmac);
+dwmac_syscon:
+	sun8i_dwmac_unset_syscon(gmac);
 dwmac_deconfig:
 	stmmac_remove_config_dt(pdev, plat_dat);
 
@@ -1281,6 +1280,7 @@ static int sun8i_dwmac_remove(struct platform_device *pdev)
 	}
 
 	stmmac_pltfr_remove(pdev);
+	sun8i_dwmac_unset_syscon(gmac);
 
 	return 0;
 }

From 9f9d41f03bb07069e6e83ff4720cfea74a63898d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 4 Jan 2021 17:22:24 -0800
Subject: [PATCH 382/547] docs: net: fix documentation on .ndo_get_stats

Fix calling context.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/netdevices.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index 5a85fcc80c76..e65665c5ab50 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -64,8 +64,8 @@ ndo_do_ioctl:
 	Context: process
 
 ndo_get_stats:
-	Synchronization: dev_base_lock rwlock.
-	Context: nominally process, but don't sleep inside an rwlock
+	Synchronization: rtnl_lock() semaphore, dev_base_lock rwlock, or RCU.
+	Context: atomic (can't sleep under rwlock or RCU)
 
 ndo_start_xmit:
 	Synchronization: __netif_tx_lock spinlock.

From f04bbcbf1e38d192e94bbfa126731a52332c40b1 Mon Sep 17 00:00:00 2001
From: Yonglong Liu <liuyonglong@huawei.com>
Date: Tue, 5 Jan 2021 11:37:26 +0800
Subject: [PATCH 383/547] net: hns3: fix a phy loopback fail issue

When phy driver does not implement the set_loopback interface,
phy loopback test will return -EOPNOTSUPP, and the loopback test
will fail. So when phy driver does not implement the set_loopback
interface, don't do phy loopback test.

Fixes: c9765a89d142 ("net: hns3: add phy selftest function")
Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e6f37f91c489..135bd0a0dcaf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -752,7 +752,8 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
 		handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK;
 		handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK;
 
-		if (hdev->hw.mac.phydev) {
+		if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv &&
+		    hdev->hw.mac.phydev->drv->set_loopback) {
 			count += 1;
 			handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK;
 		}

From 65e61e3c2a619c4d4b873885b2d5394025ed117b Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Tue, 5 Jan 2021 11:37:27 +0800
Subject: [PATCH 384/547] net: hns3: fix the number of queues actually used by
 ARQ

HCLGE_MBX_MAX_ARQ_MSG_NUM is used to apply memory for the number
of queues used by ARQ(Asynchronous Receive Queue), so the head
and tail pointers should also use this macro.

Fixes: 07a0556a3a73 ("net: hns3: Changes to support ARQ(Asynchronous Receive Queue)")
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index fb5e8842983c..33defa4c180a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -169,7 +169,7 @@ struct hclgevf_mbx_arq_ring {
 #define hclge_mbx_ring_ptr_move_crq(crq) \
 	(crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num)
 #define hclge_mbx_tail_ptr_move_arq(arq) \
-	(arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
+		(arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM)
 #define hclge_mbx_head_ptr_move_arq(arq) \
-		(arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
+		(arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM)
 #endif

From ab6e32d2913a594bc8f822ce4a75c400190b2ecc Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Tue, 5 Jan 2021 11:37:28 +0800
Subject: [PATCH 385/547] net: hns3: fix incorrect handling of sctp6 rss tuple

For DEVICE_VERSION_V2, the hardware only supports src-ip,
dst-ip and verification-tag for rss tuple set of sctp6
packet. For DEVICE_VERSION_V3, the hardware supports
src-port and dst-port as well.

Currently, when user queries the sctp6 rss tuples info,
some unsupported information will be showed on V2. So add
a check for hardware version when initializing and queries
sctp6 rss tuple to fix this issue.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c  | 6 ++++--
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h  | 2 ++
 .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c    | 9 ++++++---
 .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h    | 2 ++
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 135bd0a0dcaf..c242883fea5d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4538,8 +4538,8 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
 		req->ipv4_sctp_en = tuple_sets;
 		break;
 	case SCTP_V6_FLOW:
-		if ((nfc->data & RXH_L4_B_0_1) ||
-		    (nfc->data & RXH_L4_B_2_3))
+		if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 &&
+		    (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)))
 			return -EINVAL;
 
 		req->ipv6_sctp_en = tuple_sets;
@@ -4731,6 +4731,8 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev)
 		vport[i].rss_tuple_sets.ipv6_udp_en =
 			HCLGE_RSS_INPUT_TUPLE_OTHER;
 		vport[i].rss_tuple_sets.ipv6_sctp_en =
+			hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ?
+			HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT :
 			HCLGE_RSS_INPUT_TUPLE_SCTP;
 		vport[i].rss_tuple_sets.ipv6_fragment_en =
 			HCLGE_RSS_INPUT_TUPLE_OTHER;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 50a294dfaff5..ca46bc9110d7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -107,6 +107,8 @@
 #define HCLGE_D_IP_BIT			BIT(2)
 #define HCLGE_S_IP_BIT			BIT(3)
 #define HCLGE_V_TAG_BIT			BIT(4)
+#define HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT	\
+		(HCLGE_D_IP_BIT | HCLGE_S_IP_BIT | HCLGE_V_TAG_BIT)
 
 #define HCLGE_RSS_TC_SIZE_0		1
 #define HCLGE_RSS_TC_SIZE_1		2
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 145757cb70f9..674b3a22e91f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -917,8 +917,8 @@ static int hclgevf_set_rss_tuple(struct hnae3_handle *handle,
 		req->ipv4_sctp_en = tuple_sets;
 		break;
 	case SCTP_V6_FLOW:
-		if ((nfc->data & RXH_L4_B_0_1) ||
-		    (nfc->data & RXH_L4_B_2_3))
+		if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 &&
+		    (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)))
 			return -EINVAL;
 
 		req->ipv6_sctp_en = tuple_sets;
@@ -2502,7 +2502,10 @@ static void hclgevf_rss_init_cfg(struct hclgevf_dev *hdev)
 		tuple_sets->ipv4_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER;
 		tuple_sets->ipv6_tcp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER;
 		tuple_sets->ipv6_udp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER;
-		tuple_sets->ipv6_sctp_en = HCLGEVF_RSS_INPUT_TUPLE_SCTP;
+		tuple_sets->ipv6_sctp_en =
+			hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ?
+					HCLGEVF_RSS_INPUT_TUPLE_SCTP_NO_PORT :
+					HCLGEVF_RSS_INPUT_TUPLE_SCTP;
 		tuple_sets->ipv6_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER;
 	}
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 1b183bc35604..f6d817a3edcb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -122,6 +122,8 @@
 #define HCLGEVF_D_IP_BIT		BIT(2)
 #define HCLGEVF_S_IP_BIT		BIT(3)
 #define HCLGEVF_V_TAG_BIT		BIT(4)
+#define HCLGEVF_RSS_INPUT_TUPLE_SCTP_NO_PORT	\
+	(HCLGEVF_D_IP_BIT | HCLGEVF_S_IP_BIT | HCLGEVF_V_TAG_BIT)
 
 #define HCLGEVF_STATS_TIMER_INTERVAL	36U
 

From 7a68d725e4ea384977445e0bcaed3d7de83ab5b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= <jks@iki.fi>
Date: Tue, 5 Jan 2021 06:52:49 +0200
Subject: [PATCH 386/547] net: cdc_ncm: correct overhead in delayed_ndp_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Aligning to tx_ndp_modulus is not sufficient because the next align
call can be cdc_ncm_align_tail, which can add up to ctx->tx_modulus +
ctx->tx_remainder - 1 bytes. This used to lead to occasional crashes
on a Huawei 909s-120 LTE module as follows:

- the condition marked /* if there is a remaining skb [...] */ is true
  so the swaps happen
- skb_out is set from ctx->tx_curr_skb
- skb_out->len is exactly 0x3f52
- ctx->tx_curr_size is 0x4000 and delayed_ndp_size is 0xac
  (note that the sum of skb_out->len and delayed_ndp_size is 0x3ffe)
- the for loop over n is executed once
- the cdc_ncm_align_tail call marked /* align beginning of next frame */
  increases skb_out->len to 0x3f56 (the sum is now 0x4002)
- the condition marked /* check if we had enough room left [...] */ is
  false so we break out of the loop
- the condition marked /* If requested, put NDP at end of frame. */ is
  true so the NDP is written into skb_out
- now skb_out->len is 0x4002, so padding_count is minus two interpreted
  as an unsigned number, which is used as the length argument to memset,
  leading to a crash with various symptoms but usually including

> Call Trace:
>  <IRQ>
>  cdc_ncm_fill_tx_frame+0x83a/0x970 [cdc_ncm]
>  cdc_mbim_tx_fixup+0x1d9/0x240 [cdc_mbim]
>  usbnet_start_xmit+0x5d/0x720 [usbnet]

The cdc_ncm_align_tail call first aligns on a ctx->tx_modulus
boundary (adding at most ctx->tx_modulus-1 bytes), then adds
ctx->tx_remainder bytes. Alternatively, the next alignment call can
occur in cdc_ncm_ndp16 or cdc_ncm_ndp32, in which case at most
ctx->tx_ndp_modulus-1 bytes are added.

A similar problem has occurred before, and the code is nontrivial to
reason about, so add a guard before the crashing call. By that time it
is too late to prevent any memory corruption (we'll have written past
the end of the buffer already) but we can at least try to get a warning
written into an on-disk log by avoiding the hard crash caused by padding
past the buffer with a huge number of zeros.

Signed-off-by: Jouni K. Seppänen <jks@iki.fi>
Fixes: 4a0e3e989d66 ("cdc_ncm: Add support for moving NDP to end of NCM frame")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=209407
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 3b816a4731f2..5a78848db93f 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1199,7 +1199,10 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 * accordingly. Otherwise, we should check here.
 	 */
 	if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)
-		delayed_ndp_size = ALIGN(ctx->max_ndp_size, ctx->tx_ndp_modulus);
+		delayed_ndp_size = ctx->max_ndp_size +
+			max_t(u32,
+			      ctx->tx_ndp_modulus,
+			      ctx->tx_modulus + ctx->tx_remainder) - 1;
 	else
 		delayed_ndp_size = 0;
 
@@ -1410,7 +1413,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	if (!(dev->driver_info->flags & FLAG_SEND_ZLP) &&
 	    skb_out->len > ctx->min_tx_pkt) {
 		padding_count = ctx->tx_curr_size - skb_out->len;
-		skb_put_zero(skb_out, padding_count);
+		if (!WARN_ON(padding_count > ctx->tx_curr_size))
+			skb_put_zero(skb_out, padding_count);
 	} else if (skb_out->len < ctx->tx_curr_size &&
 		   (skb_out->len % dev->maxpacket) == 0) {
 		skb_put_u8(skb_out, 0);	/* force short packet */

From 4beb17e553b49c3dd74505c9f361e756aaae653e Mon Sep 17 00:00:00 2001
From: Qinglang Miao <miaoqinglang@huawei.com>
Date: Tue, 5 Jan 2021 13:57:54 +0800
Subject: [PATCH 387/547] net: qrtr: fix null-ptr-deref in qrtr_ns_remove

A null-ptr-deref bug is reported by Hulk Robot like this:
--------------
KASAN: null-ptr-deref in range [0x0000000000000128-0x000000000000012f]
Call Trace:
qrtr_ns_remove+0x22/0x40 [ns]
qrtr_proto_fini+0xa/0x31 [qrtr]
__x64_sys_delete_module+0x337/0x4e0
do_syscall_64+0x34/0x80
entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x468ded
--------------

When qrtr_ns_init fails in qrtr_proto_init, qrtr_ns_remove which would
be called later on would raise a null-ptr-deref because qrtr_ns.workqueue
has been destroyed.

Fix it by making qrtr_ns_init have a return value and adding a check in
qrtr_proto_init.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Qinglang Miao <miaoqinglang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/ns.c   |  7 ++++---
 net/qrtr/qrtr.c | 16 +++++++++++-----
 net/qrtr/qrtr.h |  2 +-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 56aaf8cb6527..8d00dfe8139e 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -755,7 +755,7 @@ static void qrtr_ns_data_ready(struct sock *sk)
 	queue_work(qrtr_ns.workqueue, &qrtr_ns.work);
 }
 
-void qrtr_ns_init(void)
+int qrtr_ns_init(void)
 {
 	struct sockaddr_qrtr sq;
 	int ret;
@@ -766,7 +766,7 @@ void qrtr_ns_init(void)
 	ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM,
 			       PF_QIPCRTR, &qrtr_ns.sock);
 	if (ret < 0)
-		return;
+		return ret;
 
 	ret = kernel_getsockname(qrtr_ns.sock, (struct sockaddr *)&sq);
 	if (ret < 0) {
@@ -797,12 +797,13 @@ void qrtr_ns_init(void)
 	if (ret < 0)
 		goto err_wq;
 
-	return;
+	return 0;
 
 err_wq:
 	destroy_workqueue(qrtr_ns.workqueue);
 err_sock:
 	sock_release(qrtr_ns.sock);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(qrtr_ns_init);
 
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index f4ab3ca6d73b..b34358282f37 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1287,13 +1287,19 @@ static int __init qrtr_proto_init(void)
 		return rc;
 
 	rc = sock_register(&qrtr_family);
-	if (rc) {
-		proto_unregister(&qrtr_proto);
-		return rc;
-	}
+	if (rc)
+		goto err_proto;
 
-	qrtr_ns_init();
+	rc = qrtr_ns_init();
+	if (rc)
+		goto err_sock;
 
+	return 0;
+
+err_sock:
+	sock_unregister(qrtr_family.family);
+err_proto:
+	proto_unregister(&qrtr_proto);
 	return rc;
 }
 postcore_initcall(qrtr_proto_init);
diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h
index dc2b67f17927..3f2d28696062 100644
--- a/net/qrtr/qrtr.h
+++ b/net/qrtr/qrtr.h
@@ -29,7 +29,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep);
 
 int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len);
 
-void qrtr_ns_init(void);
+int qrtr_ns_init(void);
 
 void qrtr_ns_remove(void);
 

From 445c6198fe7be03b7d38e66fe8d4b3187bc251d4 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 5 Jan 2021 20:15:15 +1100
Subject: [PATCH 388/547] net: ethernet: fs_enet: Add missing MODULE_LICENSE

Since commit 1d6cd3929360 ("modpost: turn missing MODULE_LICENSE()
into error") the ppc32_allmodconfig build fails with:

  ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-fec.o
  ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-bitbang.o

Add the missing MODULE_LICENSEs to fix the build. Both files include a
copyright header indicating they are GPL v2.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c | 1 +
 drivers/net/ethernet/freescale/fs_enet/mii-fec.c     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
index c8e5d889bd81..21de56345503 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
@@ -223,3 +223,4 @@ static struct platform_driver fs_enet_bb_mdio_driver = {
 };
 
 module_platform_driver(fs_enet_bb_mdio_driver);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
index 8b51ee142fa3..152f4d83765a 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c
@@ -224,3 +224,4 @@ static struct platform_driver fs_enet_fec_mdio_driver = {
 };
 
 module_platform_driver(fs_enet_fec_mdio_driver);
+MODULE_LICENSE("GPL");

From 3503ee6c0bec5f173d606359e6384a5ef85492fb Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Tue, 5 Jan 2021 18:17:40 +0800
Subject: [PATCH 389/547] selftests: fix the return value for UDP GRO test

The udpgro.sh will always return 0 (unless the bpf selftest was not
build first) even if there are some failed sub test-cases.

Therefore the kselftest framework will report this case is OK.

Check and return the exit status of each test to make it easier to
spot real failures.

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgro.sh | 34 +++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index ac2a30be9b32..f8a19f548ae9 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -5,6 +5,14 @@
 
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
+# set global exit status, but never reset nonzero one.
+check_err()
+{
+	if [ $ret -eq 0 ]; then
+		ret=$1
+	fi
+}
+
 cleanup() {
 	local -r jobs="$(jobs -p)"
 	local -r ns="$(ip netns list|grep $PEER_NS)"
@@ -44,7 +52,9 @@ run_one() {
 	# Hack: let bg programs complete the startup
 	sleep 0.1
 	./udpgso_bench_tx ${tx_args}
+	ret=$?
 	wait $(jobs -p)
+	return $ret
 }
 
 run_test() {
@@ -87,8 +97,10 @@ run_one_nat() {
 
 	sleep 0.1
 	./udpgso_bench_tx ${tx_args}
+	ret=$?
 	kill -INT $pid
 	wait $(jobs -p)
+	return $ret
 }
 
 run_one_2sock() {
@@ -110,7 +122,9 @@ run_one_2sock() {
 	sleep 0.1
 	# first UDP GSO socket should be closed at this point
 	./udpgso_bench_tx ${tx_args}
+	ret=$?
 	wait $(jobs -p)
+	return $ret
 }
 
 run_nat_test() {
@@ -131,36 +145,54 @@ run_all() {
 	local -r core_args="-l 4"
 	local -r ipv4_args="${core_args} -4 -D 192.168.1.1"
 	local -r ipv6_args="${core_args} -6 -D 2001:db8::1"
+	ret=0
 
 	echo "ipv4"
 	run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400"
+	check_err $?
 
 	# explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1)
 	# when GRO does not take place
 	run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1"
+	check_err $?
 
 	# the GSO packets are aggregated because:
 	# * veth schedule napi after each xmit
 	# * segmentation happens in BH context, veth napi poll is delayed after
 	#   the transmission of the last segment
 	run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720"
+	check_err $?
 	run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472"
+	check_err $?
 	run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720"
+	check_err $?
 	run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500"
+	check_err $?
 
 	run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472"
+	check_err $?
 	run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472"
+	check_err $?
 
 	echo "ipv6"
 	run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400"
+	check_err $?
 	run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1"
+	check_err $?
 	run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520"
+	check_err $?
 	run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452"
+	check_err $?
 	run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520"
+	check_err $?
 	run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500"
+	check_err $?
 
 	run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452"
+	check_err $?
 	run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452"
+	check_err $?
+	return $ret
 }
 
 if [ ! -f ../bpf/xdp_dummy.o ]; then
@@ -180,3 +212,5 @@ elif [[ $1 == "__subprocess_2sock" ]]; then
 	shift
 	run_one_2sock $@
 fi
+
+exit $?

From 67208692802ce3cacfa00fe586dc0cb1bef0a51c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 6 Jan 2021 00:42:19 +0100
Subject: [PATCH 390/547] tools/resolve_btfids: Warn when having multiple IDs
 for single type

The kernel image can contain multiple types (structs/unions)
with the same name. This causes distinct type hierarchies in
BTF data and makes resolve_btfids fail with error like:

  BTFIDS  vmlinux
FAILED unresolved symbol udp6_sock

as reported by Qais Yousef [1].

This change adds warning when multiple types of the same name
are detected:

  BTFIDS  vmlinux
WARN: multiple IDs found for 'file': 526, 113351 - using 526
WARN: multiple IDs found for 'sk_buff': 2744, 113958 - using 2744

We keep the lower ID for the given type instance and let the
build continue.

Also changing the 'nr' variable name to 'nr_types' to avoid confusion.

[1] https://lore.kernel.org/lkml/20201229151352.6hzmjvu3qh6p2qgg@e107158-lin/

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210105234219.970039-1-jolsa@kernel.org
---
 tools/bpf/resolve_btfids/main.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index e3ea569ee125..7409d7860aa6 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -139,6 +139,8 @@ int eprintf(int level, int var, const char *fmt, ...)
 #define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_err(fmt, ...) \
 	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
 
 static bool is_btf_id(const char *name)
 {
@@ -472,7 +474,7 @@ static int symbols_resolve(struct object *obj)
 	int nr_funcs    = obj->nr_funcs;
 	int err, type_id;
 	struct btf *btf;
-	__u32 nr;
+	__u32 nr_types;
 
 	btf = btf__parse(obj->btf ?: obj->path, NULL);
 	err = libbpf_get_error(btf);
@@ -483,12 +485,12 @@ static int symbols_resolve(struct object *obj)
 	}
 
 	err = -1;
-	nr  = btf__get_nr_types(btf);
+	nr_types = btf__get_nr_types(btf);
 
 	/*
 	 * Iterate all the BTF types and search for collected symbol IDs.
 	 */
-	for (type_id = 1; type_id <= nr; type_id++) {
+	for (type_id = 1; type_id <= nr_types; type_id++) {
 		const struct btf_type *type;
 		struct rb_root *root;
 		struct btf_id *id;
@@ -526,8 +528,13 @@ static int symbols_resolve(struct object *obj)
 
 		id = btf_id__find(root, str);
 		if (id) {
-			id->id = type_id;
-			(*nr)--;
+			if (id->id) {
+				pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n",
+					str, id->id, type_id, id->id);
+			} else {
+				id->id = type_id;
+				(*nr)--;
+			}
 		}
 	}
 

From 19fce0470f05031e6af36e49ce222d0f0050d432 Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Tue, 1 Dec 2020 17:52:43 -0800
Subject: [PATCH 391/547] nvme-fc: avoid calling _nvme_fc_abort_outstanding_ios
 from interrupt context

Recent patches changed calling sequences. nvme_fc_abort_outstanding_ios
used to be called from a timeout or work context. Now it is being called
in an io completion context, which can be an interrupt handler.
Unfortunately, the abort outstanding ios routine attempts to stop nvme
queues and nested routines that may try to sleep, which is in conflict
with the interrupt handler.

Correct replacing the direct call with a work element scheduling, and the
abort outstanding ios routine will be called in the work element.

Fixes: 95ced8a2c72d ("nvme-fc: eliminate terminate_io use by nvme_fc_error_recovery")
Signed-off-by: James Smart <james.smart@broadcom.com>
Reported-by: Daniel Wagner <dwagner@suse.de>
Tested-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 38373a0e86ef..5f36cfa8136c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -166,6 +166,7 @@ struct nvme_fc_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 	struct blk_mq_tag_set	tag_set;
 
+	struct work_struct	ioerr_work;
 	struct delayed_work	connect_work;
 
 	struct kref		ref;
@@ -1888,6 +1889,15 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 	}
 }
 
+static void
+nvme_fc_ctrl_ioerr_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *ctrl =
+			container_of(work, struct nvme_fc_ctrl, ioerr_work);
+
+	nvme_fc_error_recovery(ctrl, "transport detected io error");
+}
+
 static void
 nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 {
@@ -2046,7 +2056,7 @@ done:
 
 check_error:
 	if (terminate_assoc)
-		nvme_fc_error_recovery(ctrl, "transport detected io error");
+		queue_work(nvme_reset_wq, &ctrl->ioerr_work);
 }
 
 static int
@@ -3233,6 +3243,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 
+	cancel_work_sync(&ctrl->ioerr_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 	/*
 	 * kill the association on the link side.  this will block
@@ -3449,6 +3460,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
+	INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
@@ -3540,6 +3552,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 fail_ctrl:
 	nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
+	cancel_work_sync(&ctrl->ioerr_work);
 	cancel_work_sync(&ctrl->ctrl.reset_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 

From 2b54996b7d56badc563755840838614f2fa9c4de Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Mon, 7 Dec 2020 12:29:40 -0800
Subject: [PATCH 392/547] nvme-fcloop: Fix sscanf type and
 list_first_entry_or_null warnings

Kernel robot had the following warnings:

>> fcloop.c:1506:6: warning: %x in format string (no. 1) requires
>> 'unsigned int *' but the argument type is 'signed int *'.
>> [invalidScanfArgType_int]
>>    if (sscanf(buf, "%x:%d:%d", &opcode, &starting, &amount) != 3)
>>        ^

Resolve by changing opcode from and int to an unsigned int

and

>>  fcloop.c:1632:32: warning: Uninitialized variable: lport [uninitvar]
>>     ret = __wait_localport_unreg(lport);
>>                                  ^

>>  fcloop.c:1615:28: warning: Uninitialized variable: nport [uninitvar]
>>     ret = __remoteport_unreg(nport, rport);
>>                              ^

These aren't actual issues as the values are assigned prior to use.
It appears the tool doesn't understand list_first_entry_or_null().
Regardless, quiet the tool by initializing the pointers to NULL at
declaration.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fcloop.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 733d9363900e..68213f0a052b 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -1501,7 +1501,8 @@ static ssize_t
 fcloop_set_cmd_drop(struct device *dev, struct device_attribute *attr,
 		const char *buf, size_t count)
 {
-	int opcode, starting, amount;
+	unsigned int opcode;
+	int starting, amount;
 
 	if (sscanf(buf, "%x:%d:%d", &opcode, &starting, &amount) != 3)
 		return -EBADRQC;
@@ -1588,8 +1589,8 @@ out_destroy_class:
 
 static void __exit fcloop_exit(void)
 {
-	struct fcloop_lport *lport;
-	struct fcloop_nport *nport;
+	struct fcloop_lport *lport = NULL;
+	struct fcloop_nport *nport = NULL;
 	struct fcloop_tport *tport;
 	struct fcloop_rport *rport;
 	unsigned long flags;

From 7ee5c78ca3895d44e918c38332921983ed678be0 Mon Sep 17 00:00:00 2001
From: Gopal Tiwari <gtiwari@redhat.com>
Date: Fri, 4 Dec 2020 21:46:57 +0530
Subject: [PATCH 393/547] nvme-pci: mark Samsung PM1725a as IGNORE_DEV_SUBNQN

A system with more than one of these SSDs will only have one usable.
Hence the kernel fails to detect nvme devices due to duplicate cntlids.

[    6.274554] nvme nvme1: Duplicate cntlid 33 with nvme0, rejecting
[    6.274566] nvme nvme1: Removing after probe failure status: -22

Adding the NVME_QUIRK_IGNORE_DEV_SUBNQN quirk to resolves the issue.

Signed-off-by: Gopal Tiwari <gtiwari@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b4385cb0ff60..553871e6962b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3196,7 +3196,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_DEVICE(0x144d, 0xa821),   /* Samsung PM1725 */
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE(0x144d, 0xa822),   /* Samsung PM1725a */
-		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
+				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_DEVICE(0x1d1d, 0x1f1f),	/* LighNVM qemu device */
 		.driver_data = NVME_QUIRK_LIGHTNVM, },
 	{ PCI_DEVICE(0x1d1d, 0x2807),	/* CNEX WL */

From 5c11f7d9f843bdd24cd29b95401938bc3f168070 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 21 Dec 2020 00:03:39 -0800
Subject: [PATCH 394/547] nvme-tcp: Fix possible race of io_work and direct
 send

We may send a request (with or without its data) from two paths:

  1. From our I/O context nvme_tcp_io_work which is triggered from:
    - queue_rq
    - r2t reception
    - socket data_ready and write_space callbacks
  2. Directly from queue_rq if the send_list is empty (because we want to
     save the context switch associated with scheduling our io_work).

However, given that now we have the send_mutex, we may run into a race
condition where none of these contexts will send the pending payload to
the controller. Both io_work send path and queue_rq send path
opportunistically attempt to acquire the send_mutex however queue_rq only
attempts to send a single request, and if io_work context fails to
acquire the send_mutex it will complete without rescheduling itself.

The race can trigger with the following sequence:

  1. queue_rq sends request (no incapsule data) and blocks
  2. RX path receives r2t - prepares data PDU to send, adds h2cdata PDU
     to the send_list and schedules io_work
  3. io_work triggers and cannot acquire the send_mutex - because of (1),
     ends without self rescheduling
  4. queue_rq completes the send, and completes

==> no context will send the h2cdata - timeout.

Fix this by having queue_rq sending as much as it can from the send_list
such that if it still has any left, its because the socket buffer is
full and the socket write_space callback will trigger, thus guaranteeing
that a context will be scheduled to send the h2cdata PDU.

Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context")
Reported-by: Potnuri Bharat Teja <bharat@chelsio.com>
Reported-by: Samuel Jones <sjones@kalrayinc.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Potnuri Bharat Teja <bharat@chelsio.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 1ba659927442..979ee31b8dd1 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -262,6 +262,16 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
 	}
 }
 
+static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
+{
+	int ret;
+
+	/* drain the send queue as much as we can... */
+	do {
+		ret = nvme_tcp_try_send(queue);
+	} while (ret > 0);
+}
+
 static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 		bool sync, bool last)
 {
@@ -279,7 +289,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	if (queue->io_cpu == smp_processor_id() &&
 	    sync && empty && mutex_trylock(&queue->send_mutex)) {
 		queue->more_requests = !last;
-		nvme_tcp_try_send(queue);
+		nvme_tcp_send_all(queue);
 		queue->more_requests = false;
 		mutex_unlock(&queue->send_mutex);
 	} else if (last) {

From 62df80165d7f197c9c0652e7416164f294a96661 Mon Sep 17 00:00:00 2001
From: Lalithambika Krishnakumar <lalithambika.krishnakumar@intel.com>
Date: Wed, 23 Dec 2020 14:09:00 -0800
Subject: [PATCH 395/547] nvme: avoid possible double fetch in handling CQE

While handling the completion queue, keep a local copy of the command id
from the DMA-accessible completion entry. This silences a time-of-check
to time-of-use (TOCTOU) warning from KF/x[1], with respect to a
Thunderclap[2] vulnerability analysis. The double-read impact appears
benign.

There may be a theoretical window for @command_id to be used as an
adversary-controlled array-index-value for mounting a speculative
execution attack, but that mitigation is saved for a potential follow-on.
A man-in-the-middle attack on the data payload is out of scope for this
analysis and is hopefully mitigated by filesystem integrity mechanisms.

[1] https://github.com/intel/kernel-fuzzer-for-xen-project
[2] http://thunderclap.io/thunderclap-paper-ndss2019.pdf
Signed-off-by: Lalithambika Krishna Kumar <lalithambika.krishnakumar@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 553871e6962b..50d9a20568a2 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -967,6 +967,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
 static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 {
 	struct nvme_completion *cqe = &nvmeq->cqes[idx];
+	__u16 command_id = READ_ONCE(cqe->command_id);
 	struct request *req;
 
 	/*
@@ -975,17 +976,17 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 	 * aborts.  We don't even bother to allocate a struct request
 	 * for them but rather special case them here.
 	 */
-	if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) {
+	if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) {
 		nvme_complete_async_event(&nvmeq->dev->ctrl,
 				cqe->status, &cqe->result);
 		return;
 	}
 
-	req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
+	req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id);
 	if (unlikely(!req)) {
 		dev_warn(nvmeq->dev->ctrl.device,
 			"invalid id %d completed on queue %d\n",
-			cqe->command_id, le16_to_cpu(cqe->sq_id));
+			command_id, le16_to_cpu(cqe->sq_id));
 		return;
 	}
 

From 9b66fc02bec0ca613bc6d4c1d0049f727a95567d Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im.dev@gmail.com>
Date: Wed, 30 Dec 2020 20:22:44 +0900
Subject: [PATCH 396/547] nvme: unexport functions with no external caller

There are no callers for nvme_reset_ctrl_sync() and
nvme_alloc_request_qid() so that we keep the symbols exported.

Unexport those functions, mark them static and update the header file
respectively.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 6 ++----
 drivers/nvme/host/nvme.h | 3 ---
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ce1b61519441..70a63d7c1d02 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -179,7 +179,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
 
-int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
 {
 	int ret;
 
@@ -192,7 +192,6 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
 
 static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl)
 {
@@ -578,7 +577,7 @@ struct request *nvme_alloc_request(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
-struct request *nvme_alloc_request_qid(struct request_queue *q,
+static struct request *nvme_alloc_request_qid(struct request_queue *q,
 		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
 {
 	struct request *req;
@@ -589,7 +588,6 @@ struct request *nvme_alloc_request_qid(struct request_queue *q,
 		nvme_init_request(req, cmd);
 	return req;
 }
-EXPORT_SYMBOL_GPL(nvme_alloc_request_qid);
 
 static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
 {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 7e49f61f81df..9c4fbfe44c00 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -610,8 +610,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, blk_mq_req_flags_t flags);
-struct request *nvme_alloc_request_qid(struct request_queue *q,
-		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
 void nvme_cleanup_cmd(struct request *req);
 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmd);
@@ -630,7 +628,6 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
-int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
 int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
 int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
 

From 9ceb7863537748c67fa43ac4f2f565819bbd36e4 Mon Sep 17 00:00:00 2001
From: Israel Rukshin <israelr@nvidia.com>
Date: Tue, 5 Jan 2021 10:46:54 +0200
Subject: [PATCH 397/547] nvmet-rdma: Fix list_del corruption on queue
 establishment failure

When a queue is in NVMET_RDMA_Q_CONNECTING state, it may has some
requests at rsp_wait_list. In case a disconnect occurs at this
state, no one will empty this list and will return the requests to
free_rsps list. Normally nvmet_rdma_queue_established() free those
requests after moving the queue to NVMET_RDMA_Q_LIVE state, but in
this case __nvmet_rdma_queue_disconnect() is called before. The
crash happens at nvmet_rdma_free_rsps() when calling
list_del(&rsp->free_list), because the request exists only at
the wait list. To fix the issue, simply clear rsp_wait_list when
destroying the queue.

Signed-off-by: Israel Rukshin <israelr@nvidia.com>
Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/rdma.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 5c1e7cb7fe0d..bdfc22eb2a10 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1641,6 +1641,16 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 	spin_lock_irqsave(&queue->state_lock, flags);
 	switch (queue->state) {
 	case NVMET_RDMA_Q_CONNECTING:
+		while (!list_empty(&queue->rsp_wait_list)) {
+			struct nvmet_rdma_rsp *rsp;
+
+			rsp = list_first_entry(&queue->rsp_wait_list,
+					       struct nvmet_rdma_rsp,
+					       wait_list);
+			list_del(&rsp->wait_list);
+			nvmet_rdma_put_rsp(rsp);
+		}
+		fallthrough;
 	case NVMET_RDMA_Q_LIVE:
 		queue->state = NVMET_RDMA_Q_DISCONNECTING;
 		disconnect = true;

From 2b59787a223b79228fed9ade1bf6936194ddb8cd Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Tue, 5 Jan 2021 10:34:02 +0000
Subject: [PATCH 398/547] nvme: remove the unused status argument from
 nvme_trace_bio_complete

The only used argument in this function is the "req".

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 drivers/nvme/host/nvme.h | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 70a63d7c1d02..f320273fc672 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -330,7 +330,7 @@ static inline void nvme_end_req(struct request *req)
 		req->__sector = nvme_lba_to_sect(req->q->queuedata,
 			le64_to_cpu(nvme_req(req)->result.u64));
 
-	nvme_trace_bio_complete(req, status);
+	nvme_trace_bio_complete(req);
 	blk_mq_end_request(req, status);
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9c4fbfe44c00..88a6b97247f5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -672,8 +672,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 		kblockd_schedule_work(&head->requeue_work);
 }
 
-static inline void nvme_trace_bio_complete(struct request *req,
-        blk_status_t status)
+static inline void nvme_trace_bio_complete(struct request *req)
 {
 	struct nvme_ns *ns = req->q->queuedata;
 
@@ -728,8 +727,7 @@ static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 {
 }
-static inline void nvme_trace_bio_complete(struct request *req,
-        blk_status_t status)
+static inline void nvme_trace_bio_complete(struct request *req)
 {
 }
 static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,

From 3ce47d95b7346dcafd9bed3556a8d072cb2b8571 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Mon, 4 Jan 2021 13:59:53 -0700
Subject: [PATCH 399/547] powerpc: Handle .text.{hot,unlikely}.* in linker
 script

Commit eff8728fe698 ("vmlinux.lds.h: Add PGO and AutoFDO input
sections") added ".text.unlikely.*" and ".text.hot.*" due to an LLVM
change [1].

After another LLVM change [2], these sections are seen in some PowerPC
builds, where there is a orphan section warning then build failure:

$ make -skj"$(nproc)" \
       ARCH=powerpc CROSS_COMPILE=powerpc64le-linux-gnu- LLVM=1 O=out \
       distclean powernv_defconfig zImage.epapr
ld.lld: warning: kernel/built-in.a(panic.o):(.text.unlikely.) is being placed in '.text.unlikely.'
...
ld.lld: warning: address (0xc000000000009314) of section .text is not a multiple of alignment (256)
...
ERROR: start_text address is c000000000009400, should be c000000000008000
ERROR: try to enable LD_HEAD_STUB_CATCH config option
ERROR: see comments in arch/powerpc/tools/head_check.sh
...

Explicitly handle these sections like in the main linker script so
there is no more build failure.

[1]: https://reviews.llvm.org/D79600
[2]: https://reviews.llvm.org/D92493

Fixes: 83a092cf95f2 ("powerpc: Link warning for orphan sections")
Cc: stable@vger.kernel.org
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://github.com/ClangBuiltLinux/linux/issues/1218
Link: https://lore.kernel.org/r/20210104205952.1399409-1-natechancellor@gmail.com
---
 arch/powerpc/kernel/vmlinux.lds.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 0318ba436f34..8e0b1298bf19 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -85,7 +85,7 @@ SECTIONS
 		ALIGN_FUNCTION();
 #endif
 		/* careful! __ftr_alt_* sections need to be close to .text */
-		*(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
+		*(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text);
 #ifdef CONFIG_PPC64
 		*(.tramp.ftrace.text);
 #endif

From cb7f4a8b1fb426a175d1708f05581939c61329d4 Mon Sep 17 00:00:00 2001
From: Ying-Tsun Huang <ying-tsun.huang@amd.com>
Date: Tue, 15 Dec 2020 15:07:20 +0800
Subject: [PATCH 400/547] x86/mtrr: Correct the range check before performing
 MTRR type lookups

In mtrr_type_lookup(), if the input memory address region is not in the
MTRR, over 4GB, and not over the top of memory, a write-back attribute
is returned. These condition checks are for ensuring the input memory
address region is actually mapped to the physical memory.

However, if the end address is just aligned with the top of memory,
the condition check treats the address is over the top of memory, and
write-back attribute is not returned.

And this hits in a real use case with NVDIMM: the nd_pmem module tries
to map NVDIMMs as cacheable memories when NVDIMMs are connected. If a
NVDIMM is the last of the DIMMs, the performance of this NVDIMM becomes
very low since it is aligned with the top of memory and its memory type
is uncached-minus.

Move the input end address change to inclusive up into
mtrr_type_lookup(), before checking for the top of memory in either
mtrr_type_lookup_{variable,fixed}() helpers.

 [ bp: Massage commit message. ]

Fixes: 0cc705f56e40 ("x86/mm/mtrr: Clean up mtrr_type_lookup()")
Signed-off-by: Ying-Tsun Huang <ying-tsun.huang@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201215070721.4349-1-ying-tsun.huang@amd.com
---
 arch/x86/kernel/cpu/mtrr/generic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 23ad8e953dfb..a29997e6cf9e 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
 	*repeat = 0;
 	*uniform = 1;
 
-	/* Make end inclusive instead of exclusive */
-	end--;
-
 	prev_match = MTRR_TYPE_INVALID;
 	for (i = 0; i < num_var_ranges; ++i) {
 		unsigned short start_state, end_state, inclusive;
@@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
 	int repeat;
 	u64 partial_end;
 
+	/* Make end inclusive instead of exclusive */
+	end--;
+
 	if (!mtrr_state_set)
 		return MTRR_TYPE_INVALID;
 

From 3e2224c5867fead6c0b94b84727cc676ac6353a3 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 6 Jan 2021 16:09:26 +0000
Subject: [PATCH 401/547] io_uring: Fix return value from
 alloc_fixed_file_ref_node

alloc_fixed_file_ref_node() currently returns an ERR_PTR on failure.
io_sqe_files_unregister() expects it to return NULL and since it can only
return -ENOMEM, it makes more sense to change alloc_fixed_file_ref_node()
to behave that way.

Fixes: 1ffc54220c44 ("io_uring: fix io_sqe_files_unregister() hangs")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index dc92ca5090a3..27a8c226abf8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7696,12 +7696,12 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
 
 	ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
 	if (!ref_node)
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 
 	if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero,
 			    0, GFP_KERNEL)) {
 		kfree(ref_node);
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 	}
 	INIT_LIST_HEAD(&ref_node->node);
 	INIT_LIST_HEAD(&ref_node->file_list);
@@ -7795,9 +7795,9 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	}
 
 	ref_node = alloc_fixed_file_ref_node(ctx);
-	if (IS_ERR(ref_node)) {
+	if (!ref_node) {
 		io_sqe_files_unregister(ctx);
-		return PTR_ERR(ref_node);
+		return -ENOMEM;
 	}
 
 	io_sqe_files_set_node(file_data, ref_node);
@@ -7897,8 +7897,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 		return -EINVAL;
 
 	ref_node = alloc_fixed_file_ref_node(ctx);
-	if (IS_ERR(ref_node))
-		return PTR_ERR(ref_node);
+	if (!ref_node)
+		return -ENOMEM;
 
 	done = 0;
 	fds = u64_to_user_ptr(up->fds);

From 00b8c557d096f0930d5c07df618223d3d06902d6 Mon Sep 17 00:00:00 2001
From: Matthias Maennich <maennich@google.com>
Date: Wed, 6 Jan 2021 15:52:01 +0000
Subject: [PATCH 402/547] staging: ION: remove some references to CONFIG_ION

With commit e722a295cf49 ("staging: ion: remove from the tree"), ION and
its corresponding config CONFIG_ION is gone. Remove stale references
from drivers/staging/media/atomisp/pci and from the recommended Android
kernel config.

Fixes: e722a295cf49 ("staging: ion: remove from the tree")
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Rob Herring <robh@kernel.org>
Cc: linux-media@vger.kernel.org
Cc: devel@driverdev.osuosl.org
Signed-off-by: Matthias Maennich <maennich@google.com>
Link: https://lore.kernel.org/r/20210106155201.2845319-1-maennich@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../media/atomisp/pci/atomisp_subdev.c        | 20 -------------------
 kernel/configs/android-recommended.config     |  1 -
 2 files changed, 21 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
index 52b9fb18c87f..b666cb23e5ca 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
@@ -1062,26 +1062,6 @@ static const struct v4l2_ctrl_config ctrl_select_isp_version = {
 	.def = 0,
 };
 
-#if 0 /* #ifdef CONFIG_ION */
-/*
- * Control for ISP ion device fd
- *
- * userspace will open ion device and pass the fd to kernel.
- * this fd will be used to map shared fd to buffer.
- */
-/* V4L2_CID_ATOMISP_ION_DEVICE_FD is not defined */
-static const struct v4l2_ctrl_config ctrl_ion_dev_fd = {
-	.ops = &ctrl_ops,
-	.id = V4L2_CID_ATOMISP_ION_DEVICE_FD,
-	.type = V4L2_CTRL_TYPE_INTEGER,
-	.name = "Ion Device Fd",
-	.min = -1,
-	.max = 1024,
-	.step = 1,
-	.def = ION_FD_UNSET
-};
-#endif
-
 static void atomisp_init_subdev_pipe(struct atomisp_sub_device *asd,
 				     struct atomisp_video_pipe *pipe, enum v4l2_buf_type buf_type)
 {
diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config
index 53d688bdd894..eb0029c9a6a6 100644
--- a/kernel/configs/android-recommended.config
+++ b/kernel/configs/android-recommended.config
@@ -81,7 +81,6 @@ CONFIG_INPUT_JOYSTICK=y
 CONFIG_INPUT_MISC=y
 CONFIG_INPUT_TABLET=y
 CONFIG_INPUT_UINPUT=y
-CONFIG_ION=y
 CONFIG_JOYSTICK_XPAD=y
 CONFIG_JOYSTICK_XPAD_FF=y
 CONFIG_JOYSTICK_XPAD_LEDS=y

From 0e61f09af48beb41be0954e7be7d3ba2d18c9946 Mon Sep 17 00:00:00 2001
From: Xiaojian Du <Xiaojian.Du@amd.com>
Date: Mon, 14 Dec 2020 17:05:55 +0800
Subject: [PATCH 403/547] drm/amd/pm: correct the sensor value of power for
 vangogh

This patch is to correct the sensor value of power for vangogh.

Signed-off-by: Xiaojian Du <Xiaojian.Du@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 8cb4fcee9a2c..5c1482d4ca43 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -252,7 +252,8 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu,
 		*value = metrics->UvdActivity;
 		break;
 	case METRICS_AVERAGE_SOCKETPOWER:
-		*value = metrics->CurrentSocketPower;
+		*value = (metrics->CurrentSocketPower << 8) /
+		1000 ;
 		break;
 	case METRICS_TEMPERATURE_EDGE:
 		*value = metrics->GfxTemperature / 100 *

From 37030aba0f362cf8b16eb2347c7430b2e9ef719e Mon Sep 17 00:00:00 2001
From: Xiaojian Du <Xiaojian.Du@amd.com>
Date: Fri, 18 Dec 2020 14:32:02 +0800
Subject: [PATCH 404/547] drm/amd/pm: improve the fine grain tuning function
 for RV/RV2/PCO

This patch is to improve the fine grain tuning function for RV/RV2/PCO.
This patch adds two new commands: "restore" and "commit".
This function uses the pp_od_clk_voltage sysfs file to configure the min
and max value of gfx clock frequency manually or restore the default value.

Command guide:
echo "s level value" > pp_od_clk_voltage
        "s" - set the sclk frequency
        "level" - 0 or 1, "0" represents the min value,  "1" represents
        the max value
        "value" - the target value of sclk frequency, it should be limited in the
        safe range
echo "r" > pp_od_clk_voltage
        "r" - reset the sclk frequency, restore the default value instantly
echo "c" > pp_od_clk_voltage
        "c" - commit the min and max value of sclk frequency to the system
        only after the commit command, the target values set by "s" command
        will take effect.
Example:
1)change power profile from "auto" to "manual"
        $ cat power_dpm_force_performance_level
        auto
        $ echo "manual" > power_dpm_force_performance_level
        $ cat power_dpm_force_performance_level
        manual
2)check the default sclk frequency
        $ cat pp_od_clk_voltage
        OD_SCLK:
        0:        200Mhz
        1:       1400Mhz
        OD_RANGE:
        SCLK:     200MHz       1400MHz
3)use "s" -- set command to configure the min and max sclk frequency
        $ echo "s 0 600" > pp_od_clk_voltage
        $ echo "s 1 1000" > pp_od_clk_voltage
        $ echo "c" > pp_od_clk_voltage
        $ cat pp_od_clk_voltage
        OD_SCLK:
        0:        600Mhz
        1:       1000Mhz
        OD_RANGE:
        SCLK:     200MHz       1400MHz
4)use "r" -- reset command to restore the min or max sclk frequency
        $ echo "r" > pp_od_clk_voltage
        $ cat pp_od_clk_voltage
        OD_SCLK:
        0:        200Mhz
        1:       1400Mhz
        OD_RANGE:
        SCLK:     200MHz       1400MHz

Signed-off-by: Xiaojian Du <Xiaojian.Du@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c  | 114 +++++++++++++++---
 .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h  |   1 +
 2 files changed, 98 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
index e57e64bbacdc..0cf899566e31 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
@@ -251,7 +251,7 @@ static int smu10_set_hard_min_gfxclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t cl
 		smu10_data->gfx_actual_soft_min_freq = clock;
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 					PPSMC_MSG_SetHardMinGfxClk,
-					smu10_data->gfx_actual_soft_min_freq,
+					clock,
 					NULL);
 	}
 	return 0;
@@ -948,6 +948,8 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
 	struct smu10_voltage_dependency_table *mclk_table =
 			data->clock_vol_info.vdd_dep_on_fclk;
 	uint32_t i, now, size = 0;
+	uint32_t min_freq, max_freq = 0;
+	uint32_t ret = 0;
 
 	switch (type) {
 	case PP_SCLK:
@@ -983,18 +985,28 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
 		break;
 	case OD_SCLK:
 		if (hwmgr->od_enabled) {
-			size = sprintf(buf, "%s:\n", "OD_SCLK");
+			ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq);
+			if (ret)
+				return ret;
+			ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq);
+			if (ret)
+				return ret;
 
+			size = sprintf(buf, "%s:\n", "OD_SCLK");
 			size += sprintf(buf + size, "0: %10uMhz\n",
-			(data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : data->gfx_min_freq_limit/100);
-			size += sprintf(buf + size, "1: %10uMhz\n", data->gfx_max_freq_limit/100);
+			(data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq);
+			size += sprintf(buf + size, "1: %10uMhz\n",
+			(data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq);
 		}
 		break;
 	case OD_RANGE:
 		if (hwmgr->od_enabled) {
-			uint32_t min_freq, max_freq = 0;
-			smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq);
-			smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq);
+			ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq);
+			if (ret)
+				return ret;
+			ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq);
+			if (ret)
+				return ret;
 
 			size = sprintf(buf, "%s:\n", "OD_RANGE");
 			size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
@@ -1414,23 +1426,91 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr,
 					enum PP_OD_DPM_TABLE_COMMAND type,
 					long *input, uint32_t size)
 {
+	uint32_t min_freq, max_freq = 0;
+	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+	int ret = 0;
+
 	if (!hwmgr->od_enabled) {
 		pr_err("Fine grain not support\n");
 		return -EINVAL;
 	}
 
-	if (size != 2) {
-		pr_err("Input parameter number not correct\n");
-		return -EINVAL;
-	}
-
 	if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
-		if (input[0] == 0)
-			smu10_set_hard_min_gfxclk_by_freq(hwmgr, input[1]);
-		else if (input[0] == 1)
-			smu10_set_soft_max_gfxclk_by_freq(hwmgr, input[1]);
-		else
+		if (size != 2) {
+			pr_err("Input parameter number not correct\n");
 			return -EINVAL;
+		}
+
+		if (input[0] == 0) {
+			smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq);
+			if (input[1] < min_freq) {
+				pr_err("Fine grain setting minimum sclk (%ld) MHz is less than the minimum allowed (%d) MHz\n",
+					input[1], min_freq);
+				return -EINVAL;
+			}
+			smu10_data->gfx_actual_soft_min_freq = input[1];
+		} else if (input[0] == 1) {
+			smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq);
+			if (input[1] > max_freq) {
+				pr_err("Fine grain setting maximum sclk (%ld) MHz is greater than the maximum allowed (%d) MHz\n",
+					input[1], max_freq);
+				return -EINVAL;
+			}
+			smu10_data->gfx_actual_soft_max_freq = input[1];
+		} else {
+			return -EINVAL;
+		}
+	} else if (type == PP_OD_RESTORE_DEFAULT_TABLE) {
+		if (size != 0) {
+			pr_err("Input parameter number not correct\n");
+			return -EINVAL;
+		}
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq);
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq);
+
+		smu10_data->gfx_actual_soft_min_freq = min_freq;
+		smu10_data->gfx_actual_soft_max_freq = max_freq;
+
+		ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_SetHardMinGfxClk,
+					min_freq,
+					NULL);
+		if (ret)
+			return ret;
+
+		ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_SetSoftMaxGfxClk,
+					max_freq,
+					NULL);
+		if (ret)
+			return ret;
+	} else if (type == PP_OD_COMMIT_DPM_TABLE) {
+		if (size != 0) {
+			pr_err("Input parameter number not correct\n");
+			return -EINVAL;
+		}
+
+		if (smu10_data->gfx_actual_soft_min_freq > smu10_data->gfx_actual_soft_max_freq) {
+			pr_err("The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
+					smu10_data->gfx_actual_soft_min_freq, smu10_data->gfx_actual_soft_max_freq);
+			return -EINVAL;
+		}
+
+		ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_SetHardMinGfxClk,
+					smu10_data->gfx_actual_soft_min_freq,
+					NULL);
+		if (ret)
+			return ret;
+
+		ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_SetSoftMaxGfxClk,
+					smu10_data->gfx_actual_soft_max_freq,
+					NULL);
+		if (ret)
+			return ret;
+	} else {
+		return -EINVAL;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h
index 6c9b5f060902..28d86d354d50 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h
@@ -283,6 +283,7 @@ struct smu10_hwmgr {
 	uint32_t                        vclk_soft_min;
 	uint32_t                        dclk_soft_min;
 	uint32_t                        gfx_actual_soft_min_freq;
+	uint32_t                        gfx_actual_soft_max_freq;
 	uint32_t                        gfx_min_freq_limit;
 	uint32_t                        gfx_max_freq_limit; /* in 10Khz*/
 

From fc996f952df1c63b57e3a08ac612db53bf8abadc Mon Sep 17 00:00:00 2001
From: John Clements <john.clements@amd.com>
Date: Fri, 25 Dec 2020 12:22:51 +0800
Subject: [PATCH 405/547] drm/amd/pm: updated PM to I2C controller port on
 sienna cichlid

sienna cichlid interfaces with RAS eeprom on I2C controller port 1

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 9608745d732f..12b36eb0ff6a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2372,7 +2372,7 @@ static void sienna_cichlid_fill_i2c_req(SwI2cRequest_t  *req, bool write,
 {
 	int i;
 
-	req->I2CcontrollerPort = 0;
+	req->I2CcontrollerPort = 1;
 	req->I2CSpeed = 2;
 	req->SlaveAddress = address;
 	req->NumCmds = numbytes;

From a7b5d9dd57298333e6e9f4c167f01385d922bbfb Mon Sep 17 00:00:00 2001
From: Kevin Wang <kevin1.wang@amd.com>
Date: Tue, 29 Dec 2020 14:10:28 +0800
Subject: [PATCH 406/547] drm/amd/display: fix sysfs
 amdgpu_current_backlight_pwm NULL pointer issue

fix NULL pointer issue when read sysfs amdgpu_current_backlight_pwm sysfs node.

Call Trace:
[  248.273833] BUG: kernel NULL pointer dereference, address: 0000000000000130
[  248.273930] #PF: supervisor read access in kernel mode
[  248.273993] #PF: error_code(0x0000) - not-present page
[  248.274054] PGD 0 P4D 0
[  248.274092] Oops: 0000 [#1] SMP PTI
[  248.274138] CPU: 2 PID: 1377 Comm: cat Tainted: G           OE     5.9.0-rc5-drm-next-5.9+ #1
[  248.274233] Hardware name: System manufacturer System Product Name/Z170-A, BIOS 3802 03/15/2018
[  248.274641] RIP: 0010:dc_link_get_backlight_level+0x5/0x70 [amdgpu]
[  248.274718] Code: 67 ff ff ff 41 b9 03 00 00 00 e9 45 ff ff ff d1 ea e9 55 ff ff ff 0f 1f 44 00 00 66 2e
0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b 87 30 01 00 00 48 8b 00 48 8b 88 88 03 00 00 48 8d 81 e8 01
[  248.274919] RSP: 0018:ffffb5ad809b3df0 EFLAGS: 00010203
[  248.274982] RAX: ffffa0f77d1c0010 RBX: ffffa0f793ae9168 RCX: 0000000000000001
[  248.275064] RDX: ffffa0f79753db00 RSI: 0000000000000001 RDI: 0000000000000000
[  248.275145] RBP: ffffb5ad809b3e00 R08: ffffb5ad809b3da0 R09: 0000000000000000
[  248.275225] R10: ffffb5ad809b3e68 R11: 0000000000000000 R12: ffffa0f793ae9190
[  248.275306] R13: ffffb5ad809b3ef0 R14: 0000000000000001 R15: ffffa0f793ae9168
[  248.275388] FS:  00007f5f1ec4d540(0000) GS:ffffa0f79ec80000(0000) knlGS:0000000000000000
[  248.275480] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  248.275547] CR2: 0000000000000130 CR3: 000000042a03c005 CR4: 00000000003706e0
[  248.275628] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  248.275708] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  248.275789] Call Trace:
[  248.276124]  ? current_backlight_read+0x24/0x40 [amdgpu]
[  248.276194]  seq_read+0xc3/0x3f0
[  248.276240]  full_proxy_read+0x5c/0x90
[  248.276290]  vfs_read+0xa7/0x190
[  248.276334]  ksys_read+0xa7/0xe0
[  248.276379]  __x64_sys_read+0x1a/0x20
[  248.276429]  do_syscall_64+0x37/0x80
[  248.276477]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  248.276538] RIP: 0033:0x7f5f1e75c191
[  248.276585] Code: fe ff ff 48 8d 3d b7 9d 0a 00 48 83 ec 08 e8 46 4d 02 00 66 0f 1f 44 00 00 48 8d 05 71 07
2e 00 8b 00 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53Hw
[  248.276784] RSP: 002b:00007ffcb1fc3f38 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[  248.276872] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f5f1e75c191
[  248.276953] RDX: 0000000000020000 RSI: 00007f5f1ec2b000 RDI: 0000000000000003
[  248.277034] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000
[  248.277115] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f5f1ec2b000
[  248.277195] R13: 0000000000000003 R14: 00007f5f1ec2b00f R15: 0000000000020000
[  248.277279] Modules linked in: amdgpu(OE) iommu_v2 gpu_sched ttm(OE) drm_kms_helper cec drm
i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs
lockd grace fscache nls_iso8859_1 snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic
ledtrig_audio intel_rapl_msr intel_rapl_common snd_hda_intel snd_intel_dspcfg x86_pkg_temp_thermal
intel_powerclamp snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event mei_hdcp
coretemp snd_rawmidi snd_seq kvm_intel kvm snd_seq_device snd_timer irqbypass joydev snd input_leds soundcore
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper rapl intel_cstate
mac_hid mei_me serio_raw mei eeepc_wmi wmi_bmof asus_wmi mxm_wmi intel_wmi_thunderbolt acpi_pad sparse_keymap
efi_pstore sch_fq_codel parport_pc ppdev lp parport sunrpc ip_tables x_tables autofs4 hid_logitech_hidpp
hid_logitech_dj hid_generic usbhid hid e1000e psmouse ahci libahci wmi video
[  248.278211] CR2: 0000000000000130
[  248.278221] ---[ end trace 1fbe72fe6f91091d ]---
[  248.357226] RIP: 0010:dc_link_get_backlight_level+0x5/0x70 [amdgpu]
[  248.357272] Code: 67 ff ff ff 41 b9 03 00 00 00 e9 45 ff ff ff d1 ea e9 55 ff ff ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b 87 30 01 00 00 48 8b 00 48 8b 88 88 03 00 00 48 8d 81 e8 01

Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 9e1071b2181f..f4a2088ab179 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -2487,9 +2487,14 @@ enum dc_status dc_link_validate_mode_timing(
 static struct abm *get_abm_from_stream_res(const struct dc_link *link)
 {
 	int i;
-	struct dc *dc = link->ctx->dc;
+	struct dc *dc = NULL;
 	struct abm *abm = NULL;
 
+	if (!link || !link->ctx)
+		return NULL;
+
+	dc = link->ctx->dc;
+
 	for (i = 0; i < MAX_PIPES; i++) {
 		struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i];
 		struct dc_stream_state *stream = pipe_ctx.stream;

From ed1df58585632dff96cc01e14857175dfdf67376 Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Thu, 31 Dec 2020 13:05:09 +0800
Subject: [PATCH 407/547] drm/amdgpu: switched to cached noretry setting for
 vangogh

global noretry setting is cached to gmc.noretry

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index b72c8e4ca36b..07104a1de308 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -310,7 +310,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
 		/* Send no-retry XNACK on fault to suppress VM fault storm. */
 		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
 				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-				    !amdgpu_noretry);
+				    !adev->gmc.noretry);
 		WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL,
 				    i * hub->ctx_distance, tmp);
 		WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,

From 9a029a3facc4d333100308a8e283d9210a36b94c Mon Sep 17 00:00:00 2001
From: Dennis Li <Dennis.Li@amd.com>
Date: Wed, 30 Dec 2020 10:27:42 +0800
Subject: [PATCH 408/547] drm/amdgpu: fix a memory protection fault when remove
 amdgpu device

ASD and TA share the same firmware in SIENNA_CICHLID and only TA
firmware is requested during boot, so only need release TA firmware when
remove device.

[   83.877150] general protection fault, probably for non-canonical address 0x1269f97e6ed04095: 0000 [#1] SMP PTI
[   83.888076] CPU: 0 PID: 1312 Comm: modprobe Tainted: G        W  OE     5.9.0-rc5-deli-amd-vangogh-0.0.6.6-114-gdd99d5669a96-dirty #2
[   83.901160] Hardware name: System manufacturer System Product Name/TUF Z370-PLUS GAMING II, BIOS 0411 09/21/2018
[   83.912353] RIP: 0010:free_fw_priv+0xc/0x120
[   83.917531] Code: e8 99 cd b0 ff b8 a1 ff ff ff eb 9f 4c 89 f7 e8 8a cd b0 ff b8 f4 ff ff ff eb 90 0f 1f 00 0f 1f 44 00 00 55 48 89 e5 41 54 53 <4c> 8b 67 18 48 89 fb 4c 89 e7 e8 45 94 41 00 b8 ff ff ff ff f0 0f
[   83.937576] RSP: 0018:ffffbc34c13a3ce0 EFLAGS: 00010206
[   83.943699] RAX: ffffffffbb681850 RBX: ffffa047f117eb60 RCX: 0000000080800055
[   83.951879] RDX: ffffbc34c1d5f000 RSI: 0000000080800055 RDI: 1269f97e6ed04095
[   83.959955] RBP: ffffbc34c13a3cf0 R08: 0000000000000000 R09: 0000000000000001
[   83.968107] R10: ffffbc34c13a3cc8 R11: 00000000ffffff00 R12: ffffa047d6b23378
[   83.976166] R13: ffffa047d6b23338 R14: ffffa047d6b240c8 R15: 0000000000000000
[   83.984295] FS:  00007f74f6712540(0000) GS:ffffa047fbe00000(0000) knlGS:0000000000000000
[   83.993323] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   84.000056] CR2: 0000556a1cca4e18 CR3: 000000021faa8004 CR4: 00000000003706f0
[   84.008128] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   84.016155] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   84.024174] Call Trace:
[   84.027514]  release_firmware.part.11+0x4b/0x70
[   84.033017]  release_firmware+0x13/0x20
[   84.037803]  psp_sw_fini+0x77/0xb0 [amdgpu]
[   84.042857]  amdgpu_device_fini+0x38c/0x5d0 [amdgpu]
[   84.048815]  amdgpu_driver_unload_kms+0x43/0x70 [amdgpu]
[   84.055055]  drm_dev_unregister+0x73/0xb0 [drm]
[   84.060499]  drm_dev_unplug+0x28/0x30 [drm]
[   84.065598]  amdgpu_dev_uninit+0x1b/0x40 [amdgpu]
[   84.071223]  amdgpu_pci_remove+0x4e/0x70 [amdgpu]
[   84.076835]  pci_device_remove+0x3e/0xc0
[   84.081609]  device_release_driver_internal+0xfb/0x1c0
[   84.087558]  driver_detach+0x4d/0xa0
[   84.092041]  bus_remove_driver+0x5f/0xe0
[   84.096854]  driver_unregister+0x2f/0x50
[   84.101594]  pci_unregister_driver+0x22/0xa0
[   84.106806]  amdgpu_exit+0x15/0x2b [amdgpu]

Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 523d22db094b..5d6fc369e32c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -563,7 +563,7 @@ static int psp_asd_load(struct psp_context *psp)
 	 * add workaround to bypass it for sriov now.
 	 * TODO: add version check to make it common
 	 */
-	if (amdgpu_sriov_vf(psp->adev) || !psp->asd_fw)
+	if (amdgpu_sriov_vf(psp->adev) || !psp->asd_ucode_size)
 		return 0;
 
 	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
@@ -2589,11 +2589,10 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
 
 	switch (desc->fw_type) {
 	case TA_FW_TYPE_PSP_ASD:
-		psp->asd_fw_version	   = le32_to_cpu(desc->fw_version);
+		psp->asd_fw_version        = le32_to_cpu(desc->fw_version);
 		psp->asd_feature_version   = le32_to_cpu(desc->fw_version);
-		psp->asd_ucode_size	   = le32_to_cpu(desc->size_bytes);
+		psp->asd_ucode_size        = le32_to_cpu(desc->size_bytes);
 		psp->asd_start_addr 	   = ucode_start_addr;
-		psp->asd_fw                = psp->ta_fw;
 		break;
 	case TA_FW_TYPE_PSP_XGMI:
 		psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version);

From 88e21af1b3f887d217f2fb14fc7e7d3cd87ebf57 Mon Sep 17 00:00:00 2001
From: Dennis Li <Dennis.Li@amd.com>
Date: Wed, 30 Dec 2020 19:45:15 +0800
Subject: [PATCH 409/547] drm/amdgpu: fix a GPU hang issue when remove device

When GFXOFF is enabled and GPU is idle, driver will fail to access some
registers. Therefore change to disable power gating before all access
registers with MMIO.

Dmesg log is as following:
amdgpu 0000:03:00.0: amdgpu: amdgpu: finishing device.
amdgpu: cp queue pipe 4 queue 0 preemption failed
amdgpu 0000:03:00.0: amdgpu: failed to write reg 2890 wait reg 28a2
amdgpu 0000:03:00.0: amdgpu: failed to write reg 1a6f4 wait reg 1a706
amdgpu 0000:03:00.0: amdgpu: failed to write reg 2890 wait reg 28a2
amdgpu 0000:03:00.0: amdgpu: failed to write reg 1a6f4 wait reg 1a706

Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1cb7d73f7317..b69c34074d8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2548,11 +2548,11 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	if (adev->gmc.xgmi.num_physical_nodes > 1)
 		amdgpu_xgmi_remove_device(adev);
 
-	amdgpu_amdkfd_device_fini(adev);
-
 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 
+	amdgpu_amdkfd_device_fini(adev);
+
 	/* need to disable SMC first */
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.hw)

From 44cb39e19a05ca711bcb6e776e0a4399223204a0 Mon Sep 17 00:00:00 2001
From: Xiaojian Du <Xiaojian.Du@amd.com>
Date: Wed, 30 Dec 2020 18:08:23 +0800
Subject: [PATCH 410/547] drm/amd/pm: fix the failure when change power profile
 for renoir

This patch is to fix the failure when change power profile to
"profile_peak" for renoir.

Signed-off-by: Xiaojian Du <Xiaojian.Du@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index dc75db8af371..f743685a20e8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -188,6 +188,7 @@ static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type
 			return -EINVAL;
 		*freq = clk_table->SocClocks[dpm_level].Freq;
 		break;
+	case SMU_UCLK:
 	case SMU_MCLK:
 		if (dpm_level >= NUM_FCLK_DPM_LEVELS)
 			return -EINVAL;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
index 522d55004655..06abf2a7ce9e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c
@@ -225,6 +225,7 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_
 	break;
 	case SMU_FCLK:
 	case SMU_MCLK:
+	case SMU_UCLK:
 		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min, NULL);
 		if (ret)
 			return ret;

From 98b64762080b96b0f8608da5fe161f1a7ab6f5de Mon Sep 17 00:00:00 2001
From: Xiaojian Du <Xiaojian.Du@amd.com>
Date: Tue, 29 Dec 2020 17:19:37 +0800
Subject: [PATCH 411/547] drm/amd/pm: improve the fine grain tuning function
 for RV/RV2/PCO

This patch is to improve the fine grain tuning function for RV/RV2/PCO.
The fine grain tuning function uses the sysfs node -- pp_od_clk_voltage
to config gfxclk. Meanwhile, another sysfs
node -- power_dpm_force_perfomance_level also affects the gfx clk.
It will cause confusion when these two sysfs nodes works
together. So this patch adds one flag to avoid this confusion, the flag
will make these two sysfs nodes work separately.
The flag is set as "disabled" by default, so the fine grain tuning function
will be disabled by default.
Only when power_dpm_force_perfomance_level is changed to
"manual" mode, the flag will be set as "enabled",
and the fine grain tuning function will be enabled.
In other profile modes, including "auto", "high", "low",
"profile_peak", "profile_standard", "profile_min_sclk",
"profile_min_mclk", the flag will be set as "disabled",
and the od range of fine grain tuning function will
be restored default value.

Signed-off-by: Xiaojian Du <Xiaojian.Du@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c  | 58 ++++++++++++++++++-
 .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h  |  2 +
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
index 0cf899566e31..88322781e447 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
@@ -558,7 +558,8 @@ static int smu10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 
 	/* enable the pp_od_clk_voltage sysfs file */
 	hwmgr->od_enabled = 1;
-
+	/* disabled fine grain tuning function by default */
+	data->fine_grain_enabled = 0;
 	return result;
 }
 
@@ -597,6 +598,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 	uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100;
 	uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1;
 	uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1;
+	uint32_t fine_grain_min_freq = 0, fine_grain_max_freq = 0;
 
 	if (hwmgr->smu_version < 0x1E3700) {
 		pr_info("smu firmware version too old, can not set dpm level\n");
@@ -613,6 +615,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 	switch (level) {
 	case AMD_DPM_FORCED_LEVEL_HIGH:
 	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+		data->fine_grain_enabled = 0;
+
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq);
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq);
+
+		data->gfx_actual_soft_min_freq = fine_grain_min_freq;
+		data->gfx_actual_soft_max_freq = fine_grain_max_freq;
+
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinGfxClk,
 						data->gfx_max_freq_limit/100,
@@ -648,6 +658,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 						NULL);
 		break;
 	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+		data->fine_grain_enabled = 0;
+
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq);
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq);
+
+		data->gfx_actual_soft_min_freq = fine_grain_min_freq;
+		data->gfx_actual_soft_max_freq = fine_grain_max_freq;
+
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinGfxClk,
 						min_sclk,
@@ -658,6 +676,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 						NULL);
 		break;
 	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+		data->fine_grain_enabled = 0;
+
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq);
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq);
+
+		data->gfx_actual_soft_min_freq = fine_grain_min_freq;
+		data->gfx_actual_soft_max_freq = fine_grain_max_freq;
+
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinFclkByFreq,
 						min_mclk,
@@ -668,6 +694,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 						NULL);
 		break;
 	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+		data->fine_grain_enabled = 0;
+
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq);
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq);
+
+		data->gfx_actual_soft_min_freq = fine_grain_min_freq;
+		data->gfx_actual_soft_max_freq = fine_grain_max_freq;
+
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinGfxClk,
 						SMU10_UMD_PSTATE_GFXCLK,
@@ -703,6 +737,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 						NULL);
 		break;
 	case AMD_DPM_FORCED_LEVEL_AUTO:
+		data->fine_grain_enabled = 0;
+
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq);
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq);
+
+		data->gfx_actual_soft_min_freq = fine_grain_min_freq;
+		data->gfx_actual_soft_max_freq = fine_grain_max_freq;
+
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinGfxClk,
 						min_sclk,
@@ -741,6 +783,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 						NULL);
 		break;
 	case AMD_DPM_FORCED_LEVEL_LOW:
+		data->fine_grain_enabled = 0;
+
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq);
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq);
+
+		data->gfx_actual_soft_min_freq = fine_grain_min_freq;
+		data->gfx_actual_soft_max_freq = fine_grain_max_freq;
+
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinGfxClk,
 						data->gfx_min_freq_limit/100,
@@ -759,6 +809,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 						NULL);
 		break;
 	case AMD_DPM_FORCED_LEVEL_MANUAL:
+		data->fine_grain_enabled = 1;
 	case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
 	default:
 		break;
@@ -1435,6 +1486,11 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr,
 		return -EINVAL;
 	}
 
+	if (!smu10_data->fine_grain_enabled) {
+		pr_err("Fine grain not started\n");
+		return -EINVAL;
+	}
+
 	if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
 		if (size != 2) {
 			pr_err("Input parameter number not correct\n");
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h
index 28d86d354d50..808e0ecbe1f0 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h
@@ -300,6 +300,8 @@ struct smu10_hwmgr {
 	bool need_min_deep_sleep_dcefclk;
 	uint32_t                             deep_sleep_dcefclk;
 	uint32_t                             num_active_display;
+
+	bool							fine_grain_enabled;
 };
 
 struct pp_hwmgr;

From 4f6a05501eb9c57fb4c9efed70840aee523a393b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 3 Jan 2021 15:02:32 +0100
Subject: [PATCH 412/547] drm/amd/display: Fix unused variable warning

Some of the newly added code is hidden inside of #ifdef
blocks, but one variable is unused when debugfs is disabled:

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:8370:8: error: unused variable 'configure_crc' [-Werror,-Wunused-variable]

Change the #ifdef to an if(IS_ENABLED()) check to fix the warning
and avoid adding more #ifdefs.

Fixes: c920888c604d ("drm/amd/display: Expose new CRC window property")
Reviewed-by: Wayne Lin <Wayne.Lin@amd.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c     | 4 +---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 519080e9a233..3fb6baf9b0ba 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8379,8 +8379,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 			acrtc->dm_irq_params.stream = dm_new_crtc_state->stream;
 			manage_dm_interrupts(adev, acrtc, true);
 		}
-#ifdef CONFIG_DEBUG_FS
-		if (new_crtc_state->active &&
+		if (IS_ENABLED(CONFIG_DEBUG_FS) && new_crtc_state->active &&
 			amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) {
 			/**
 			 * Frontend may have changed so reapply the CRC capture
@@ -8401,7 +8400,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 				amdgpu_dm_crtc_configure_crc_source(
 					crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src);
 		}
-#endif
 	}
 
 	for_each_new_crtc_in_state(state, crtc, new_crtc_state, j)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
index 0235bfb246e5..eba2f1d35d07 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
@@ -46,13 +46,13 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source
 }
 
 /* amdgpu_dm_crc.c */
-#ifdef CONFIG_DEBUG_FS
 bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state);
 bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state,
 					struct dm_crtc_state *dm_old_crtc_state);
 int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
 					struct dm_crtc_state *dm_crtc_state,
 					enum amdgpu_dm_pipe_crc_source source);
+#ifdef CONFIG_DEBUG_FS
 int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name);
 int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc,
 				     const char *src_name,

From e6d5c64efaa34aae3815a9afeb1314a976142e83 Mon Sep 17 00:00:00 2001
From: Jiawei Gu <Jiawei.Gu@amd.com>
Date: Tue, 29 Dec 2020 20:35:33 +0800
Subject: [PATCH 413/547] drm/amdgpu: fix potential memory leak during navi12
 deinitialization

Navi12 HDCP & DTM deinitialization needs continue to free bo if already
created though initialized flag is not set.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Jiawei Gu <Jiawei.Gu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 5d6fc369e32c..347fec669424 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1315,8 +1315,12 @@ static int psp_hdcp_terminate(struct psp_context *psp)
 	if (amdgpu_sriov_vf(psp->adev))
 		return 0;
 
-	if (!psp->hdcp_context.hdcp_initialized)
-		return 0;
+	if (!psp->hdcp_context.hdcp_initialized) {
+		if (psp->hdcp_context.hdcp_shared_buf)
+			goto out;
+		else
+			return 0;
+	}
 
 	ret = psp_hdcp_unload(psp);
 	if (ret)
@@ -1324,6 +1328,7 @@ static int psp_hdcp_terminate(struct psp_context *psp)
 
 	psp->hdcp_context.hdcp_initialized = false;
 
+out:
 	/* free hdcp shared memory */
 	amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo,
 			      &psp->hdcp_context.hdcp_shared_mc_addr,
@@ -1462,8 +1467,12 @@ static int psp_dtm_terminate(struct psp_context *psp)
 	if (amdgpu_sriov_vf(psp->adev))
 		return 0;
 
-	if (!psp->dtm_context.dtm_initialized)
-		return 0;
+	if (!psp->dtm_context.dtm_initialized) {
+		if (psp->dtm_context.dtm_shared_buf)
+			goto out;
+		else
+			return 0;
+	}
 
 	ret = psp_dtm_unload(psp);
 	if (ret)
@@ -1471,6 +1480,7 @@ static int psp_dtm_terminate(struct psp_context *psp)
 
 	psp->dtm_context.dtm_initialized = false;
 
+out:
 	/* free hdcp shared memory */
 	amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo,
 			      &psp->dtm_context.dtm_shared_mc_addr,

From 8a82b347e8732fd2b68d26a6e9f0d9a1c397560d Mon Sep 17 00:00:00 2001
From: Dennis Li <Dennis.Li@amd.com>
Date: Tue, 5 Jan 2021 08:37:21 +0800
Subject: [PATCH 414/547] drm/amdgpu: fix no bad_pages issue after umc ue
 injection

old code wrongly used the bad page status as the function return value,
which cause amdgpu_ras_badpages_read always return failed.

Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index c136bd449744..82e952696d24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1518,7 +1518,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_err_handler_data *data;
 	int i = 0;
-	int ret = 0;
+	int ret = 0, status;
 
 	if (!con || !con->eh_data || !bps || !count)
 		return -EINVAL;
@@ -1543,12 +1543,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
 			.size = AMDGPU_GPU_PAGE_SIZE,
 			.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
 		};
-		ret = amdgpu_vram_mgr_query_page_status(
+		status = amdgpu_vram_mgr_query_page_status(
 				ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
 				data->bps[i].retired_page);
-		if (ret == -EBUSY)
+		if (status == -EBUSY)
 			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
-		else if (ret == -ENOENT)
+		else if (status == -ENOENT)
 			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
 	}
 

From 3851c90b7aa8f0c275d14636f0e7ccca69a2bf84 Mon Sep 17 00:00:00 2001
From: John Clements <john.clements@amd.com>
Date: Tue, 5 Jan 2021 14:53:14 +0800
Subject: [PATCH 415/547] drm/amdgpu: enable ras eeprom support for sienna
 cichlid

added I2C address and asic support flag

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 1dd040166c63..19d9aa76cfbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -30,6 +30,7 @@
 #define EEPROM_I2C_TARGET_ADDR_VEGA20		0xA0
 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS		0xA8
 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342	0xA0
+#define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID   0xA0
 
 /*
  * The 2 macros bellow represent the actual size in bytes that
@@ -62,7 +63,8 @@
 static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
 {
 	if ((adev->asic_type == CHIP_VEGA20) ||
-	    (adev->asic_type == CHIP_ARCTURUS))
+	    (adev->asic_type == CHIP_ARCTURUS) ||
+	    (adev->asic_type == CHIP_SIENNA_CICHLID))
 		return true;
 
 	return false;
@@ -100,6 +102,10 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
 	case CHIP_ARCTURUS:
 		return __get_eeprom_i2c_addr_arct(adev, i2c_addr);
 
+	case CHIP_SIENNA_CICHLID:
+		*i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID;
+		break;
+
 	default:
 		return false;
 	}

From c241ed2f0ea549c18cff62a3708b43846b84dae3 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 4 Jan 2021 11:24:20 -0500
Subject: [PATCH 416/547] drm/amdgpu/display: drop DCN support for aarch64

From Ard:

"Simply disabling -mgeneral-regs-only left and right is risky, given that
the standard AArch64 ABI permits the use of FP/SIMD registers anywhere,
and GCC is known to use SIMD registers for spilling, and may invent
other uses of the FP/SIMD register file that have nothing to do with the
floating point code in question. Note that putting kernel_neon_begin()
and kernel_neon_end() around the code that does use FP is not sufficient
here, the problem is in all the other code that may be emitted with
references to SIMD registers in it.

So the only way to do this properly is to put all floating point code in
a separate compilation unit, and only compile that unit with
-mgeneral-regs-only."

Disable support until the code can be properly refactored to support this
properly on aarch64.

Acked-by: Will Deacon <will@kernel.org>
Reported-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/Kconfig           |  2 +-
 drivers/gpu/drm/amd/display/dc/calcs/Makefile |  4 ----
 .../gpu/drm/amd/display/dc/clk_mgr/Makefile   | 21 -------------------
 drivers/gpu/drm/amd/display/dc/dcn10/Makefile |  7 -------
 .../drm/amd/display/dc/dcn10/dcn10_resource.c |  7 -------
 drivers/gpu/drm/amd/display/dc/dcn20/Makefile |  4 ----
 drivers/gpu/drm/amd/display/dc/dcn21/Makefile |  4 ----
 drivers/gpu/drm/amd/display/dc/dcn30/Makefile |  5 -----
 .../gpu/drm/amd/display/dc/dcn301/Makefile    |  4 ----
 .../gpu/drm/amd/display/dc/dcn302/Makefile    |  4 ----
 drivers/gpu/drm/amd/display/dc/dml/Makefile   |  4 ----
 drivers/gpu/drm/amd/display/dc/dsc/Makefile   |  4 ----
 drivers/gpu/drm/amd/display/dc/os_types.h     |  4 ----
 13 files changed, 1 insertion(+), 73 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 797b5d4b43e5..e509a175ed17 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
 	bool "AMD DC - Enable new display engine"
 	default y
 	select SND_HDA_COMPONENT if SND_HDA_CORE
-	select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON)) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+	select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
 	help
 	  Choose this option if you want to use the new display engine
 	  support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
index 64f515d74410..f3c00f479e1c 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -33,10 +33,6 @@ ifdef CONFIG_PPC64
 calcs_ccflags := -mhard-float -maltivec
 endif
 
-ifdef CONFIG_ARM64
-calcs_rcflags := -mgeneral-regs-only
-endif
-
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index d59b380e7b7f..ff96bee57bfc 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -104,13 +104,6 @@ ifdef CONFIG_PPC64
 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
 endif
 
-# prevent build errors:
-# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types
-# this file is unused on arm64, just like on ppc64
-ifdef CONFIG_ARM64
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := -mgeneral-regs-only
-endif
-
 AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
@@ -125,13 +118,6 @@ ifdef CONFIG_PPC64
 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
 endif
 
-# prevent build errors:
-# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types
-# this file is unused on arm64, just like on ppc64
-ifdef CONFIG_ARM64
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -mgeneral-regs-only
-endif
-
 AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
@@ -146,13 +132,6 @@ ifdef CONFIG_PPC64
 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
 endif
 
-# prevent build errors:
-# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types
-# this file is unused on arm64, just like on ppc64
-ifdef CONFIG_ARM64
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := -mgeneral-regs-only
-endif
-
 AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301))
 
 AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 733e6e6e43bd..62ad1a11bff9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -31,11 +31,4 @@ DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \
 
 AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10))
 
-# fix:
-# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types
-# aarch64 does not support soft-float, so use hard-float and handle this in code
-ifdef CONFIG_ARM64
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn10/dcn10_resource.o := -mgeneral-regs-only
-endif
-
 AMD_DISPLAY_FILES += $(AMD_DAL_DCN10)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index bdc37831535e..36745193c391 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1534,15 +1534,8 @@ static bool dcn10_resource_construct(
 	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
 	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-#if defined(CONFIG_ARM64)
-	/* Aarch64 does not support -msoft-float/-mfloat-abi=soft */
-	DC_FP_START();
-	dcn10_resource_construct_fp(dc);
-	DC_FP_END();
-#else
 	/* Other architectures we build for build this with soft-float */
 	dcn10_resource_construct_fp(dc);
-#endif
 
 	pool->base.pp_smu = dcn10_pp_smu_create(ctx);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index 624cb1341ef1..5fcaf78334ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -17,10 +17,6 @@ ifdef CONFIG_PPC64
 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec
 endif
 
-ifdef CONFIG_ARM64
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mgeneral-regs-only
-endif
-
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index 1ee5fc03b7b3..bb8c95141082 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -13,10 +13,6 @@ ifdef CONFIG_PPC64
 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec
 endif
 
-ifdef CONFIG_ARM64
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mgeneral-regs-only
-endif
-
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index 248c2711aace..c20331eb62e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -41,11 +41,6 @@ CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec
 CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec
 endif
 
-ifdef CONFIG_ARM64
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mgeneral-regs-only
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mgeneral-regs-only
-endif
-
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index 2fd5d34e4ba6..3ca7d911d25c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -21,10 +21,6 @@ ifdef CONFIG_PPC64
 CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mhard-float -maltivec
 endif
 
-ifdef CONFIG_ARM64
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mgeneral-regs-only
-endif
-
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
index 36e44e1b07fa..8d4924b7dc22 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
@@ -20,10 +20,6 @@ ifdef CONFIG_PPC64
 CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -maltivec
 endif
 
-ifdef CONFIG_ARM64
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mgeneral-regs-only
-endif
-
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index a02a33dcd70b..6bb7f2905821 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -33,10 +33,6 @@ ifdef CONFIG_PPC64
 dml_ccflags := -mhard-float -maltivec
 endif
 
-ifdef CONFIG_ARM64
-dml_rcflags := -mgeneral-regs-only
-endif
-
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index f2624a1156e5..8d31eb75c6a6 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -10,10 +10,6 @@ ifdef CONFIG_PPC64
 dsc_ccflags := -mhard-float -maltivec
 endif
 
-ifdef CONFIG_ARM64
-dsc_rcflags := -mgeneral-regs-only
-endif
-
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h
index 95cb56929e79..126c2f3a4dd3 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -55,10 +55,6 @@
 #include <asm/fpu/api.h>
 #define DC_FP_START() kernel_fpu_begin()
 #define DC_FP_END() kernel_fpu_end()
-#elif defined(CONFIG_ARM64)
-#include <asm/neon.h>
-#define DC_FP_START() kernel_neon_begin()
-#define DC_FP_END() kernel_neon_end()
 #elif defined(CONFIG_PPC64)
 #include <asm/switch_to.h>
 #include <asm/cputable.h>

From 5efc1f4b454c6179d35e7b0c3eda0ad5763a00fc Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 5 Jan 2021 11:42:08 -0500
Subject: [PATCH 417/547] Revert "drm/amd/display: Fix memory leaks in S3
 resume"

This reverts commit a135a1b4c4db1f3b8cbed9676a40ede39feb3362.

This leads to blank screens on some boards after replugging a
display.  Revert until we understand the root cause and can
fix both the leak and the blank screen after replug.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211033
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1427
Cc: Stylon Wang <stylon.wang@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Cc: Andre Tomt <andre@tomt.net>
Cc: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3fb6baf9b0ba..146486071d01 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2386,8 +2386,7 @@ void amdgpu_dm_update_connector_after_detect(
 
 			drm_connector_update_edid_property(connector,
 							   aconnector->edid);
-			aconnector->num_modes = drm_add_edid_modes(connector, aconnector->edid);
-			drm_connector_list_update(connector);
+			drm_add_edid_modes(connector, aconnector->edid);
 
 			if (aconnector->dc_link->aux_mode)
 				drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,

From 67a5a68013056cbcf0a647e36cb6f4622fb6a470 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= <valdis.kletnieks@vt.edu>
Date: Sat, 26 Dec 2020 13:21:58 -0500
Subject: [PATCH 418/547] gcc-plugins: fix gcc 11 indigestion with plugins...

Fedora Rawhide has started including gcc 11,and the g++ compiler
throws a wobbly when it hits scripts/gcc-plugins:

  HOSTCXX scripts/gcc-plugins/latent_entropy_plugin.so
In file included from /usr/include/c++/11/type_traits:35,
                 from /usr/lib/gcc/x86_64-redhat-linux/11/plugin/include/system.h:244,
                 from /usr/lib/gcc/x86_64-redhat-linux/11/plugin/include/gcc-plugin.h:28,
                 from scripts/gcc-plugins/gcc-common.h:7,
                 from scripts/gcc-plugins/latent_entropy_plugin.c:78:
/usr/include/c++/11/bits/c++0x_warning.h:32:2: error: #error This file requires compiler and library support for the ISO
 C++ 2011 standard. This support must be enabled with the -std=c++11 or -std=gnu++11 compiler options.
   32 | #error This file requires compiler and library support \

In fact, it works just fine with c++11, which has been in gcc since 4.8,
and we now require 4.9 as a minimum.

Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/82487.1609006918@turing-police
---
 scripts/gcc-plugins/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile
index d66949bfeba4..b5487cce69e8 100644
--- a/scripts/gcc-plugins/Makefile
+++ b/scripts/gcc-plugins/Makefile
@@ -22,9 +22,9 @@ always-y += $(GCC_PLUGIN)
 GCC_PLUGINS_DIR = $(shell $(CC) -print-file-name=plugin)
 
 plugin_cxxflags	= -Wp,-MMD,$(depfile) $(KBUILD_HOSTCXXFLAGS) -fPIC \
-		   -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++98 \
+		   -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \
 		   -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \
-		   -ggdb -Wno-narrowing -Wno-unused-variable -Wno-c++11-compat \
+		   -ggdb -Wno-narrowing -Wno-unused-variable \
 		   -Wno-format-diag
 
 plugin_ldflags	= -shared

From 6f02b540d7597f357bc6ee711346761045d4e108 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Wed, 6 Jan 2021 15:59:06 +0000
Subject: [PATCH 419/547] bpftool: Fix compilation failure for net.o with older
 glibc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For older glibc ~2.17, #include'ing both linux/if.h and net/if.h
fails due to complaints about redefinition of interface flags:

  CC       net.o
In file included from net.c:13:0:
/usr/include/linux/if.h:71:2: error: redeclaration of enumerator ‘IFF_UP’
  IFF_UP    = 1<<0,  /* sysfs */
  ^
/usr/include/net/if.h:44:5: note: previous definition of ‘IFF_UP’ was here
     IFF_UP = 0x1,  /* Interface is up.  */

The issue was fixed in kernel headers in [1], but since compilation
of net.c picks up system headers the problem can recur.

Dropping #include <linux/if.h> resolves the issue and it is
not needed for compilation anyhow.

[1] https://lore.kernel.org/netdev/1461512707-23058-1-git-send-email-mikko.rapeli__34748.27880641$1462831734$gmane$org@iki.fi/

Fixes: f6f3bac08ff9 ("tools/bpf: bpftool: add net support")
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/1609948746-15369-1-git-send-email-alan.maguire@oracle.com
---
 tools/bpf/bpftool/net.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 3fae61ef6339..ff3aa0cf3997 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -11,7 +11,6 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 #include <net/if.h>
-#include <linux/if.h>
 #include <linux/rtnetlink.h>
 #include <linux/socket.h>
 #include <linux/tc_act/tc_bpf.h>

From 0d136f5cd9a7ba6ded7f8ff17e8b1ba680f37625 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Beh=C3=BAn?= <kabel@kernel.org>
Date: Tue, 5 Jan 2021 18:23:33 +0100
Subject: [PATCH 420/547] net: mvneta: fix error message when MTU too large for
 XDP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The error message says that "Jumbo frames are not supported on XDP", but
the code checks for mtu > MVNETA_MAX_RX_BUF_SIZE, not mtu > 1500.

Fix this error message.

Signed-off-by: Marek Behún <kabel@kernel.org>
Fixes: 0db51da7a8e9 ("net: mvneta: add basic XDP support")
Cc: Lorenzo Bianconi <lorenzo@kernel.org>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Link: https://lore.kernel.org/r/20210105172333.21613-1-kabel@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 3369ec717a51..bc4d8d144401 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4432,7 +4432,7 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog,
 	struct bpf_prog *old_prog;
 
 	if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) {
-		NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP");
+		NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
 		return -EOPNOTSUPP;
 	}
 

From 94bcfdbff0c210b17b27615f4952cc6ece7d5f5f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 5 Jan 2021 11:07:25 -0800
Subject: [PATCH 421/547] net: bareudp: add missing error handling for
 bareudp_link_config()

.dellink does not get called after .newlink fails,
bareudp_newlink() must undo what bareudp_configure()
has done if bareudp_link_config() fails.

v2: call bareudp_dellink(), like bareudp_dev_create() does

Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.")
Link: https://lore.kernel.org/r/20210105190725.1736246-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/bareudp.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index 708171c0d628..85de5f96c02b 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -645,11 +645,20 @@ static int bareudp_link_config(struct net_device *dev,
 	return 0;
 }
 
+static void bareudp_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct bareudp_dev *bareudp = netdev_priv(dev);
+
+	list_del(&bareudp->next);
+	unregister_netdevice_queue(dev, head);
+}
+
 static int bareudp_newlink(struct net *net, struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[],
 			   struct netlink_ext_ack *extack)
 {
 	struct bareudp_conf conf;
+	LIST_HEAD(list_kill);
 	int err;
 
 	err = bareudp2info(data, &conf, extack);
@@ -662,17 +671,14 @@ static int bareudp_newlink(struct net *net, struct net_device *dev,
 
 	err = bareudp_link_config(dev, tb);
 	if (err)
-		return err;
+		goto err_unconfig;
 
 	return 0;
-}
 
-static void bareudp_dellink(struct net_device *dev, struct list_head *head)
-{
-	struct bareudp_dev *bareudp = netdev_priv(dev);
-
-	list_del(&bareudp->next);
-	unregister_netdevice_queue(dev, head);
+err_unconfig:
+	bareudp_dellink(dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
+	return err;
 }
 
 static size_t bareudp_get_size(const struct net_device *dev)

From 7f847db3040897f3ee25ce97265c545b5561f6c2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 5 Jan 2021 18:18:15 -0800
Subject: [PATCH 422/547] net: dsa: fix led_classdev build errors

Fix build errors when LEDS_CLASS=m and NET_DSA_HIRSCHMANN_HELLCREEK=y.
This limits the latter to =m when LEDS_CLASS=m.

microblaze-linux-ld: drivers/net/dsa/hirschmann/hellcreek_ptp.o: in function `hellcreek_ptp_setup':
(.text+0xf80): undefined reference to `led_classdev_register_ext'
microblaze-linux-ld: (.text+0xf94): undefined reference to `led_classdev_register_ext'
microblaze-linux-ld: drivers/net/dsa/hirschmann/hellcreek_ptp.o: in function `hellcreek_ptp_free':
(.text+0x1018): undefined reference to `led_classdev_unregister'
microblaze-linux-ld: (.text+0x1024): undefined reference to `led_classdev_unregister'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Link: lore.kernel.org/r/202101060655.iUvMJqS2-lkp@intel.com
Cc: Kurt Kanzenbach <kurt@linutronix.de>
Link: https://lore.kernel.org/r/20210106021815.31796-1-rdunlap@infradead.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/hirschmann/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/hirschmann/Kconfig b/drivers/net/dsa/hirschmann/Kconfig
index 222dd35e2c9d..e01191107a4b 100644
--- a/drivers/net/dsa/hirschmann/Kconfig
+++ b/drivers/net/dsa/hirschmann/Kconfig
@@ -4,6 +4,7 @@ config NET_DSA_HIRSCHMANN_HELLCREEK
 	depends on HAS_IOMEM
 	depends on NET_DSA
 	depends on PTP_1588_CLOCK
+	depends on LEDS_CLASS
 	select NET_DSA_TAG_HELLCREEK
 	help
 	  This driver adds support for Hirschmann Hellcreek TSN switches.

From 1f685e6adbbe3c7b1bd9053be771b898d9efa655 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 5 Jan 2021 20:25:31 -0800
Subject: [PATCH 423/547] ptp: ptp_ines: prevent build when HAS_IOMEM is not
 set

ptp_ines.c uses devm_platform_ioremap_resource(), which is only
built/available when CONFIG_HAS_IOMEM is enabled.
CONFIG_HAS_IOMEM is not enabled for arch/s390/, so builds on S390
have a build error:

s390-linux-ld: drivers/ptp/ptp_ines.o: in function `ines_ptp_ctrl_probe':
ptp_ines.c:(.text+0x17e6): undefined reference to `devm_platform_ioremap_resource'

Prevent builds of ptp_ines.c when HAS_IOMEM is not set.

Fixes: bad1eaa6ac31 ("ptp: Add a driver for InES time stamping IP core.")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Link: lore.kernel.org/r/202101031125.ZEFCUiKi-lkp@intel.com
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20210106042531.1351-1-rdunlap@infradead.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/ptp/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index d2bf05ccbbe2..f2edef0df40f 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -79,6 +79,7 @@ config DP83640_PHY
 config PTP_1588_CLOCK_INES
 	tristate "ZHAW InES PTP time stamping IP core"
 	depends on NETWORK_PHY_TIMESTAMPING
+	depends on HAS_IOMEM
 	depends on PHYLIB
 	depends on PTP_1588_CLOCK
 	help

From c4aec381ab98c9189d47b935832541d520f1f67f Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 15 Dec 2020 11:32:37 +0100
Subject: [PATCH 424/547] can: m_can: m_can_class_unregister(): remove
 erroneous m_can_clk_stop()

In m_can_class_register() the clock is started, but stopped on exit. When
calling m_can_class_unregister(), the clock is stopped a second time.

This patch removes the erroneous m_can_clk_stop() in  m_can_class_unregister().

Fixes: f524f829b75a ("can: m_can: Create a m_can platform framework")
Cc: Dan Murphy <dmurphy@ti.com>
Cc: Sriram Dash <sriram.dash@samsung.com>
Reviewed-by: Sean Nyekjaer <sean@geanix.com>
Link: https://lore.kernel.org/r/20201215103238.524029-2-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 2c9f12401276..da551fd0f502 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1852,8 +1852,6 @@ EXPORT_SYMBOL_GPL(m_can_class_register);
 void m_can_class_unregister(struct m_can_classdev *cdev)
 {
 	unregister_candev(cdev->net);
-
-	m_can_clk_stop(cdev);
 }
 EXPORT_SYMBOL_GPL(m_can_class_unregister);
 

From aee2b3ccc8a63d1cd7da6a8a153d1f3712d40826 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 15 Dec 2020 11:32:38 +0100
Subject: [PATCH 425/547] can: tcan4x5x: fix bittiming const, use common
 bittiming from m_can driver

According to the TCAN4550 datasheet "SLLSF91 - DECEMBER 2018" the tcan4x5x has
the same bittiming constants as a m_can revision 3.2.x/3.3.0.

The tcan4x5x chip I'm using identifies itself as m_can revision 3.2.1, so
remove the tcan4x5x specific bittiming values and rely on the values in the
m_can driver, which are selected according to core revision.

Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel")
Cc: Dan Murphy <dmurphy@ti.com>
Reviewed-by: Sean Nyekjaer <sean@geanix.com>
Link: https://lore.kernel.org/r/20201215103238.524029-3-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/tcan4x5x.c | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c
index 24c737c4fc44..970f0e9d19bf 100644
--- a/drivers/net/can/m_can/tcan4x5x.c
+++ b/drivers/net/can/m_can/tcan4x5x.c
@@ -131,30 +131,6 @@ static inline struct tcan4x5x_priv *cdev_to_priv(struct m_can_classdev *cdev)
 
 }
 
-static struct can_bittiming_const tcan4x5x_bittiming_const = {
-	.name = DEVICE_NAME,
-	.tseg1_min = 2,
-	.tseg1_max = 31,
-	.tseg2_min = 2,
-	.tseg2_max = 16,
-	.sjw_max = 16,
-	.brp_min = 1,
-	.brp_max = 32,
-	.brp_inc = 1,
-};
-
-static struct can_bittiming_const tcan4x5x_data_bittiming_const = {
-	.name = DEVICE_NAME,
-	.tseg1_min = 1,
-	.tseg1_max = 32,
-	.tseg2_min = 1,
-	.tseg2_max = 16,
-	.sjw_max = 16,
-	.brp_min = 1,
-	.brp_max = 32,
-	.brp_inc = 1,
-};
-
 static void tcan4x5x_check_wake(struct tcan4x5x_priv *priv)
 {
 	int wake_state = 0;
@@ -469,8 +445,6 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
 	mcan_class->dev = &spi->dev;
 	mcan_class->ops = &tcan4x5x_ops;
 	mcan_class->is_peripheral = true;
-	mcan_class->bit_timing = &tcan4x5x_bittiming_const;
-	mcan_class->data_timing = &tcan4x5x_data_bittiming_const;
 	mcan_class->net->irq = spi->irq;
 
 	spi_set_drvdata(spi, priv);

From 6086f02a18aeae795a61a3fc6566920891ea3b52 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 5 Jan 2021 22:41:37 +0100
Subject: [PATCH 426/547] can: mcp251xfd: mcp251xfd_handle_tefif(): fix TEF vs.
 TX race condition

The mcp251xfd driver uses a TX FIFO for sending CAN frames and a TX Event FIFO
(TEF) for completed TX-requests.

The TEF event handling in the mcp251xfd_handle_tefif() function has a race
condition. It first increments the tx-ring's tail counter to signal that
there's room in the TX and TEF FIFO, then it increments the TEF FIFO in
hardware.

A running mcp251xfd_start_xmit() on a different CPU might not stop the txqueue
(as the tx-ring still shows free space). The next mcp251xfd_start_xmit() will
push a message into the chip and the TX complete event might overflow the TEF
FIFO.

This patch changes the order to fix the problem.

Fixes: 68c0c1c7f966 ("can: mcp251xfd: tef-path: reduce number of SPI core requests to set UINC bit")
Link: https://lore.kernel.org/r/20210105214138.3150886-2-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 77129d5f410b..85a1a8b7c0e7 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1368,13 +1368,10 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 		struct mcp251xfd_tx_ring *tx_ring = priv->tx;
 		struct spi_transfer *last_xfer;
 
-		tx_ring->tail += len;
-
 		/* Increment the TEF FIFO tail pointer 'len' times in
 		 * a single SPI message.
-		 */
-
-		/* Note:
+		 *
+		 * Note:
 		 *
 		 * "cs_change == 1" on the last transfer results in an
 		 * active chip select after the complete SPI
@@ -1391,6 +1388,8 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 		if (err)
 			return err;
 
+		tx_ring->tail += len;
+
 		err = mcp251xfd_check_tef_tail(priv);
 		if (err)
 			return err;

From 2fbb397f584077e3c90abd06829f5a1f66fdd5f4 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 5 Jan 2021 22:41:38 +0100
Subject: [PATCH 427/547] can: mcp251xfd: mcp251xfd_handle_rxif_ring(): first
 increment RX tail pointer in HW, then in driver

The previous patch fixes a TEF vs. TX race condition, by first updating the TEF
tail pointer in hardware, and then updating the driver internal pointer.

The same pattern exists in the RX-path, too. This should be no problem, as the
driver accesses the RX-FIFO from the interrupt handler only, thus the access is
properly serialized. Fix the order here, too, so that the TEF- and RX-path look
similar.

Fixes: 1f652bb6bae7 ("can: mcp25xxfd: rx-path: reduce number of SPI core requests to set UINC bit")
Link: https://lore.kernel.org/r/20210105214138.3150886-3-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 85a1a8b7c0e7..36235afb0bc6 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1552,10 +1552,8 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
 
 		/* Increment the RX FIFO tail pointer 'len' times in a
 		 * single SPI message.
-		 */
-		ring->tail += len;
-
-		/* Note:
+		 *
+		 * Note:
 		 *
 		 * "cs_change == 1" on the last transfer results in an
 		 * active chip select after the complete SPI
@@ -1571,6 +1569,8 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
 		last_xfer->cs_change = 1;
 		if (err)
 			return err;
+
+		ring->tail += len;
 	}
 
 	return 0;

From 1169ec8f5d71044082a9898bbd1f1bf4a690c5a4 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Mon, 4 Jan 2021 09:03:27 +0000
Subject: [PATCH 428/547] can: rcar: Kconfig: update help description for
 CAN_RCAR config

The rcar_can driver also supports RZ/G SoC's, update the description to reflect
this.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20210104090327.6547-1-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/rcar/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/rcar/Kconfig b/drivers/net/can/rcar/Kconfig
index 8d36101b78e3..29cabc20109e 100644
--- a/drivers/net/can/rcar/Kconfig
+++ b/drivers/net/can/rcar/Kconfig
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 config CAN_RCAR
-	tristate "Renesas R-Car CAN controller"
+	tristate "Renesas R-Car and RZ/G CAN controller"
 	depends on ARCH_RENESAS || ARM
 	help
 	  Say Y here if you want to use CAN controller found on Renesas R-Car
-	  SoCs.
+	  or RZ/G SoCs.
 
 	  To compile this driver as a module, choose M here: the module will
 	  be called rcar_can.

From 6ee49118f87cf02b36f68812bc49855b7b627a2b Mon Sep 17 00:00:00 2001
From: Sriram Dash <sriram.dash@samsung.com>
Date: Mon, 4 Jan 2021 18:01:34 +0530
Subject: [PATCH 429/547] MAINTAINERS: Update MCAN MMIO device driver
 maintainer

Update Pankaj Sharma as maintainer for mcan mmio device driver as I
will be moving to a different role.

Signed-off-by: Sriram Dash <sriram.dash@samsung.com>
Acked-by: Pankaj Sharma <pankj.sharma@samsung.com>
Link: https://lore.kernel.org/r/20210104123134.16930-1-sriram.dash@samsung.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7c1e45c416b1..b15514a770e3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10847,7 +10847,7 @@ F:	drivers/media/radio/radio-maxiradio*
 
 MCAN MMIO DEVICE DRIVER
 M:	Dan Murphy <dmurphy@ti.com>
-M:	Sriram Dash <sriram.dash@samsung.com>
+M:	Pankaj Sharma <pankj.sharma@samsung.com>
 L:	linux-can@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/can/bosch,m_can.yaml

From 83b5bd628f65e6b4d1924b307d6a88a57827bdb0 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 4 Jan 2021 12:36:14 +0000
Subject: [PATCH 430/547] arm64: Move PSTATE.TCO setting to separate functions

For consistency with __uaccess_{disable,enable}_hw_pan(), move the
PSTATE.TCO setting into dedicated __uaccess_{disable,enable}_tco()
functions.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
---
 arch/arm64/include/asm/uaccess.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 6f986e09a781..f0fe0cc6abe0 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -176,10 +176,21 @@ static inline void __uaccess_enable_hw_pan(void)
  * The Tag check override (TCO) bit disables temporarily the tag checking
  * preventing the issue.
  */
-static inline void uaccess_disable_privileged(void)
+static inline void __uaccess_disable_tco(void)
 {
 	asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
 				 ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+static inline void __uaccess_enable_tco(void)
+{
+	asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
+				 ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+static inline void uaccess_disable_privileged(void)
+{
+	__uaccess_disable_tco();
 
 	if (uaccess_ttbr0_disable())
 		return;
@@ -189,8 +200,7 @@ static inline void uaccess_disable_privileged(void)
 
 static inline void uaccess_enable_privileged(void)
 {
-	asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
-				 ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+	__uaccess_enable_tco();
 
 	if (uaccess_ttbr0_enable())
 		return;

From 05cd84691eafcd7959a1e120d5e72c0dd98c5d91 Mon Sep 17 00:00:00 2001
From: Charan Teja Reddy <charante@codeaurora.org>
Date: Tue, 5 Jan 2021 20:06:39 +0530
Subject: [PATCH 431/547] dmabuf: fix use-after-free of dmabuf's file->f_inode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is observed 'use-after-free' on the dmabuf's file->f_inode with the
race between closing the dmabuf file and reading the dmabuf's debug
info.

Consider the below scenario where P1 is closing the dma_buf file
and P2 is reading the dma_buf's debug info in the system:

P1						P2
					dma_buf_debug_show()
dma_buf_put()
  __fput()
    file->f_op->release()
    dput()
    ....
      dentry_unlink_inode()
        iput(dentry->d_inode)
        (where the inode is freed)
					mutex_lock(&db_list.lock)
					read 'dma_buf->file->f_inode'
					(the same inode is freed by P1)
					mutex_unlock(&db_list.lock)
      dentry->d_op->d_release()-->
        dma_buf_release()
          .....
          mutex_lock(&db_list.lock)
          removes the dmabuf from the list
          mutex_unlock(&db_list.lock)

In the above scenario, when dma_buf_put() is called on a dma_buf, it
first frees the dma_buf's file->f_inode(=dentry->d_inode) and then
removes this dma_buf from the system db_list. In between P2 traversing
the db_list tries to access this dma_buf's file->f_inode that was freed
by P1 which is a use-after-free case.

Since, __fput() calls f_op->release first and then later calls the
d_op->d_release, move the dma_buf's db_list removal from d_release() to
f_op->release(). This ensures that dma_buf's file->f_inode is not
accessed after it is released.

Cc: <stable@vger.kernel.org> # 5.4.x-
Fixes: 4ab59c3c638c ("dma-buf: Move dma_buf_release() from fops to dentry_ops")
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/1609857399-31549-1-git-send-email-charante@codeaurora.org
---
 drivers/dma-buf/dma-buf.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index e63684d4cd90..9ad6397aaa97 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -76,10 +76,6 @@ static void dma_buf_release(struct dentry *dentry)
 
 	dmabuf->ops->release(dmabuf);
 
-	mutex_lock(&db_list.lock);
-	list_del(&dmabuf->list_node);
-	mutex_unlock(&db_list.lock);
-
 	if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
 		dma_resv_fini(dmabuf->resv);
 
@@ -88,6 +84,22 @@ static void dma_buf_release(struct dentry *dentry)
 	kfree(dmabuf);
 }
 
+static int dma_buf_file_release(struct inode *inode, struct file *file)
+{
+	struct dma_buf *dmabuf;
+
+	if (!is_dma_buf_file(file))
+		return -EINVAL;
+
+	dmabuf = file->private_data;
+
+	mutex_lock(&db_list.lock);
+	list_del(&dmabuf->list_node);
+	mutex_unlock(&db_list.lock);
+
+	return 0;
+}
+
 static const struct dentry_operations dma_buf_dentry_ops = {
 	.d_dname = dmabuffs_dname,
 	.d_release = dma_buf_release,
@@ -413,6 +425,7 @@ static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file)
 }
 
 static const struct file_operations dma_buf_fops = {
+	.release	= dma_buf_file_release,
 	.mmap		= dma_buf_mmap_internal,
 	.llseek		= dma_buf_llseek,
 	.poll		= dma_buf_poll,

From e89eed02a5f1b864fa5abafc8e8e71bd9fd66d1f Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Tue, 5 Jan 2021 20:53:42 +0100
Subject: [PATCH 432/547] kcov, usb: hide in_serving_softirq checks in
 __usb_hcd_giveback_urb

Done opencode in_serving_softirq() checks in in_serving_softirq() to avoid
cluttering the code, hide them in kcov helpers instead.

Fixes: aee9ddb1d371 ("kcov, usb: only collect coverage from __usb_hcd_giveback_urb in softirq")
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Link: https://lore.kernel.org/r/aeb430c5bb90b0ccdf1ec302c70831c1a47b9c45.1609876340.git.andreyknvl@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c |  8 +++-----
 include/linux/kcov.h   | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 60886a7464c3..ad5a0f405a75 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1649,14 +1649,12 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
 	urb->status = status;
 	/*
 	 * This function can be called in task context inside another remote
-	 * coverage collection section, but KCOV doesn't support that kind of
+	 * coverage collection section, but kcov doesn't support that kind of
 	 * recursion yet. Only collect coverage in softirq context for now.
 	 */
-	if (in_serving_softirq())
-		kcov_remote_start_usb((u64)urb->dev->bus->busnum);
+	kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum);
 	urb->complete(urb);
-	if (in_serving_softirq())
-		kcov_remote_stop();
+	kcov_remote_stop_softirq();
 
 	usb_anchor_resume_wakeups(anchor);
 	atomic_dec(&urb->use_count);
diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index a10e84707d82..4e3037dc1204 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -52,6 +52,25 @@ static inline void kcov_remote_start_usb(u64 id)
 	kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id));
 }
 
+/*
+ * The softirq flavor of kcov_remote_*() functions is introduced as a temporary
+ * work around for kcov's lack of nested remote coverage sections support in
+ * task context. Adding suport for nested sections is tracked in:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=210337
+ */
+
+static inline void kcov_remote_start_usb_softirq(u64 id)
+{
+	if (in_serving_softirq())
+		kcov_remote_start_usb(id);
+}
+
+static inline void kcov_remote_stop_softirq(void)
+{
+	if (in_serving_softirq())
+		kcov_remote_stop();
+}
+
 #else
 
 static inline void kcov_task_init(struct task_struct *t) {}
@@ -66,6 +85,8 @@ static inline u64 kcov_common_handle(void)
 }
 static inline void kcov_remote_start_common(u64 id) {}
 static inline void kcov_remote_start_usb(u64 id) {}
+static inline void kcov_remote_start_usb_softirq(u64 id) {}
+static inline void kcov_remote_stop_softirq(void) {}
 
 #endif /* CONFIG_KCOV */
 #endif /* _LINUX_KCOV_H */

From e2459108b5a0604c4b472cae2b3cb8d3444c77fb Mon Sep 17 00:00:00 2001
From: "taehyun.cho" <taehyun.cho@samsung.com>
Date: Thu, 7 Jan 2021 00:46:25 +0900
Subject: [PATCH 433/547] usb: gadget: enable super speed plus

Enable Super speed plus in configfs to support USB3.1 Gen2.
This ensures that when a USB gadget is plugged in, it is
enumerated as Gen 2 and connected at 10 Gbps if the host and
cable are capable of it.

Many in-tree gadget functions (fs, midi, acm, ncm, mass_storage,
etc.) already have SuperSpeed Plus support.

Tested: plugged gadget into Linux host and saw:
[284907.385986] usb 8-2: new SuperSpeedPlus Gen 2 USB device number 3 using xhci_hcd

Tested-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: taehyun.cho <taehyun.cho@samsung.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Link: https://lore.kernel.org/r/20210106154625.2801030-1-lorenzo@google.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/configfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 408a5332a975..36ffb43f9c1a 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1543,7 +1543,7 @@ static const struct usb_gadget_driver configfs_driver_template = {
 	.suspend	= configfs_composite_suspend,
 	.resume		= configfs_composite_resume,
 
-	.max_speed	= USB_SPEED_SUPER,
+	.max_speed	= USB_SPEED_SUPER_PLUS,
 	.driver = {
 		.owner          = THIS_MODULE,
 		.name		= "configfs-gadget",
@@ -1583,7 +1583,7 @@ static struct config_group *gadgets_make(
 	gi->composite.unbind = configfs_do_nothing;
 	gi->composite.suspend = NULL;
 	gi->composite.resume = NULL;
-	gi->composite.max_speed = USB_SPEED_SUPER;
+	gi->composite.max_speed = USB_SPEED_SUPER_PLUS;
 
 	spin_lock_init(&gi->spinlock);
 	mutex_init(&gi->lock);

From 41952a66015466c3208aac96b14ffd92e0943589 Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Wed, 6 Jan 2021 00:16:05 +0000
Subject: [PATCH 434/547] usb: typec: Fix copy paste error for NVIDIA alt-mode
 description

The name of the module for the NVIDIA alt-mode is incorrect as it
looks to be a copy-paste error from the entry above, update it to
the correct typec_nvidia module name.

Cc: Ajay Gupta <ajayg@nvidia.com>
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
Link: https://lore.kernel.org/r/20210106001605.167917-1-pbrobinson@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/altmodes/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/altmodes/Kconfig b/drivers/usb/typec/altmodes/Kconfig
index 187690fd1a5b..60d375e9c3c7 100644
--- a/drivers/usb/typec/altmodes/Kconfig
+++ b/drivers/usb/typec/altmodes/Kconfig
@@ -20,6 +20,6 @@ config TYPEC_NVIDIA_ALTMODE
 	  to enable support for VirtualLink devices with NVIDIA GPUs.
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called typec_displayport.
+	  module will be called typec_nvidia.
 
 endmenu

From 6c75c2bad36cfb43b144e6a0a76a69993c72097f Mon Sep 17 00:00:00 2001
From: Prashant Malani <pmalani@chromium.org>
Date: Wed, 6 Jan 2021 19:49:04 -0800
Subject: [PATCH 435/547] usb: typec: Send uevent for num_altmodes update

Generate a change uevent when the "number_of_alternate_modes" sysfs file
for partners and plugs is updated by a port driver.

Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: Benson Leung <bleung@chromium.org>
Signed-off-by: Prashant Malani <pmalani@chromium.org>
Link: https://lore.kernel.org/r/20210107034904.4112029-1-pmalani@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index ebfd3113a9a8..8f77669f9cf4 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -766,6 +766,7 @@ int typec_partner_set_num_altmodes(struct typec_partner *partner, int num_altmod
 		return ret;
 
 	sysfs_notify(&partner->dev.kobj, NULL, "number_of_alternate_modes");
+	kobject_uevent(&partner->dev.kobj, KOBJ_CHANGE);
 
 	return 0;
 }
@@ -923,6 +924,7 @@ int typec_plug_set_num_altmodes(struct typec_plug *plug, int num_altmodes)
 		return ret;
 
 	sysfs_notify(&plug->dev.kobj, NULL, "number_of_alternate_modes");
+	kobject_uevent(&plug->dev.kobj, KOBJ_CHANGE);
 
 	return 0;
 }

From a5c7682aaaa10e42928d73de1c9e1e02d2b14c2e Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Mon, 4 Jan 2021 22:42:39 -0800
Subject: [PATCH 436/547] usb: dwc3: gadget: Clear wait flag on dequeue

If an active transfer is dequeued, then the endpoint is freed to start a
new transfer. Make sure to clear the endpoint's transfer wait flag for
this case.

Fixes: e0d19563eb6c ("usb: dwc3: gadget: Wait for transfer completion")
Cc: stable@vger.kernel.org
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://lore.kernel.org/r/b81cd5b5281cfbfdadb002c4bcf5c9be7c017cfd.1609828485.git.Thinh.Nguyen@synopsys.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 25f654b79e48..ee44321fee38 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1763,6 +1763,8 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
 			list_for_each_entry_safe(r, t, &dep->started_list, list)
 				dwc3_gadget_move_cancelled_request(r);
 
+			dep->flags &= ~DWC3_EP_WAIT_TRANSFER_COMPLETE;
+
 			goto out;
 		}
 	}

From e0658f970a7f3d85431c6803b7d5169444fb11b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 5 Jan 2021 18:55:47 +0100
Subject: [PATCH 437/547] drm/radeon: stop re-init the TTM page pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drivers are not supposed to init the page pool directly any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Link: https://patchwork.freedesktop.org/patch/412153/
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index d59ef6e92a40..23195d5d4e91 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -730,9 +730,6 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	}
 	rdev->mman.initialized = true;
 
-	ttm_pool_init(&rdev->mman.bdev.pool, rdev->dev, rdev->need_swiotlb,
-		      dma_addressing_limited(&rdev->pdev->dev));
-
 	r = radeon_ttm_init_vram(rdev);
 	if (r) {
 		DRM_ERROR("Failed initializing VRAM heap.\n");

From a73858ef4d5e1d425e171f0f6a52864176a6a979 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 5 Jan 2021 18:56:56 +0100
Subject: [PATCH 438/547] drm/ttm: unexport ttm_pool_init/fini
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drivers are not supposed to use this directly any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Link: https://patchwork.freedesktop.org/patch/412156/
---
 drivers/gpu/drm/ttm/ttm_pool.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 7b2f60616750..a00b7ab9c14c 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -507,7 +507,6 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 			ttm_pool_type_init(&pool->caching[i].orders[j],
 					   pool, i, j);
 }
-EXPORT_SYMBOL(ttm_pool_init);
 
 /**
  * ttm_pool_fini - Cleanup a pool
@@ -525,7 +524,6 @@ void ttm_pool_fini(struct ttm_pool *pool)
 		for (j = 0; j < MAX_ORDER; ++j)
 			ttm_pool_type_fini(&pool->caching[i].orders[j]);
 }
-EXPORT_SYMBOL(ttm_pool_fini);
 
 #ifdef CONFIG_DEBUG_FS
 /* Count the number of pages available in a pool_type */

From 1efd17e7acb6692bffc6c58718f41f27fdfd62f5 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Thu, 31 Dec 2020 08:53:19 +0800
Subject: [PATCH 439/547] iommu/vt-d: Fix misuse of ALIGN in qi_flush_piotlb()

Use IS_ALIGNED() instead. Otherwise, an unaligned address will be ignored.

Fixes: 33cd6e642d6a ("iommu/vt-d: Flush PASID-based iotlb for iova over first level")
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20201231005323.2178523-1-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/intel/dmar.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index b46dbfa6d0ed..004feaed3c72 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1461,8 +1461,8 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
 		int mask = ilog2(__roundup_pow_of_two(npages));
 		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
 
-		if (WARN_ON_ONCE(!ALIGN(addr, align)))
-			addr &= ~(align - 1);
+		if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
+			addr = ALIGN_DOWN(addr, align);
 
 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
 				QI_EIOTLB_DID(did) |

From 4df7b2268ad81a74168130e1fb04550a8bc980e1 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Thu, 31 Dec 2020 08:53:22 +0800
Subject: [PATCH 440/547] Revert "iommu: Add quirk for Intel graphic devices in
 map_sg"

This reverts commit 65f746e8285f0a67d43517d86fedb9e29ead49f2.

As commit 8a473dbadccf ("drm/i915: Fix DMA mapped scatterlist walks") and
commit 934941ed5a30 ("drm/i915: Fix DMA mapped scatterlist lookup") fixed
the DMA scatterlist limitations in the i915 driver, remove this temporary
workaround.

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Tom Murphy <murphyt7@tcd.ie>
Cc: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20201231005323.2178523-4-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/dma-iommu.c | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f0305e6aac1b..4078358ed66e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -863,33 +863,6 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
 	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
 	int i, count = 0;
 
-	/*
-	 * The Intel graphic driver is used to assume that the returned
-	 * sg list is not combound. This blocks the efforts of converting
-	 * Intel IOMMU driver to dma-iommu api's. Add this quirk to make the
-	 * device driver work and should be removed once it's fixed in i915
-	 * driver.
-	 */
-	if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) &&
-	    to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL &&
-	    (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
-		for_each_sg(sg, s, nents, i) {
-			unsigned int s_iova_off = sg_dma_address(s);
-			unsigned int s_length = sg_dma_len(s);
-			unsigned int s_iova_len = s->length;
-
-			s->offset += s_iova_off;
-			s->length = s_length;
-			sg_dma_address(s) = dma_addr + s_iova_off;
-			sg_dma_len(s) = s_length;
-			dma_addr += s_iova_len;
-
-			pr_info_once("sg combining disabled due to i915 driver\n");
-		}
-
-		return nents;
-	}
-
 	for_each_sg(sg, s, nents, i) {
 		/* Restore this segment's original unaligned fields first */
 		unsigned int s_iova_off = sg_dma_address(s);

From 420d42f6f9db27d88bc4f83e3e668fcdacbf7e29 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Thu, 31 Dec 2020 08:53:23 +0800
Subject: [PATCH 441/547] iommu/vt-d: Fix lockdep splat in sva bind()/unbind()

Lock(&iommu->lock) without disabling irq causes lockdep warnings.

========================================================
WARNING: possible irq lock inversion dependency detected
5.11.0-rc1+ #828 Not tainted
--------------------------------------------------------
kworker/0:1H/120 just changed the state of lock:
ffffffffad9ea1b8 (device_domain_lock){..-.}-{2:2}, at:
iommu_flush_dev_iotlb.part.0+0x32/0x120
but this lock took another, SOFTIRQ-unsafe lock in the past:
 (&iommu->lock){+.+.}-{2:2}

and interrupts could create inverse lock ordering between them.

other info that might help us debug this:
 Possible interrupt unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&iommu->lock);
                               local_irq_disable();
                               lock(device_domain_lock);
                               lock(&iommu->lock);
  <Interrupt>
    lock(device_domain_lock);

 *** DEADLOCK ***

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20201231005323.2178523-5-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/intel/svm.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 4fa248b98031..9bcedd360235 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -281,6 +281,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 	struct dmar_domain *dmar_domain;
 	struct device_domain_info *info;
 	struct intel_svm *svm = NULL;
+	unsigned long iflags;
 	int ret = 0;
 
 	if (WARN_ON(!iommu) || !data)
@@ -381,12 +382,12 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 	 * each bind of a new device even with an existing PASID, we need to
 	 * call the nested mode setup function here.
 	 */
-	spin_lock(&iommu->lock);
+	spin_lock_irqsave(&iommu->lock, iflags);
 	ret = intel_pasid_setup_nested(iommu, dev,
 				       (pgd_t *)(uintptr_t)data->gpgd,
 				       data->hpasid, &data->vendor.vtd, dmar_domain,
 				       data->addr_width);
-	spin_unlock(&iommu->lock);
+	spin_unlock_irqrestore(&iommu->lock, iflags);
 	if (ret) {
 		dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n",
 				    data->hpasid, ret);
@@ -486,6 +487,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
 	struct device_domain_info *info;
 	struct intel_svm_dev *sdev;
 	struct intel_svm *svm = NULL;
+	unsigned long iflags;
 	int pasid_max;
 	int ret;
 
@@ -605,14 +607,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
 			}
 		}
 
-		spin_lock(&iommu->lock);
+		spin_lock_irqsave(&iommu->lock, iflags);
 		ret = intel_pasid_setup_first_level(iommu, dev,
 				mm ? mm->pgd : init_mm.pgd,
 				svm->pasid, FLPT_DEFAULT_DID,
 				(mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
 				(cpu_feature_enabled(X86_FEATURE_LA57) ?
 				 PASID_FLAG_FL5LP : 0));
-		spin_unlock(&iommu->lock);
+		spin_unlock_irqrestore(&iommu->lock, iflags);
 		if (ret) {
 			if (mm)
 				mmu_notifier_unregister(&svm->notifier, mm);
@@ -632,14 +634,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
 		 * Binding a new device with existing PASID, need to setup
 		 * the PASID entry.
 		 */
-		spin_lock(&iommu->lock);
+		spin_lock_irqsave(&iommu->lock, iflags);
 		ret = intel_pasid_setup_first_level(iommu, dev,
 						mm ? mm->pgd : init_mm.pgd,
 						svm->pasid, FLPT_DEFAULT_DID,
 						(mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
 						(cpu_feature_enabled(X86_FEATURE_LA57) ?
 						PASID_FLAG_FL5LP : 0));
-		spin_unlock(&iommu->lock);
+		spin_unlock_irqrestore(&iommu->lock, iflags);
 		if (ret) {
 			kfree(sdev);
 			goto out;

From aded8c7c2b72f846a07a2c736b8e75bb8cf50a87 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 5 Jan 2021 16:50:38 -0800
Subject: [PATCH 442/547] iommu/arm-smmu-qcom: Initialize SCTLR of the bypass
 context

On SM8150 it's occasionally observed that the boot hangs in between the
writing of SMEs and context banks in arm_smmu_device_reset().

The problem seems to coincide with a display refresh happening after
updating the stream mapping, but before clearing - and there by
disabling translation - the context bank picked to emulate translation
bypass.

Resolve this by explicitly disabling the bypass context already in
cfg_probe.

Fixes: f9081b8ff593 ("iommu/arm-smmu-qcom: Implement S2CR quirk")
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210106005038.4152731-1-bjorn.andersson@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 5dff7ffbef11..1b83d140742f 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -196,6 +196,8 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
 
 		set_bit(qsmmu->bypass_cbndx, smmu->context_map);
 
+		arm_smmu_cb_write(smmu, qsmmu->bypass_cbndx, ARM_SMMU_CB_SCTLR, 0);
+
 		reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, CBAR_TYPE_S1_TRANS_S2_BYPASS);
 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(qsmmu->bypass_cbndx), reg);
 	}

From 9ad9f45b3b91162b33abfe175ae75ab65718dbf5 Mon Sep 17 00:00:00 2001
From: Liu Yi L <yi.l.liu@intel.com>
Date: Thu, 7 Jan 2021 00:03:55 +0800
Subject: [PATCH 443/547] iommu/vt-d: Move intel_iommu info from struct
 intel_svm to struct intel_svm_dev

'struct intel_svm' is shared by all devices bound to a give process,
but records only a single pointer to a 'struct intel_iommu'. Consequently,
cache invalidations may only be applied to a single DMAR unit, and are
erroneously skipped for the other devices.

In preparation for fixing this, rework the structures so that the iommu
pointer resides in 'struct intel_svm_dev', allowing 'struct intel_svm'
to track them in its device list.

Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode")
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Raj Ashok <ashok.raj@intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Reported-by: Guo Kaijie <Kaijie.Guo@intel.com>
Reported-by: Xin Zeng <xin.zeng@intel.com>
Signed-off-by: Guo Kaijie <Kaijie.Guo@intel.com>
Signed-off-by: Xin Zeng <xin.zeng@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Tested-by: Guo Kaijie <Kaijie.Guo@intel.com>
Cc: stable@vger.kernel.org # v5.0+
Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/1609949037-25291-2-git-send-email-yi.l.liu@intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/intel/svm.c   | 9 +++++----
 include/linux/intel-iommu.h | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 9bcedd360235..790ef3497e7e 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -142,7 +142,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
 	}
 	desc.qw2 = 0;
 	desc.qw3 = 0;
-	qi_submit_sync(svm->iommu, &desc, 1, 0);
+	qi_submit_sync(sdev->iommu, &desc, 1, 0);
 
 	if (sdev->dev_iotlb) {
 		desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
@@ -166,7 +166,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
 		}
 		desc.qw2 = 0;
 		desc.qw3 = 0;
-		qi_submit_sync(svm->iommu, &desc, 1, 0);
+		qi_submit_sync(sdev->iommu, &desc, 1, 0);
 	}
 }
 
@@ -211,7 +211,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 	 */
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdev, &svm->devs, list)
-		intel_pasid_tear_down_entry(svm->iommu, sdev->dev,
+		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
 					    svm->pasid, true);
 	rcu_read_unlock();
 
@@ -364,6 +364,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 	}
 	sdev->dev = dev;
 	sdev->sid = PCI_DEVID(info->bus, info->devfn);
+	sdev->iommu = iommu;
 
 	/* Only count users if device has aux domains */
 	if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
@@ -548,6 +549,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
 		goto out;
 	}
 	sdev->dev = dev;
+	sdev->iommu = iommu;
 
 	ret = intel_iommu_enable_pasid(iommu, dev);
 	if (ret) {
@@ -577,7 +579,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
 			kfree(sdev);
 			goto out;
 		}
-		svm->iommu = iommu;
 
 		if (pasid_max > intel_pasid_max_id)
 			pasid_max = intel_pasid_max_id;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index d956987ed032..94522685a0d9 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -758,6 +758,7 @@ struct intel_svm_dev {
 	struct list_head list;
 	struct rcu_head rcu;
 	struct device *dev;
+	struct intel_iommu *iommu;
 	struct svm_dev_ops *ops;
 	struct iommu_sva sva;
 	u32 pasid;
@@ -771,7 +772,6 @@ struct intel_svm {
 	struct mmu_notifier notifier;
 	struct mm_struct *mm;
 
-	struct intel_iommu *iommu;
 	unsigned int flags;
 	u32 pasid;
 	int gpasid; /* In case that guest PASID is different from host PASID */

From 18abda7a2d555783d28ea1701f3ec95e96237a86 Mon Sep 17 00:00:00 2001
From: Liu Yi L <yi.l.liu@intel.com>
Date: Thu, 7 Jan 2021 00:03:56 +0800
Subject: [PATCH 444/547] iommu/vt-d: Fix general protection fault in
 aux_detach_device()

The aux-domain attach/detach are not tracked, some data structures might
be used after free. This causes general protection faults when multiple
subdevices are created and assigned to a same guest machine:

  | general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] SMP NOPTI
  | RIP: 0010:intel_iommu_aux_detach_device+0x12a/0x1f0
  | [...]
  | Call Trace:
  |  iommu_aux_detach_device+0x24/0x70
  |  vfio_mdev_detach_domain+0x3b/0x60
  |  ? vfio_mdev_set_domain+0x50/0x50
  |  iommu_group_for_each_dev+0x4f/0x80
  |  vfio_iommu_detach_group.isra.0+0x22/0x30
  |  vfio_iommu_type1_detach_group.cold+0x71/0x211
  |  ? find_exported_symbol_in_section+0x4a/0xd0
  |  ? each_symbol_section+0x28/0x50
  |  __vfio_group_unset_container+0x4d/0x150
  |  vfio_group_try_dissolve_container+0x25/0x30
  |  vfio_group_put_external_user+0x13/0x20
  |  kvm_vfio_group_put_external_user+0x27/0x40 [kvm]
  |  kvm_vfio_destroy+0x45/0xb0 [kvm]
  |  kvm_put_kvm+0x1bb/0x2e0 [kvm]
  |  kvm_vm_release+0x22/0x30 [kvm]
  |  __fput+0xcc/0x260
  |  ____fput+0xe/0x10
  |  task_work_run+0x8f/0xb0
  |  do_exit+0x358/0xaf0
  |  ? wake_up_state+0x10/0x20
  |  ? signal_wake_up_state+0x1a/0x30
  |  do_group_exit+0x47/0xb0
  |  __x64_sys_exit_group+0x18/0x20
  |  do_syscall_64+0x57/0x1d0
  |  entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fix the crash by tracking the subdevices when attaching and detaching
aux-domains.

Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach")
Co-developed-by: Xin Zeng <xin.zeng@intel.com>
Signed-off-by: Xin Zeng <xin.zeng@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/1609949037-25291-3-git-send-email-yi.l.liu@intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/intel/iommu.c | 99 +++++++++++++++++++++++++++----------
 include/linux/intel-iommu.h | 16 ++++--
 2 files changed, 84 insertions(+), 31 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 788119c5b021..d7720a836268 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1877,6 +1877,7 @@ static struct dmar_domain *alloc_domain(int flags)
 		domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
 	domain->has_iotlb_device = false;
 	INIT_LIST_HEAD(&domain->devices);
+	INIT_LIST_HEAD(&domain->subdevices);
 
 	return domain;
 }
@@ -2547,7 +2548,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	info->iommu = iommu;
 	info->pasid_table = NULL;
 	info->auxd_enabled = 0;
-	INIT_LIST_HEAD(&info->auxiliary_domains);
+	INIT_LIST_HEAD(&info->subdevices);
 
 	if (dev && dev_is_pci(dev)) {
 		struct pci_dev *pdev = to_pci_dev(info->dev);
@@ -4475,33 +4476,61 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain)
 			domain->type == IOMMU_DOMAIN_UNMANAGED;
 }
 
-static void auxiliary_link_device(struct dmar_domain *domain,
-				  struct device *dev)
+static inline struct subdev_domain_info *
+lookup_subdev_info(struct dmar_domain *domain, struct device *dev)
 {
-	struct device_domain_info *info = get_domain_info(dev);
+	struct subdev_domain_info *sinfo;
 
-	assert_spin_locked(&device_domain_lock);
-	if (WARN_ON(!info))
-		return;
+	if (!list_empty(&domain->subdevices)) {
+		list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
+			if (sinfo->pdev == dev)
+				return sinfo;
+		}
+	}
 
-	domain->auxd_refcnt++;
-	list_add(&domain->auxd, &info->auxiliary_domains);
+	return NULL;
 }
 
-static void auxiliary_unlink_device(struct dmar_domain *domain,
-				    struct device *dev)
+static int auxiliary_link_device(struct dmar_domain *domain,
+				 struct device *dev)
 {
 	struct device_domain_info *info = get_domain_info(dev);
+	struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
 
 	assert_spin_locked(&device_domain_lock);
 	if (WARN_ON(!info))
-		return;
+		return -EINVAL;
 
-	list_del(&domain->auxd);
-	domain->auxd_refcnt--;
+	if (!sinfo) {
+		sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC);
+		sinfo->domain = domain;
+		sinfo->pdev = dev;
+		list_add(&sinfo->link_phys, &info->subdevices);
+		list_add(&sinfo->link_domain, &domain->subdevices);
+	}
 
-	if (!domain->auxd_refcnt && domain->default_pasid > 0)
-		ioasid_put(domain->default_pasid);
+	return ++sinfo->users;
+}
+
+static int auxiliary_unlink_device(struct dmar_domain *domain,
+				   struct device *dev)
+{
+	struct device_domain_info *info = get_domain_info(dev);
+	struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
+	int ret;
+
+	assert_spin_locked(&device_domain_lock);
+	if (WARN_ON(!info || !sinfo || sinfo->users <= 0))
+		return -EINVAL;
+
+	ret = --sinfo->users;
+	if (!ret) {
+		list_del(&sinfo->link_phys);
+		list_del(&sinfo->link_domain);
+		kfree(sinfo);
+	}
+
+	return ret;
 }
 
 static int aux_domain_add_dev(struct dmar_domain *domain,
@@ -4530,6 +4559,19 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
 	}
 
 	spin_lock_irqsave(&device_domain_lock, flags);
+	ret = auxiliary_link_device(domain, dev);
+	if (ret <= 0)
+		goto link_failed;
+
+	/*
+	 * Subdevices from the same physical device can be attached to the
+	 * same domain. For such cases, only the first subdevice attachment
+	 * needs to go through the full steps in this function. So if ret >
+	 * 1, just goto out.
+	 */
+	if (ret > 1)
+		goto out;
+
 	/*
 	 * iommu->lock must be held to attach domain to iommu and setup the
 	 * pasid entry for second level translation.
@@ -4548,10 +4590,9 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
 						     domain->default_pasid);
 	if (ret)
 		goto table_failed;
+
 	spin_unlock(&iommu->lock);
-
-	auxiliary_link_device(domain, dev);
-
+out:
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 
 	return 0;
@@ -4560,8 +4601,10 @@ table_failed:
 	domain_detach_iommu(domain, iommu);
 attach_failed:
 	spin_unlock(&iommu->lock);
+	auxiliary_unlink_device(domain, dev);
+link_failed:
 	spin_unlock_irqrestore(&device_domain_lock, flags);
-	if (!domain->auxd_refcnt && domain->default_pasid > 0)
+	if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
 		ioasid_put(domain->default_pasid);
 
 	return ret;
@@ -4581,14 +4624,18 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
 	info = get_domain_info(dev);
 	iommu = info->iommu;
 
-	auxiliary_unlink_device(domain, dev);
-
-	spin_lock(&iommu->lock);
-	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
-	domain_detach_iommu(domain, iommu);
-	spin_unlock(&iommu->lock);
+	if (!auxiliary_unlink_device(domain, dev)) {
+		spin_lock(&iommu->lock);
+		intel_pasid_tear_down_entry(iommu, dev,
+					    domain->default_pasid, false);
+		domain_detach_iommu(domain, iommu);
+		spin_unlock(&iommu->lock);
+	}
 
 	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
+		ioasid_put(domain->default_pasid);
 }
 
 static int prepare_domain_attach_device(struct iommu_domain *domain,
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 94522685a0d9..09c6a0bf3892 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -533,11 +533,10 @@ struct dmar_domain {
 					/* Domain ids per IOMMU. Use u16 since
 					 * domain ids are 16 bit wide according
 					 * to VT-d spec, section 9.3 */
-	unsigned int	auxd_refcnt;	/* Refcount of auxiliary attaching */
 
 	bool has_iotlb_device;
 	struct list_head devices;	/* all devices' list */
-	struct list_head auxd;		/* link to device's auxiliary list */
+	struct list_head subdevices;	/* all subdevices' list */
 	struct iova_domain iovad;	/* iova's that belong to this domain */
 
 	struct dma_pte	*pgd;		/* virtual address */
@@ -610,14 +609,21 @@ struct intel_iommu {
 	struct dmar_drhd_unit *drhd;
 };
 
+/* Per subdevice private data */
+struct subdev_domain_info {
+	struct list_head link_phys;	/* link to phys device siblings */
+	struct list_head link_domain;	/* link to domain siblings */
+	struct device *pdev;		/* physical device derived from */
+	struct dmar_domain *domain;	/* aux-domain */
+	int users;			/* user count */
+};
+
 /* PCI domain-device relationship */
 struct device_domain_info {
 	struct list_head link;	/* link to domain siblings */
 	struct list_head global; /* link to global list */
 	struct list_head table;	/* link to pasid table */
-	struct list_head auxiliary_domains; /* auxiliary domains
-					     * attached to this device
-					     */
+	struct list_head subdevices; /* subdevices sibling */
 	u32 segment;		/* PCI segment number */
 	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */

From 7c29ada5e70083805bc3a68daa23441df421fbee Mon Sep 17 00:00:00 2001
From: Liu Yi L <yi.l.liu@intel.com>
Date: Thu, 7 Jan 2021 00:03:57 +0800
Subject: [PATCH 445/547] iommu/vt-d: Fix ineffective devTLB invalidation for
 subdevices

iommu_flush_dev_iotlb() is called to invalidate caches on a device but
only loops over the devices which are fully-attached to the domain. For
sub-devices, this is ineffective and can result in invalid caching
entries left on the device.

Fix the missing invalidation by adding a loop over the subdevices and
ensuring that 'domain->has_iotlb_device' is updated when attaching to
subdevices.

Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach")
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/1609949037-25291-4-git-send-email-yi.l.liu@intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/intel/iommu.c | 53 ++++++++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d7720a836268..65cf06d70bf4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -719,6 +719,8 @@ static int domain_update_device_node(struct dmar_domain *domain)
 	return nid;
 }
 
+static void domain_update_iotlb(struct dmar_domain *domain);
+
 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
@@ -744,6 +746,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
 		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
 	else
 		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
+
+	domain_update_iotlb(domain);
 }
 
 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
@@ -1464,17 +1468,22 @@ static void domain_update_iotlb(struct dmar_domain *domain)
 
 	assert_spin_locked(&device_domain_lock);
 
-	list_for_each_entry(info, &domain->devices, link) {
-		struct pci_dev *pdev;
-
-		if (!info->dev || !dev_is_pci(info->dev))
-			continue;
-
-		pdev = to_pci_dev(info->dev);
-		if (pdev->ats_enabled) {
+	list_for_each_entry(info, &domain->devices, link)
+		if (info->ats_enabled) {
 			has_iotlb_device = true;
 			break;
 		}
+
+	if (!has_iotlb_device) {
+		struct subdev_domain_info *sinfo;
+
+		list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
+			info = get_domain_info(sinfo->pdev);
+			if (info && info->ats_enabled) {
+				has_iotlb_device = true;
+				break;
+			}
+		}
 	}
 
 	domain->has_iotlb_device = has_iotlb_device;
@@ -1555,25 +1564,37 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info)
 #endif
 }
 
+static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
+				    u64 addr, unsigned int mask)
+{
+	u16 sid, qdep;
+
+	if (!info || !info->ats_enabled)
+		return;
+
+	sid = info->bus << 8 | info->devfn;
+	qdep = info->ats_qdep;
+	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+			   qdep, addr, mask);
+}
+
 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 				  u64 addr, unsigned mask)
 {
-	u16 sid, qdep;
 	unsigned long flags;
 	struct device_domain_info *info;
+	struct subdev_domain_info *sinfo;
 
 	if (!domain->has_iotlb_device)
 		return;
 
 	spin_lock_irqsave(&device_domain_lock, flags);
-	list_for_each_entry(info, &domain->devices, link) {
-		if (!info->ats_enabled)
-			continue;
+	list_for_each_entry(info, &domain->devices, link)
+		__iommu_flush_dev_iotlb(info, addr, mask);
 
-		sid = info->bus << 8 | info->devfn;
-		qdep = info->ats_qdep;
-		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
-				qdep, addr, mask);
+	list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
+		info = get_domain_info(sinfo->pdev);
+		__iommu_flush_dev_iotlb(info, addr, mask);
 	}
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 }

From 80c18e4ac20c9cde420cb3ffab48c936147cf07d Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 7 Jan 2021 03:15:41 +0000
Subject: [PATCH 446/547] io_uring: trigger eventfd for IOPOLL

Make sure io_iopoll_complete() tries to wake up eventfd, which currently
is skipped together with io_cqring_ev_posted() for non-SQPOLL IOPOLL.

Add an iopoll version of io_cqring_ev_posted(), duplicates a bit of
code, but they actually use different sets of wait queues may be for
better.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 27a8c226abf8..91e517ad1421 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1713,6 +1713,16 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 		eventfd_signal(ctx->cq_ev_fd, 1);
 }
 
+static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
+{
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		if (waitqueue_active(&ctx->wait))
+			wake_up(&ctx->wait);
+	}
+	if (io_should_trigger_evfd(ctx))
+		eventfd_signal(ctx->cq_ev_fd, 1);
+}
+
 /* Returns true if there are no backlogged entries after the flush */
 static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
 				       struct task_struct *tsk,
@@ -2428,8 +2438,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 	}
 
 	io_commit_cqring(ctx);
-	if (ctx->flags & IORING_SETUP_SQPOLL)
-		io_cqring_ev_posted(ctx);
+	io_cqring_ev_posted_iopoll(ctx);
 	io_req_free_batch_finish(ctx, &rb);
 
 	if (!list_empty(&again))

From 4aa84f2ffa81f71e15e5cffc2cc6090dbee78f8e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 7 Jan 2021 03:15:42 +0000
Subject: [PATCH 447/547] io_uring: dont kill fasync under completion_lock

      CPU0                    CPU1
       ----                    ----
  lock(&new->fa_lock);
                               local_irq_disable();
                               lock(&ctx->completion_lock);
                               lock(&new->fa_lock);
  <Interrupt>
    lock(&ctx->completion_lock);

 *** DEADLOCK ***

Move kill_fasync() out of io_commit_cqring() to io_cqring_ev_posted(),
so it doesn't hold completion_lock while doing it. That saves from the
reported deadlock, and it's just nice to shorten the locking time and
untangle nested locks (compl_lock -> wq_head::lock).

Reported-by: syzbot+91ca3f25bd7f795f019c@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 91e517ad1421..401316fe2ae2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1345,11 +1345,6 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
 
 	/* order cqe stores with ring update */
 	smp_store_release(&rings->cq.tail, ctx->cached_cq_tail);
-
-	if (wq_has_sleeper(&ctx->cq_wait)) {
-		wake_up_interruptible(&ctx->cq_wait);
-		kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
-	}
 }
 
 static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req)
@@ -1711,6 +1706,10 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 		wake_up(&ctx->sq_data->wait);
 	if (io_should_trigger_evfd(ctx))
 		eventfd_signal(ctx->cq_ev_fd, 1);
+	if (wq_has_sleeper(&ctx->cq_wait)) {
+		wake_up_interruptible(&ctx->cq_wait);
+		kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
+	}
 }
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
@@ -1721,6 +1720,10 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 	}
 	if (io_should_trigger_evfd(ctx))
 		eventfd_signal(ctx->cq_ev_fd, 1);
+	if (wq_has_sleeper(&ctx->cq_wait)) {
+		wake_up_interruptible(&ctx->cq_wait);
+		kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
+	}
 }
 
 /* Returns true if there are no backlogged entries after the flush */

From b1445e59cc9a10fdb8f83810ae1f4feb941ab36b Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 7 Jan 2021 03:15:43 +0000
Subject: [PATCH 448/547] io_uring: synchronise ev_posted() with waitqueues

waitqueue_active() needs smp_mb() to be in sync with waitqueues
modification, but we miss it in io_cqring_ev_posted*() apart from
cq_wait() case.

Take an smb_mb() out of wq_has_sleeper() making it waitqueue_active(),
and place it a few lines before, so it can synchronise other
waitqueue_active() as well.

The patch doesn't add any additional overhead, so even if there are
no problems currently, it's just safer to have it this way.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 401316fe2ae2..cb57e0360fcb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1700,13 +1700,16 @@ static inline unsigned __io_cqring_events(struct io_ring_ctx *ctx)
 
 static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 {
+	/* see waitqueue_active() comment */
+	smp_mb();
+
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
 	if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
 		wake_up(&ctx->sq_data->wait);
 	if (io_should_trigger_evfd(ctx))
 		eventfd_signal(ctx->cq_ev_fd, 1);
-	if (wq_has_sleeper(&ctx->cq_wait)) {
+	if (waitqueue_active(&ctx->cq_wait)) {
 		wake_up_interruptible(&ctx->cq_wait);
 		kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
 	}
@@ -1714,13 +1717,16 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 {
+	/* see waitqueue_active() comment */
+	smp_mb();
+
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		if (waitqueue_active(&ctx->wait))
 			wake_up(&ctx->wait);
 	}
 	if (io_should_trigger_evfd(ctx))
 		eventfd_signal(ctx->cq_ev_fd, 1);
-	if (wq_has_sleeper(&ctx->cq_wait)) {
+	if (waitqueue_active(&ctx->cq_wait)) {
 		wake_up_interruptible(&ctx->cq_wait);
 		kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
 	}

From 04a6a536bc3fd1436fc78c546c6b3ecdccbfaf6d Mon Sep 17 00:00:00 2001
From: Satya Tangirala <satyat@google.com>
Date: Thu, 24 Dec 2020 04:49:54 +0000
Subject: [PATCH 449/547] fs: Fix freeze_bdev()/thaw_bdev() accounting of
 bd_fsfreeze_sb

freeze/thaw_bdev() currently use bdev->bd_fsfreeze_count to infer
whether or not bdev->bd_fsfreeze_sb is valid (it's valid iff
bd_fsfreeze_count is non-zero). thaw_bdev() doesn't nullify
bd_fsfreeze_sb.

But this means a freeze_bdev() call followed by a thaw_bdev() call can
leave bd_fsfreeze_sb with a non-null value, while bd_fsfreeze_count is
zero. If freeze_bdev() is called again, and this time
get_active_super() returns NULL (e.g. because the FS is unmounted),
we'll end up with bd_fsfreeze_count > 0, but bd_fsfreeze_sb is
*untouched* - it stays the same (now garbage) value. A subsequent
thaw_bdev() will decide that the bd_fsfreeze_sb value is legitimate
(since bd_fsfreeze_count > 0), and attempt to use it.

Fix this by always setting bd_fsfreeze_sb to NULL when
bd_fsfreeze_count is successfully decremented to 0 in thaw_sb().
Alternatively, we could set bd_fsfreeze_sb to whatever
get_active_super() returns in freeze_bdev() whenever bd_fsfreeze_count
is successfully incremented to 1 from 0 (which can be achieved cleanly
by moving the line currently setting bd_fsfreeze_sb to immediately
after the "sync:" label, but it might be a little too subtle/easily
overlooked in future).

This fixes the currently panicking xfstests generic/085.

Fixes: 040f04bd2e82 ("fs: simplify freeze_bdev/thaw_bdev")
Signed-off-by: Satya Tangirala <satyat@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3e5b02f6606c..e454c5a81043 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -605,6 +605,8 @@ int thaw_bdev(struct block_device *bdev)
 		error = thaw_super(sb);
 	if (error)
 		bdev->bd_fsfreeze_count++;
+	else
+		bdev->bd_fsfreeze_sb = NULL;
 out:
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	return error;

From 17ffd35809c34b9564edb10727d02eb62958ba5c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 5 Jan 2021 19:20:29 +0100
Subject: [PATCH 450/547] cpufreq: intel_pstate: Use HWP capabilities in
 intel_cpufreq_adjust_perf()

If turbo P-states cannot be used, either due to the configuration of
the processor, or because intel_pstate is not allowed to used them,
the maximum available P-state with HWP enabled corresponds to the
HWP_CAP.GUARANTEED value which is not static.  It can be adjusted by
an out-of-band agent or during an Intel Speed Select performance
level change, so long as it remains less than or equal to
HWP_CAP.MAX.

However, if turbo P-states cannot be used, intel_cpufreq_adjust_perf()
always uses pstate.max_pstate (set during the initialization of the
driver only) as the maximum available P-state, so it may miss a change
of the HWP_CAP.GUARANTEED value.

Prevent that from happening by modifyig intel_cpufreq_adjust_perf()
to always read the "guaranteed" and "maximum turbo" performance
levels from the cached HWP_CAP value.

Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/cpufreq/intel_pstate.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1a660466dd75..32bc11851b8d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2653,12 +2653,13 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
 				      unsigned long capacity)
 {
 	struct cpudata *cpu = all_cpu_data[cpunum];
+	u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
 	int old_pstate = cpu->pstate.current_pstate;
 	int cap_pstate, min_pstate, max_pstate, target_pstate;
 
 	update_turbo_state();
-	cap_pstate = global.turbo_disabled ? cpu->pstate.max_pstate :
-					     cpu->pstate.turbo_pstate;
+	cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) :
+					     HWP_HIGHEST_PERF(hwp_cap);
 
 	/* Optimization: Avoid unnecessary divisions. */
 

From 943bdd0cecad06da8392a33093230e30e501eccc Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 5 Jan 2021 10:19:57 +0000
Subject: [PATCH 451/547] cpufreq: powernow-k8: pass policy rather than use
 cpufreq_cpu_get()

Currently there is an unlikely case where cpufreq_cpu_get() returns a
NULL policy and this will cause a NULL pointer dereference later on.

Fix this by passing the policy to transition_frequency_fidvid() from
the caller and hence eliminating the need for the cpufreq_cpu_get()
and cpufreq_cpu_put().

Thanks to Viresh Kumar for suggesting the fix.

Addresses-Coverity: ("Dereference null return")
Fixes: b43a7ffbf33b ("cpufreq: Notify all policy->cpus in cpufreq_notify_transition()")
Suggested-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/powernow-k8.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 0acc9e241cd7..b9ccb6a3dad9 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -878,9 +878,9 @@ static int get_transition_latency(struct powernow_k8_data *data)
 
 /* Take a frequency, and issue the fid/vid transition command */
 static int transition_frequency_fidvid(struct powernow_k8_data *data,
-		unsigned int index)
+		unsigned int index,
+		struct cpufreq_policy *policy)
 {
-	struct cpufreq_policy *policy;
 	u32 fid = 0;
 	u32 vid = 0;
 	int res;
@@ -912,9 +912,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 	freqs.old = find_khz_freq_from_fid(data->currfid);
 	freqs.new = find_khz_freq_from_fid(fid);
 
-	policy = cpufreq_cpu_get(smp_processor_id());
-	cpufreq_cpu_put(policy);
-
 	cpufreq_freq_transition_begin(policy, &freqs);
 	res = transition_fid_vid(data, fid, vid);
 	cpufreq_freq_transition_end(policy, &freqs, res);
@@ -969,7 +966,7 @@ static long powernowk8_target_fn(void *arg)
 
 	powernow_k8_acpi_pst_values(data, newstate);
 
-	ret = transition_frequency_fidvid(data, newstate);
+	ret = transition_frequency_fidvid(data, newstate, pol);
 
 	if (ret) {
 		pr_err("transition frequency failed\n");

From aa7a1bb02bb44399be69b0a1cbb6495d9eec29fc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 5 Jan 2021 19:19:18 +0100
Subject: [PATCH 452/547] ACPI: PM: s2idle: Drop unused local variables and
 related code

Two local variables in drivers/acpi/x86/s2idle.c are never read, so
drop them along with the code updating their values (in vain).

Fixes: fef98671194b ("ACPI: PM: s2idle: Move x86-specific code to the x86 directory")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 25fea34b544c..2b69536cdccb 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -105,18 +105,8 @@ static void lpi_device_get_constraints_amd(void)
 
 	for (i = 0; i < out_obj->package.count; i++) {
 		union acpi_object *package = &out_obj->package.elements[i];
-		struct lpi_device_info_amd info = { };
 
-		if (package->type == ACPI_TYPE_INTEGER) {
-			switch (i) {
-			case 0:
-				info.revision = package->integer.value;
-				break;
-			case 1:
-				info.count = package->integer.value;
-				break;
-			}
-		} else if (package->type == ACPI_TYPE_PACKAGE) {
+		if (package->type == ACPI_TYPE_PACKAGE) {
 			lpi_constraints_table = kcalloc(package->package.count,
 							sizeof(*lpi_constraints_table),
 							GFP_KERNEL);
@@ -135,12 +125,10 @@ static void lpi_device_get_constraints_amd(void)
 
 				for (k = 0; k < info_obj->package.count; ++k) {
 					union acpi_object *obj = &info_obj->package.elements[k];
-					union acpi_object *obj_new;
 
 					list = &lpi_constraints_table[lpi_constraints_table_size];
 					list->min_dstate = -1;
 
-					obj_new = &obj[k];
 					switch (k) {
 					case 0:
 						dev_info.enabled = obj->integer.value;

From ee61cfd955a64a58ed35cbcfc54068fcbd486945 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Thu, 31 Dec 2020 19:35:25 +0800
Subject: [PATCH 453/547] ACPI: scan: add stub acpi_create_platform_device()
 for !CONFIG_ACPI

It adds a stub acpi_create_platform_device() for !CONFIG_ACPI build, so
that caller doesn't have to deal with !CONFIG_ACPI build issue.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 2630c2e953f7..053bf05fb1f7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -885,6 +885,13 @@ static inline int acpi_device_modalias(struct device *dev,
 	return -ENODEV;
 }
 
+static inline struct platform_device *
+acpi_create_platform_device(struct acpi_device *adev,
+			    struct property_entry *properties)
+{
+	return NULL;
+}
+
 static inline bool acpi_dma_supported(struct acpi_device *adev)
 {
 	return false;

From 240bdc605e6a9d0309bd003de3413f6f729eca18 Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Tue, 29 Dec 2020 11:17:59 +0000
Subject: [PATCH 454/547] ACPI: Update Kconfig help text for items that are no
 longer modular

The CONTAINER and HOTPLUG_MEMORY options mention modules but are bool
only, so if selected are always built in.

Drop the help text about modules.

Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/Kconfig | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index edf1558c1105..ebcf534514be 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -395,9 +395,6 @@ config ACPI_CONTAINER
 
 	  This helps support hotplug of nodes, CPUs, and memory.
 
-	  To compile this driver as a module, choose M here:
-	  the module will be called container.
-
 config ACPI_HOTPLUG_MEMORY
 	bool "Memory Hotplug"
 	depends on MEMORY_HOTPLUG
@@ -411,9 +408,6 @@ config ACPI_HOTPLUG_MEMORY
 	  removing memory devices at runtime, you need not enable
 	  this driver.
 
-	  To compile this driver as a module, choose M here:
-	  the module will be called acpi_memhotplug.
-
 config ACPI_HOTPLUG_IOAPIC
 	bool
 	depends on PCI

From 47f4469970d8861bc06d2d4d45ac8200ff07c693 Mon Sep 17 00:00:00 2001
From: Bard Liao <yung-chuan.liao@linux.intel.com>
Date: Tue, 5 Jan 2021 17:11:45 +0800
Subject: [PATCH 455/547] Revert "device property: Keep secondary firmware node
 secondary by type"

While commit d5dcce0c414f ("device property: Keep secondary firmware
node secondary by type") describes everything correct in its commit
message, the change it made does the opposite and original commit
c15e1bdda436 ("device property: Fix the secondary firmware node handling
in set_primary_fwnode()") was fully correct.

Revert the former one here and improve documentation in the next patch.

Fixes: d5dcce0c414f ("device property: Keep secondary firmware node secondary by type")
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: 5.10+ <stable@vger.kernel.org> # 5.10+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 25e08e5f40bd..51b9545a050b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -4433,7 +4433,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode)
 		if (fwnode_is_primary(fn)) {
 			dev->fwnode = fn->secondary;
 			if (!(parent && fn == parent->fwnode))
-				fn->secondary = ERR_PTR(-ENODEV);
+				fn->secondary = NULL;
 		} else {
 			dev->fwnode = NULL;
 		}

From 3f7bddaf5d5a83aa2eb1e6d72db221d3ec43c813 Mon Sep 17 00:00:00 2001
From: Bard Liao <yung-chuan.liao@linux.intel.com>
Date: Tue, 5 Jan 2021 17:11:46 +0800
Subject: [PATCH 456/547] device property: add description of fwnode cases

There are only four valid fwnode cases which are
- primary --> secondary --> -ENODEV
- primary --> NULL
- secondary --> -ENODEV
- NULL

dev->fwnode should be converted between the 4 cases above no matter
how/when set_primary_fwnode() and set_secondary_fwnode() are called.
Describe it in the code so people will keep it in mind.

Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
[ rjw: Comment edit ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 51b9545a050b..14f165816742 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -4414,6 +4414,12 @@ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode)
  *
  * Set the device's firmware node pointer to @fwnode, but if a secondary
  * firmware node of the device is present, preserve it.
+ *
+ * Valid fwnode cases are:
+ *  - primary --> secondary --> -ENODEV
+ *  - primary --> NULL
+ *  - secondary --> -ENODEV
+ *  - NULL
  */
 void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode)
 {
@@ -4432,6 +4438,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode)
 	} else {
 		if (fwnode_is_primary(fn)) {
 			dev->fwnode = fn->secondary;
+			/* Set fn->secondary = NULL, so fn remains the primary fwnode */
 			if (!(parent && fn == parent->fwnode))
 				fn->secondary = NULL;
 		} else {

From 2b5f09cadfc576817c0450e01d454f750909b103 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <khsieh@codeaurora.org>
Date: Fri, 18 Dec 2020 09:53:40 -0800
Subject: [PATCH 457/547] drm/msm/dp: postpone irq_hpd event during connection
 pending state

irq_hpd event can only be executed at connected state. Therefore
irq_hpd event should be postponed if it happened at connection
pending state. This patch also make sure both link rate and lane
are valid before start link training.

Signed-off-by: Kuogee Hsieh <khsieh@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c |  7 +++++++
 drivers/gpu/drm/msm/dp/dp_panel.c   | 14 ++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 6e971d552911..3bc7ed21de28 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -693,6 +693,13 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data)
 		return 0;
 	}
 
+	if (state == ST_CONNECT_PENDING) {
+		/* wait until ST_CONNECTED */
+		dp_add_event(dp, EV_IRQ_HPD_INT, 0, 1); /* delay = 1 */
+		mutex_unlock(&dp->event_mutex);
+		return 0;
+	}
+
 	ret = dp_display_usbpd_attention_cb(&dp->pdev->dev);
 	if (ret == -ECONNRESET) { /* cable unplugged */
 		dp->core_initialized = false;
diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c
index 97dca3e378b7..d1780bcac8cc 100644
--- a/drivers/gpu/drm/msm/dp/dp_panel.c
+++ b/drivers/gpu/drm/msm/dp/dp_panel.c
@@ -167,14 +167,20 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
 	panel = container_of(dp_panel, struct dp_panel_private, dp_panel);
 
 	rc = dp_panel_read_dpcd(dp_panel);
-	bw_code = drm_dp_link_rate_to_bw_code(dp_panel->link_info.rate);
-	if (rc || !is_link_rate_valid(bw_code) ||
-			!is_lane_count_valid(dp_panel->link_info.num_lanes) ||
-			(bw_code > dp_panel->max_bw_code)) {
+	if (rc) {
 		DRM_ERROR("read dpcd failed %d\n", rc);
 		return rc;
 	}
 
+	bw_code = drm_dp_link_rate_to_bw_code(dp_panel->link_info.rate);
+	if (!is_link_rate_valid(bw_code) ||
+			!is_lane_count_valid(dp_panel->link_info.num_lanes) ||
+			(bw_code > dp_panel->max_bw_code)) {
+		DRM_ERROR("Illegal link rate=%d lane=%d\n", dp_panel->link_info.rate,
+				dp_panel->link_info.num_lanes);
+		return -EINVAL;
+	}
+
 	if (dp_panel->dfp_present) {
 		rlen = drm_dp_dpcd_read(panel->aux, DP_SINK_COUNT,
 				&count, 1);

From d863f0c7b536288e2bd40cbc01c10465dd226b11 Mon Sep 17 00:00:00 2001
From: Craig Tatlor <ctatlor97@gmail.com>
Date: Wed, 30 Dec 2020 17:29:42 +0200
Subject: [PATCH 458/547] drm/msm: Call msm_init_vram before binding the gpu

vram.size is needed when binding a gpu without an iommu and is defined
in msm_init_vram(), so run that before binding it.

Signed-off-by: Craig Tatlor <ctatlor97@gmail.com>
Reviewed-by: Brian Masney <masneyb@onstation.org>
Tested-by: Alexey Minnekhanov <alexeymin@postmarketos.org>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_drv.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index ce9bb6e929c2..549ffb60e9ca 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -457,15 +457,15 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 
 	drm_mode_config_init(ddev);
 
+	ret = msm_init_vram(ddev);
+	if (ret)
+		goto err_destroy_mdss;
+
 	/* Bind all our sub-components: */
 	ret = component_bind_all(dev, ddev);
 	if (ret)
 		goto err_destroy_mdss;
 
-	ret = msm_init_vram(ddev);
-	if (ret)
-		goto err_msm_uninit;
-
 	dma_set_max_seg_size(dev, UINT_MAX);
 
 	msm_gem_shrinker_init(ddev);

From 3f7759e7b7585a0bffda06d4eddc6b0b850ef6c3 Mon Sep 17 00:00:00 2001
From: Iskren Chernev <iskren.chernev@gmail.com>
Date: Wed, 30 Dec 2020 17:29:43 +0200
Subject: [PATCH 459/547] drm/msm: Add modparam to allow vram carveout

Using the GPU with a VRAM Carveout is a security vulnerability.
Nevertheless it is sometimes required, especially when no IOMMU
implementation is available for a certain platform.

Signed-off-by: Iskren Chernev <iskren.chernev@gmail.com>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a2xx_gpu.c      | 6 ++++--
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c      | 6 ++++--
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c      | 6 ++++--
 drivers/gpu/drm/msm/adreno/adreno_device.c | 4 ++++
 drivers/gpu/drm/msm/adreno/adreno_gpu.h    | 1 +
 5 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
index 7e82c41a85f1..bdc989183c64 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
@@ -534,8 +534,10 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev)
 
 	if (!gpu->aspace) {
 		dev_err(dev->dev, "No memory protection without MMU\n");
-		ret = -ENXIO;
-		goto fail;
+		if (!allow_vram_carveout) {
+			ret = -ENXIO;
+			goto fail;
+		}
 	}
 
 	return gpu;
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 93da6683a866..4534633fe7cd 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -564,8 +564,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 		 * implement a cmdstream validator.
 		 */
 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
-		ret = -ENXIO;
-		goto fail;
+		if (!allow_vram_carveout) {
+			ret = -ENXIO;
+			goto fail;
+		}
 	}
 
 	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index c0be3a0f36b2..82bebb40234d 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -692,8 +692,10 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
 		 * implement a cmdstream validator.
 		 */
 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
-		ret = -ENXIO;
-		goto fail;
+		if (!allow_vram_carveout) {
+			ret = -ENXIO;
+			goto fail;
+		}
 	}
 
 	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 87c8b033ad1a..12e75ba360f9 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -18,6 +18,10 @@ bool snapshot_debugbus = false;
 MODULE_PARM_DESC(snapshot_debugbus, "Include debugbus sections in GPU devcoredump (if not fused off)");
 module_param_named(snapshot_debugbus, snapshot_debugbus, bool, 0600);
 
+bool allow_vram_carveout = false;
+MODULE_PARM_DESC(allow_vram_carveout, "Allow using VRAM Carveout, in place of IOMMU");
+module_param_named(allow_vram_carveout, allow_vram_carveout, bool, 0600);
+
 static const struct adreno_info gpulist[] = {
 	{
 		.rev   = ADRENO_REV(2, 0, 0, 0),
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index c3775f79525a..fe5444a1482a 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -18,6 +18,7 @@
 #include "adreno_pm4.xml.h"
 
 extern bool snapshot_debugbus;
+extern bool allow_vram_carveout;
 
 enum {
 	ADRENO_FW_PM4 = 0,

From c4151604f0603d5700072183a05828ff87d764e4 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Mon, 21 Dec 2020 06:13:20 +0100
Subject: [PATCH 460/547] cpufreq: intel_pstate: remove obsolete functions

percent_fp() was used in intel_pstate_pid_reset(), which was removed in
commit 9d0ef7af1f2d ("cpufreq: intel_pstate: Do not use PID-based P-state
selection") and hence, percent_fp() is unused since then.

percent_ext_fp() was last used in intel_pstate_update_perf_limits(), which
was refactored in commit 1a4fe38add8b ("cpufreq: intel_pstate: Remove
max/min fractions to limit performance"), and hence, percent_ext_fp() is
unused since then.

make CC=clang W=1 points us those unused functions:

drivers/cpufreq/intel_pstate.c:79:23: warning: unused function 'percent_fp' [-Wunused-function]
static inline int32_t percent_fp(int percent)
                      ^

drivers/cpufreq/intel_pstate.c:94:23: warning: unused function 'percent_ext_fp' [-Wunused-function]
static inline int32_t percent_ext_fp(int percent)
                      ^

Remove those obsolete functions.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 32bc11851b8d..be05e038d956 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -76,11 +76,6 @@ static inline int ceiling_fp(int32_t x)
 	return ret;
 }
 
-static inline int32_t percent_fp(int percent)
-{
-	return div_fp(percent, 100);
-}
-
 static inline u64 mul_ext_fp(u64 x, u64 y)
 {
 	return (x * y) >> EXT_FRAC_BITS;
@@ -91,11 +86,6 @@ static inline u64 div_ext_fp(u64 x, u64 y)
 	return div64_u64(x << EXT_FRAC_BITS, y);
 }
 
-static inline int32_t percent_ext_fp(int percent)
-{
-	return div_ext_fp(percent, 100);
-}
-
 /**
  * struct sample -	Store performance sample
  * @core_avg_perf:	Ratio of APERF/MPERF which is the actual average

From 00fd44a1a4700718d5d962432b55c09820f7e709 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@somainline.org>
Date: Mon, 4 Jan 2021 20:30:41 +0100
Subject: [PATCH 461/547] drm/msm: Only enable A6xx LLCC code on A6xx

Using this code on A5xx (and probably older too) causes a
smmu bug.

Fixes: 474dadb8b0d5 ("drm/msm/a6xx: Add support for using system cache(LLC)")
Signed-off-by: Konrad Dybcio <konrad.dybcio@somainline.org>
Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@somainline.org>
Reviewed-by: Jordan Crouse <jcrouse@codeaurora.org>
Reviewed-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 21 ++++++++++++---------
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  5 +++++
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 6cf9975e951e..f09175698827 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -191,8 +191,6 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
 		struct platform_device *pdev)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-	struct io_pgtable_domain_attr pgtbl_cfg;
 	struct iommu_domain *iommu;
 	struct msm_mmu *mmu;
 	struct msm_gem_address_space *aspace;
@@ -202,13 +200,18 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
 	if (!iommu)
 		return NULL;
 
-	/*
-	 * This allows GPU to set the bus attributes required to use system
-	 * cache on behalf of the iommu page table walker.
-	 */
-	if (!IS_ERR(a6xx_gpu->htw_llc_slice)) {
-		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
-		iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
+
+	if (adreno_is_a6xx(adreno_gpu)) {
+		struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+		struct io_pgtable_domain_attr pgtbl_cfg;
+		/*
+		* This allows GPU to set the bus attributes required to use system
+		* cache on behalf of the iommu page table walker.
+		*/
+		if (!IS_ERR(a6xx_gpu->htw_llc_slice)) {
+			pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
+			iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
+		}
 	}
 
 	mmu = msm_iommu_new(&pdev->dev, iommu);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index fe5444a1482a..b3d9a333591b 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -212,6 +212,11 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu)
 	return gpu->revn == 540;
 }
 
+static inline bool adreno_is_a6xx(struct adreno_gpu *gpu)
+{
+	return ((gpu->revn < 700 && gpu->revn > 599));
+}
+
 static inline int adreno_is_a618(struct adreno_gpu *gpu)
 {
        return gpu->revn == 618;

From afded6d83aa7b35dab675c730528109cc58d6847 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 28 Dec 2020 20:43:13 +0200
Subject: [PATCH 462/547] misc: pvpanic: Check devm_ioport_map() for NULL

Inconveniently devm_ioport_map() and devm_ioremap_resource()
return errors differently, i.e. former uses simply NULL pointer,
while the latter an error pointer.

Due to this, we have to check each of them separately.

Fixes: f104060813fe ("misc: pvpanic: Combine ACPI and platform drivers")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20201228184313.57610-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/pvpanic.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c
index 951b37da5e3c..41cab297d66e 100644
--- a/drivers/misc/pvpanic.c
+++ b/drivers/misc/pvpanic.c
@@ -55,12 +55,23 @@ static int pvpanic_mmio_probe(struct platform_device *pdev)
 	struct resource *res;
 
 	res = platform_get_mem_or_io(pdev, 0);
-	if (res && resource_type(res) == IORESOURCE_IO)
+	if (!res)
+		return -EINVAL;
+
+	switch (resource_type(res)) {
+	case IORESOURCE_IO:
 		base = devm_ioport_map(dev, res->start, resource_size(res));
-	else
+		if (!base)
+			return -ENOMEM;
+		break;
+	case IORESOURCE_MEM:
 		base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
+		if (IS_ERR(base))
+			return PTR_ERR(base);
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	atomic_notifier_chain_register(&panic_notifier_list,
 				       &pvpanic_panic_nb);

From d8f5c29653c3f6995e8979be5623d263e92f6b86 Mon Sep 17 00:00:00 2001
From: Sean Tranchetti <stranche@codeaurora.org>
Date: Tue, 5 Jan 2021 16:22:25 -0800
Subject: [PATCH 463/547] net: ipv6: fib: flush exceptions when purging route

Route removal is handled by two code paths. The main removal path is via
fib6_del_route() which will handle purging any PMTU exceptions from the
cache, removing all per-cpu copies of the DST entry used by the route, and
releasing the fib6_info struct.

The second removal location is during fib6_add_rt2node() during a route
replacement operation. This path also calls fib6_purge_rt() to handle
cleaning up the per-cpu copies of the DST entries and releasing the
fib6_info associated with the older route, but it does not flush any PMTU
exceptions that the older route had. Since the older route is removed from
the tree during the replacement, we lose any way of accessing it again.

As these lingering DSTs and the fib6_info struct are holding references to
the underlying netdevice struct as well, unregistering that device from the
kernel can never complete.

Fixes: 2b760fcf5cfb3 ("ipv6: hook up exception table to store dst cache")
Signed-off-by: Sean Tranchetti <stranche@codeaurora.org>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/1609892546-11389-1-git-send-email-stranche@quicinc.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/ip6_fib.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 605cdd38a919..f43e27555725 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1025,6 +1025,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
 {
 	struct fib6_table *table = rt->fib6_table;
 
+	/* Flush all cached dst in exception table */
+	rt6_flush_exceptions(rt);
 	fib6_drop_pcpu_from(rt, table);
 
 	if (rt->nh && !list_empty(&rt->nh_list))
@@ -1927,9 +1929,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	net->ipv6.rt6_stats->fib_rt_entries--;
 	net->ipv6.rt6_stats->fib_discarded_routes++;
 
-	/* Flush all cached dst in exception table */
-	rt6_flush_exceptions(rt);
-
 	/* Reset round-robin state, if necessary */
 	if (rcu_access_pointer(fn->rr_ptr) == rt)
 		fn->rr_ptr = NULL;

From 5316a7c0130acf09bfc8bb0092407006010fcccc Mon Sep 17 00:00:00 2001
From: Sean Tranchetti <stranche@codeaurora.org>
Date: Tue, 5 Jan 2021 16:22:26 -0800
Subject: [PATCH 464/547] tools: selftests: add test for changing routes with
 PTMU exceptions

Adds new 2 new tests to the PTMU script: pmtu_ipv4/6_route_change.

These tests explicitly test for a recently discovered problem in the
IPv6 routing framework where PMTU exceptions were not properly released
when replacing a route via "ip route change ...".

After creating PMTU exceptions, the route from the device A to R1 will be
replaced with a new route, then device A will be deleted. If the PMTU
exceptions were properly cleaned up by the kernel, this device deletion
will succeed. Otherwise, the unregistration of the device will stall, and
messages such as the following will be logged in dmesg:

unregister_netdevice: waiting for veth_A-R1 to become free. Usage count = 4

Signed-off-by: Sean Tranchetti <stranche@codeaurora.org>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/1609892546-11389-2-git-send-email-stranche@quicinc.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/pmtu.sh | 71 ++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 464e31eabc73..64cd2e23c568 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -162,7 +162,15 @@
 # - list_flush_ipv6_exception
 #	Using the same topology as in pmtu_ipv6, create exceptions, and check
 #	they are shown when listing exception caches, gone after flushing them
-
+#
+# - pmtu_ipv4_route_change
+#	Use the same topology as in pmtu_ipv4, but issue a route replacement
+#	command and delete the corresponding device afterward. This tests for
+#	proper cleanup of the PMTU exceptions by the route replacement path.
+#	Device unregistration should complete successfully
+#
+# - pmtu_ipv6_route_change
+#	Same as above but with IPv6
 
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
@@ -224,7 +232,9 @@ tests="
 	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions	1
 	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions	1
 	list_flush_ipv4_exception	ipv4: list and flush cached exceptions	1
-	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1"
+	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1
+	pmtu_ipv4_route_change		ipv4: PMTU exception w/route replace	1
+	pmtu_ipv6_route_change		ipv6: PMTU exception w/route replace	1"
 
 NS_A="ns-A"
 NS_B="ns-B"
@@ -1782,6 +1792,63 @@ test_list_flush_ipv6_exception() {
 	return ${fail}
 }
 
+test_pmtu_ipvX_route_change() {
+	family=${1}
+
+	setup namespaces routing || return 2
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	if [ ${family} -eq 4 ]; then
+		ping=ping
+		dst1="${prefix4}.${b_r1}.1"
+		dst2="${prefix4}.${b_r2}.1"
+		gw="${prefix4}.${a_r1}.2"
+	else
+		ping=${ping6}
+		dst1="${prefix6}:${b_r1}::1"
+		dst2="${prefix6}:${b_r2}::1"
+		gw="${prefix6}:${a_r1}::2"
+	fi
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1400
+	mtu "${ns_b}"  veth_B-R1 1400
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	# Create route exceptions
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
+
+	# Check that exceptions have been created with the correct PMTU
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+
+	# Replace the route from A to R1
+	run_cmd ${ns_a} ip route change default via ${gw}
+
+	# Delete the device in A
+	run_cmd ${ns_a} ip link del "veth_A-R1"
+}
+
+test_pmtu_ipv4_route_change() {
+	test_pmtu_ipvX_route_change 4
+}
+
+test_pmtu_ipv6_route_change() {
+	test_pmtu_ipvX_route_change 6
+}
+
 usage() {
 	echo
 	echo "$0 [OPTIONS] [TEST]..."

From 384b77fd48fd683a82760bc88bef8611cba997fc Mon Sep 17 00:00:00 2001
From: Amanoel Dawod <kernel@amanoeldawod.com>
Date: Sat, 26 Dec 2020 18:58:40 -0500
Subject: [PATCH 465/547] Fonts: font_ter16x32: Update font with new upstream
 Terminus release

This is just a maintenance patch to update font_ter16x32.c with changes
and minor fixes added in new upstream Terminus v4.49.

>From release notes of new version 4.49, this brings:
- Altered ascii grave in some sizes to be more useful as a back quote.
- Fixed 21B5, added 21B2 and 21B3.

Just as my initial submission of the font, above changes were obtained from
new ter-i32b.psf font source.

Terminus font sources are available for download at SourceForge:
https://sourceforge.net/projects/terminus-font/files/terminus-font-4.49/

Simply running `make` in source directory will build the .psf font files.

Signed-off-by: Amanoel Dawod <kernel@amanoeldawod.com>
Link: https://lore.kernel.org/r/20201226235840.26290-1-kernel@amanoeldawod.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/fonts/font_ter16x32.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/fonts/font_ter16x32.c b/lib/fonts/font_ter16x32.c
index 1955d624177c..5baedc573dd6 100644
--- a/lib/fonts/font_ter16x32.c
+++ b/lib/fonts/font_ter16x32.c
@@ -774,8 +774,8 @@ static const struct font_data fontdata_ter16x32 = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xfc,
 	0x7f, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 95 */
-	0x00, 0x00, 0x1c, 0x00, 0x0e, 0x00, 0x07, 0x00,
-	0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0e, 0x00,
+	0x07, 0x00, 0x03, 0x80, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -1169,7 +1169,7 @@ static const struct font_data fontdata_ter16x32 = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x7f, 0xf8, 0x7f, 0xfc, 0x03, 0x9e, 0x03, 0x8e,
+	0x7e, 0xf8, 0x7f, 0xfc, 0x03, 0x9e, 0x03, 0x8e,
 	0x03, 0x8e, 0x3f, 0x8e, 0x7f, 0xfe, 0xf3, 0xfe,
 	0xe3, 0x80, 0xe3, 0x80, 0xe3, 0x80, 0xf3, 0xce,
 	0x7f, 0xfe, 0x3e, 0xfc, 0x00, 0x00, 0x00, 0x00,

From abf8ef953a43e74aac3c54a94975f21bd483199b Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Mon, 30 Nov 2020 04:38:11 +0200
Subject: [PATCH 466/547] net/mlx5: Check if lag is supported before creating
 one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes a memleak issue by preventing to create a lag and
add PFs if lag is not supported.

comm “python3”, pid 349349, jiffies 4296985507 (age 1446.976s)
hex dump (first 32 bytes):
  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  …………….
  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  …………….
 backtrace:
  [<000000005b216ae7>] mlx5_lag_add+0x1d5/0×3f0 [mlx5_core]
  [<000000000445aa55>] mlx5e_nic_enable+0x66/0×1b0 [mlx5_core]
  [<00000000c56734c3>] mlx5e_attach_netdev+0x16e/0×200 [mlx5_core]
  [<0000000030439d1f>] mlx5e_attach+0x5c/0×90 [mlx5_core]
  [<0000000018fd8615>] mlx5e_add+0x1a4/0×410 [mlx5_core]
  [<0000000068bc504b>] mlx5_add_device+0x72/0×120 [mlx5_core]
  [<000000009fce51f9>] mlx5_register_device+0x77/0xb0 [mlx5_core]
  [<00000000d0d81ff3>] mlx5_load_one+0xc58/0×1eb0 [mlx5_core]
  [<0000000045077adc>] init_one+0x3ea/0×920 [mlx5_core]
  [<0000000043287674>] pci_device_probe+0xcd/0×150
  [<00000000dafd3279>] really_probe+0x1c9/0×4b0
  [<00000000f06bdd84>] driver_probe_device+0x5d/0×140
  [<00000000e3d508b6>] device_driver_attach+0x4f/0×60
  [<0000000084fba0f0>] bind_store+0xbf/0×120
  [<00000000bf6622b3>] kernfs_fop_write+0x114/0×1b0

Fixes: 9b412cc35f00 ("net/mlx5e: Add LAG warning if bond slave is not lag master")
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index f3d45ef082cd..83a05371e2aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -564,7 +564,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
 	struct mlx5_core_dev *tmp_dev;
 	int i, err;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+	    !MLX5_CAP_GEN(dev, lag_master) ||
+	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
 		return;
 
 	tmp_dev = mlx5_get_next_phys_dev(dev);
@@ -582,12 +584,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
 	if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
 		return;
 
-	for (i = 0; i < MLX5_MAX_PORTS; i++) {
-		tmp_dev = ldev->pf[i].dev;
-		if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) ||
-		    MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS)
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (!ldev->pf[i].dev)
 			break;
-	}
 
 	if (i >= MLX5_MAX_PORTS)
 		ldev->flags |= MLX5_LAG_FLAG_READY;

From 9c9be85f6b59d80efe4705109c0396df18d4e11d Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Tue, 24 Nov 2020 22:16:23 +0200
Subject: [PATCH 467/547] net/mlx5e: Add missing capability check for uplink
 follow

Expose firmware indication that it supports setting eswitch uplink state
to follow (follow the physical link). Condition setting the eswitch
uplink admin-state with this capability bit. Older FW may not support
the uplink state setting.

Fixes: 7d0314b11cdd ("net/mlx5e: Modify uplink state on interface up/down")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++-
 include/linux/mlx5/mlx5_ifc.h                     | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7a79d330c075..6a852b4901aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3161,7 +3161,8 @@ static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev,
 
 	mlx5_set_port_admin_status(mdev, state);
 
-	if (mlx5_eswitch_mode(mdev) != MLX5_ESWITCH_LEGACY)
+	if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS ||
+	    !MLX5_CAP_GEN(mdev, uplink_follow))
 		return;
 
 	if (state == MLX5_PORT_UP)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 8fbddec26eb8..442c0160caab 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1280,7 +1280,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8	   ece_support[0x1];
 	u8	   reserved_at_a4[0x7];
 	u8         log_max_srq[0x5];
-	u8         reserved_at_b0[0x2];
+	u8         reserved_at_b0[0x1];
+	u8         uplink_follow[0x1];
 	u8         ts_cqe_to_dest_cqn[0x1];
 	u8         reserved_at_b3[0xd];
 

From 0f2dcade69f2af56b74bce432e48ff3957830ce2 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Mon, 14 Dec 2020 03:38:40 +0200
Subject: [PATCH 468/547] net/mlx5: Use port_num 1 instead of 0 when delete a
 RoCE address

In multi-port mode, FW reports syndrome 0x2ea48 (invalid vhca_port_number)
if the port_num is not 1 or 2.

Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic")
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
index 0fc7de4aa572..8e0dddc6383f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -116,7 +116,7 @@ free:
 static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev)
 {
 	mlx5_core_roce_gid_set(dev, 0, 0, 0,
-			       NULL, NULL, false, 0, 0);
+			       NULL, NULL, false, 0, 1);
 }
 
 static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid)

From eed38eeee734756596e2cc163bdc7dac3be501b1 Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Mon, 7 Dec 2020 08:15:18 +0000
Subject: [PATCH 469/547] net/mlx5e: CT: Use per flow counter when CT flow
 accounting is enabled

Connection counters may be shared for both directions when the counter
is used for connection aging purposes. However, if TC flow
accounting is enabled then a unique counter is required per direction.

Instantiate a unique counter per direction if the conntrack accounting
extension is enabled. Use a shared counter when the connection accounting
extension is disabled.

Fixes: 1edae2335adf ("net/mlx5e: CT: Use the same counter for both directions")
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/tc_ct.c    | 77 ++++++++++++-------
 1 file changed, 49 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index e521254d886e..072363e73f1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -118,16 +118,17 @@ struct mlx5_ct_tuple {
 	u16 zone;
 };
 
-struct mlx5_ct_shared_counter {
+struct mlx5_ct_counter {
 	struct mlx5_fc *counter;
 	refcount_t refcount;
+	bool is_shared;
 };
 
 struct mlx5_ct_entry {
 	struct rhash_head node;
 	struct rhash_head tuple_node;
 	struct rhash_head tuple_nat_node;
-	struct mlx5_ct_shared_counter *shared_counter;
+	struct mlx5_ct_counter *counter;
 	unsigned long cookie;
 	unsigned long restore_cookie;
 	struct mlx5_ct_tuple tuple;
@@ -394,13 +395,14 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
 }
 
 static void
-mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
+mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
 {
-	if (!refcount_dec_and_test(&entry->shared_counter->refcount))
+	if (entry->counter->is_shared &&
+	    !refcount_dec_and_test(&entry->counter->refcount))
 		return;
 
-	mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter);
-	kfree(entry->shared_counter);
+	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
+	kfree(entry->counter);
 }
 
 static void
@@ -699,7 +701,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
 	attr->dest_ft = ct_priv->post_ct;
 	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
 	attr->outer_match_level = MLX5_MATCH_L4;
-	attr->counter = entry->shared_counter->counter;
+	attr->counter = entry->counter->counter;
 	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
 
 	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
@@ -732,13 +734,34 @@ err_attr:
 	return err;
 }
 
-static struct mlx5_ct_shared_counter *
+static struct mlx5_ct_counter *
+mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
+{
+	struct mlx5_ct_counter *counter;
+	int ret;
+
+	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+	if (!counter)
+		return ERR_PTR(-ENOMEM);
+
+	counter->is_shared = false;
+	counter->counter = mlx5_fc_create(ct_priv->dev, true);
+	if (IS_ERR(counter->counter)) {
+		ct_dbg("Failed to create counter for ct entry");
+		ret = PTR_ERR(counter->counter);
+		kfree(counter);
+		return ERR_PTR(ret);
+	}
+
+	return counter;
+}
+
+static struct mlx5_ct_counter *
 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
 			      struct mlx5_ct_entry *entry)
 {
 	struct mlx5_ct_tuple rev_tuple = entry->tuple;
-	struct mlx5_ct_shared_counter *shared_counter;
-	struct mlx5_core_dev *dev = ct_priv->dev;
+	struct mlx5_ct_counter *shared_counter;
 	struct mlx5_ct_entry *rev_entry;
 	__be16 tmp_port;
 	int ret;
@@ -767,25 +790,20 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
 	rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
 					   tuples_ht_params);
 	if (rev_entry) {
-		if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) {
+		if (refcount_inc_not_zero(&rev_entry->counter->refcount)) {
 			mutex_unlock(&ct_priv->shared_counter_lock);
-			return rev_entry->shared_counter;
+			return rev_entry->counter;
 		}
 	}
 	mutex_unlock(&ct_priv->shared_counter_lock);
 
-	shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL);
-	if (!shared_counter)
-		return ERR_PTR(-ENOMEM);
-
-	shared_counter->counter = mlx5_fc_create(dev, true);
-	if (IS_ERR(shared_counter->counter)) {
-		ct_dbg("Failed to create counter for ct entry");
-		ret = PTR_ERR(shared_counter->counter);
-		kfree(shared_counter);
+	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
+	if (IS_ERR(shared_counter)) {
+		ret = PTR_ERR(shared_counter);
 		return ERR_PTR(ret);
 	}
 
+	shared_counter->is_shared = true;
 	refcount_set(&shared_counter->refcount, 1);
 	return shared_counter;
 }
@@ -798,10 +816,13 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
 {
 	int err;
 
-	entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
-	if (IS_ERR(entry->shared_counter)) {
-		err = PTR_ERR(entry->shared_counter);
-		ct_dbg("Failed to create counter for ct entry");
+	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
+		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
+	else
+		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
+
+	if (IS_ERR(entry->counter)) {
+		err = PTR_ERR(entry->counter);
 		return err;
 	}
 
@@ -820,7 +841,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
 err_nat:
 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
 err_orig:
-	mlx5_tc_ct_shared_counter_put(ct_priv, entry);
+	mlx5_tc_ct_counter_put(ct_priv, entry);
 	return err;
 }
 
@@ -918,7 +939,7 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
 			       tuples_ht_params);
 	mutex_unlock(&ct_priv->shared_counter_lock);
-	mlx5_tc_ct_shared_counter_put(ct_priv, entry);
+	mlx5_tc_ct_counter_put(ct_priv, entry);
 
 }
 
@@ -956,7 +977,7 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
 	if (!entry)
 		return -ENOENT;
 
-	mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse);
+	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
 			  FLOW_ACTION_HW_STATS_DELAYED);
 

From b544011f0e58ce43c40105468d6dc67f980a0c7a Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Fri, 13 Nov 2020 06:06:28 +0200
Subject: [PATCH 470/547] net/mlx5e: Fix SWP offsets when vlan inserted by
 driver

In case WQE includes inline header the vlan is inserted by driver even
if vlan offload is set. On geneve over vlan interface where software
parser is used the SWP offsets should be updated according to the added
vlan.

Fixes: e3cfc7e6b7bd ("net/mlx5e: TX, Add geneve tunnel stateless offload support")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h        | 9 +++++++++
 .../net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h  | 8 +++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c          | 9 +++++----
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 7943eb30b837..4880f2179273 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -371,6 +371,15 @@ struct mlx5e_swp_spec {
 	u8 tun_l4_proto;
 };
 
+static inline void mlx5e_eseg_swp_offsets_add_vlan(struct mlx5_wqe_eth_seg *eseg)
+{
+	/* SWP offsets are in 2-bytes words */
+	eseg->swp_outer_l3_offset += VLAN_HLEN / 2;
+	eseg->swp_outer_l4_offset += VLAN_HLEN / 2;
+	eseg->swp_inner_l3_offset += VLAN_HLEN / 2;
+	eseg->swp_inner_l4_offset += VLAN_HLEN / 2;
+}
+
 static inline void
 mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
 		   struct mlx5e_swp_spec *swp_spec)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 899b98aca0d3..1fae7fab8297 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -51,7 +51,7 @@ static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev)
 }
 
 static inline void
-mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u16 ihs)
 {
 	struct mlx5e_swp_spec swp_spec = {};
 	unsigned int offset = 0;
@@ -85,6 +85,8 @@ mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
 	}
 
 	mlx5e_set_eseg_swp(skb, eseg, &swp_spec);
+	if (skb_vlan_tag_present(skb) &&  ihs)
+		mlx5e_eseg_swp_offsets_add_vlan(eseg);
 }
 
 #else
@@ -163,7 +165,7 @@ static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq,
 
 static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
 				       struct sk_buff *skb,
-				       struct mlx5_wqe_eth_seg *eseg)
+				       struct mlx5_wqe_eth_seg *eseg, u16 ihs)
 {
 #ifdef CONFIG_MLX5_EN_IPSEC
 	if (xfrm_offload(skb))
@@ -172,7 +174,7 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
 
 #if IS_ENABLED(CONFIG_GENEVE)
 	if (skb->encapsulation)
-		mlx5e_tx_tunnel_accel(skb, eseg);
+		mlx5e_tx_tunnel_accel(skb, eseg, ihs);
 #endif
 
 	return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index e47e2a0059d0..61ed671fe741 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -682,9 +682,9 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
 
 static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
 				   struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
-				   struct mlx5_wqe_eth_seg *eseg)
+				   struct mlx5_wqe_eth_seg *eseg, u16 ihs)
 {
-	if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg)))
+	if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg, ihs)))
 		return false;
 
 	mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
@@ -714,7 +714,8 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
 			struct mlx5_wqe_eth_seg eseg = {};
 
-			if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg)))
+			if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg,
+							     attr.ihs)))
 				return NETDEV_TX_OK;
 
 			mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
@@ -731,7 +732,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* May update the WQE, but may not post other WQEs. */
 	mlx5e_accel_tx_finish(sq, wqe, &accel,
 			      (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
-	if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth)))
+	if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs)))
 		return NETDEV_TX_OK;
 
 	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());

From 25c904b59aaf4816337acd415514b0c47715f604 Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@nvidia.com>
Date: Mon, 4 Jan 2021 12:54:40 +0200
Subject: [PATCH 471/547] net/mlx5: E-Switch, fix changing vf VLANID

Adding vf VLANID for the first time, or after having cleared previously
defined VLANID works fine, however, attempting to change an existing vf
VLANID clears the rules on the firmware, but does not add new rules for
the new vf VLANID.

Fix this by changing the logic in function esw_acl_egress_lgcy_setup()
so that it will always configure egress rules.

Fixes: ea651a86d468 ("net/mlx5: E-Switch, Refactor eswitch egress acl codes")
Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/esw/acl/egress_lgcy.c  | 25 +++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
index 2b85d4777303..3e19b1721303 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -95,22 +95,21 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
 		return 0;
 	}
 
-	if (!IS_ERR_OR_NULL(vport->egress.acl))
-		return 0;
+	if (!vport->egress.acl) {
+		vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+							 MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+							 table_size);
+		if (IS_ERR(vport->egress.acl)) {
+			err = PTR_ERR(vport->egress.acl);
+			vport->egress.acl = NULL;
+			goto out;
+		}
 
-	vport->egress.acl = esw_acl_table_create(esw, vport->vport,
-						 MLX5_FLOW_NAMESPACE_ESW_EGRESS,
-						 table_size);
-	if (IS_ERR(vport->egress.acl)) {
-		err = PTR_ERR(vport->egress.acl);
-		vport->egress.acl = NULL;
-		goto out;
+		err = esw_acl_egress_lgcy_groups_create(esw, vport);
+		if (err)
+			goto out;
 	}
 
-	err = esw_acl_egress_lgcy_groups_create(esw, vport);
-	if (err)
-		goto out;
-
 	esw_debug(esw->dev,
 		  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
 		  vport->vport, vport->info.vlan, vport->info.qos);

From e13ed0ac064dd6ee964155ba9fdc2f3c3785934c Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Mon, 14 Dec 2020 13:53:03 +0200
Subject: [PATCH 472/547] net/mlx5e: In skb build skip setting mark in
 switchdev mode

sop_drop_qpn field in the cqe is used by two features, in SWITCHDEV mode
to restore the chain id in case of a miss and in LEGACY mode to support
skbedit mark action. In build RX skb, the skb mark field is set regardless
of the configured mode which cause a corruption of the mark field in case
of switchdev mode.

Fix by overriding the mark value back to 0 in the representor tc update
skb flow.

Fixes: 8f1e0b97cc70 ("net/mlx5: E-Switch, Mark miss packets with new chain id mapping")
Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Raed Salem <raeds@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index d29af7b9c695..76177f7c5ec2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -626,6 +626,11 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 	if (!reg_c0)
 		return true;
 
+	/* If reg_c0 is not equal to the default flow tag then skb->mark
+	 * is not supported and must be reset back to 0.
+	 */
+	skb->mark = 0;
+
 	priv = netdev_priv(skb->dev);
 	esw = priv->mdev->priv.eswitch;
 

From b1c0aca3d3ddeebeec57ada9c2df9ed647939249 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 27 Dec 2020 16:33:19 +0200
Subject: [PATCH 473/547] net/mlx5e: ethtool, Fix restriction of autoneg with
 56G

Prior to this patch, configuring speed to 50G with autoneg off over
devices supporting 50G per lane failed.
Support for 50G per lane introduced a new set of link-modes, on which
driver always performed a speed validation as if only legacy link-modes
were configured. Fix driver speed validation to force setting autoneg
over 56G only if in legacy link-mode.

Fixes: 3d7cadae51f1 ("net/mlx5e: ethtool, Fix analysis of speed setting")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Eran Ben Elisha <eranbe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 24 ++++++++++++++-----
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index d9076d543104..2d37742a888c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1010,6 +1010,22 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 	return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
 }
 
+static int mlx5e_speed_validate(struct net_device *netdev, bool ext,
+				const unsigned long link_modes, u8 autoneg)
+{
+	/* Extended link-mode has no speed limitations. */
+	if (ext)
+		return 0;
+
+	if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) &&
+	    autoneg != AUTONEG_ENABLE) {
+		netdev_err(netdev, "%s: 56G link speed requires autoneg enabled\n",
+			   __func__);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
 {
 	u32 i, ptys_modes = 0;
@@ -1103,13 +1119,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 	link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
 		mlx5e_port_speed2linkmodes(mdev, speed, !ext);
 
-	if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) &&
-	    autoneg != AUTONEG_ENABLE) {
-		netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n",
-			   __func__);
-		err = -EINVAL;
+	err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg);
+	if (err)
 		goto out;
-	}
 
 	link_modes = link_modes & eproto.cap;
 	if (!link_modes) {

From 4d8be21112f6fa2ac4b8a13f35866ad65b11d48c Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 4 Jan 2021 10:08:36 +0200
Subject: [PATCH 474/547] net/mlx5: Release devlink object if adev fails

Add missed freeing previously allocated devlink object.

Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c08315b51fd3..ca6f2fc39ea0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1368,8 +1368,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 			 MLX5_COREDEV_VF : MLX5_COREDEV_PF;
 
 	dev->priv.adev_idx = mlx5_adev_idx_alloc();
-	if (dev->priv.adev_idx < 0)
-		return dev->priv.adev_idx;
+	if (dev->priv.adev_idx < 0) {
+		err = dev->priv.adev_idx;
+		goto adev_init_err;
+	}
 
 	err = mlx5_mdev_init(dev, prof_sel);
 	if (err)
@@ -1403,6 +1405,7 @@ pci_init_err:
 	mlx5_mdev_uninit(dev);
 mdev_init_err:
 	mlx5_adev_idx_free(dev->priv.adev_idx);
+adev_init_err:
 	mlx5_devlink_free(devlink);
 
 	return err;

From 7a6eb072a9548492ead086f3e820e9aac71c7138 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Mon, 28 Dec 2020 16:48:40 +0800
Subject: [PATCH 475/547] net/mlx5e: Fix two double free cases

mlx5e_create_ttc_table_groups() frees ft->g on failure of
kvzalloc(), but such failure will be caught by its caller
in mlx5e_create_ttc_table() and ft->g will be freed again
in mlx5e_destroy_flow_table(). The same issue also occurs
in mlx5e_create_ttc_table_groups(). Set ft->g to NULL after
kfree() to avoid double free.

Fixes: 7b3722fa9ef6 ("net/mlx5e: Support RSS for GRE tunneled packets")
Fixes: 33cfaaa8f36f ("net/mlx5e: Split the main flow steering table")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index fa8149f6eb08..44a2dfbc3853 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -942,6 +942,7 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		kfree(ft->g);
+		ft->g = NULL;
 		return -ENOMEM;
 	}
 
@@ -1087,6 +1088,7 @@ static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		kfree(ft->g);
+		ft->g = NULL;
 		return -ENOMEM;
 	}
 

From 5b0bb12c58ac7d22e05b5bfdaa30a116c8c32e32 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Mon, 21 Dec 2020 19:27:31 +0800
Subject: [PATCH 476/547] net/mlx5e: Fix memleak in
 mlx5e_create_l2_table_groups

When mlx5_create_flow_group() fails, ft->g should be
freed just like when kvzalloc() fails. The caller of
mlx5e_create_l2_table_groups() does not catch this
issue on failure, which leads to memleak.

Fixes: 33cfaaa8f36f ("net/mlx5e: Split the main flow steering table")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 44a2dfbc3853..e02e5895703d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -1392,6 +1392,7 @@ err_destroy_groups:
 	ft->g[ft->num_groups] = NULL;
 	mlx5e_destroy_groups(ft);
 	kvfree(in);
+	kfree(ft->g);
 
 	return err;
 }

From f3562f5e00bbae2a6b292941ec76a9140aa3b7dd Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Wed, 6 Jan 2021 17:17:35 +0100
Subject: [PATCH 477/547] docs: octeontx2: tune rst markup

Commit 80b9414832a1 ("docs: octeontx2: Add Documentation for NPA health
reporters") added new documentation with improper formatting for rst, and
caused a few new warnings for make htmldocs in octeontx2.rst:169--202.

Tune markup and formatting for better presentation in the HTML view.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: George Cherian <george.cherian@marvell.com>
Link: https://lore.kernel.org/r/20210106161735.21751-1-lukas.bulwahn@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../ethernet/marvell/octeontx2.rst            | 62 +++++++++++--------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
index d3fcf536d14e..61e850460e18 100644
--- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
+++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
@@ -164,46 +164,56 @@ Devlink health reporters
 
 NPA Reporters
 -------------
-The NPA reporters are responsible for reporting and recovering the following group of errors
+The NPA reporters are responsible for reporting and recovering the following group of errors:
+
 1. GENERAL events
+
    - Error due to operation of unmapped PF.
    - Error due to disabled alloc/free for other HW blocks (NIX, SSO, TIM, DPI and AURA).
+
 2. ERROR events
+
    - Fault due to NPA_AQ_INST_S read or NPA_AQ_RES_S write.
    - AQ Doorbell Error.
+
 3. RAS events
+
    - RAS Error Reporting for NPA_AQ_INST_S/NPA_AQ_RES_S.
+
 4. RVU events
+
    - Error due to unmapped slot.
 
-Sample Output
--------------
-~# devlink health
-pci/0002:01:00.0:
-  reporter hw_npa_intr
-      state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true
-  reporter hw_npa_gen
-      state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true
-  reporter hw_npa_err
-      state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true
-   reporter hw_npa_ras
-      state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true
+Sample Output::
+
+	~# devlink health
+	pci/0002:01:00.0:
+	  reporter hw_npa_intr
+	      state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true
+	  reporter hw_npa_gen
+	      state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true
+	  reporter hw_npa_err
+	      state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true
+	   reporter hw_npa_ras
+	      state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true
 
 Each reporter dumps the
+
  - Error Type
  - Error Register value
  - Reason in words
 
-For eg:
-~# devlink health dump show  pci/0002:01:00.0 reporter hw_npa_gen
- NPA_AF_GENERAL:
-         NPA General Interrupt Reg : 1
-         NIX0: free disabled RX
-~# devlink health dump show  pci/0002:01:00.0 reporter hw_npa_intr
- NPA_AF_RVU:
-         NPA RVU Interrupt Reg : 1
-         Unmap Slot Error
-~# devlink health dump show  pci/0002:01:00.0 reporter hw_npa_err
- NPA_AF_ERR:
-        NPA Error Interrupt Reg : 4096
-        AQ Doorbell Error
+For example::
+
+	~# devlink health dump show  pci/0002:01:00.0 reporter hw_npa_gen
+	 NPA_AF_GENERAL:
+	         NPA General Interrupt Reg : 1
+	         NIX0: free disabled RX
+	~# devlink health dump show  pci/0002:01:00.0 reporter hw_npa_intr
+	 NPA_AF_RVU:
+	         NPA RVU Interrupt Reg : 1
+	         Unmap Slot Error
+	~# devlink health dump show  pci/0002:01:00.0 reporter hw_npa_err
+	 NPA_AF_ERR:
+	        NPA Error Interrupt Reg : 4096
+	        AQ Doorbell Error

From 0ef597c3ac49a62e1a2c1c10f88dd76fde1e1636 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 5 Jan 2021 06:58:15 +0100
Subject: [PATCH 478/547] docs: remove mention of ENABLE_MUST_CHECK

We removed ENABLE_MUST_CHECK in 196793946264 ("Compiler Attributes:
remove CONFIG_ENABLE_MUST_CHECK"), so let's remove docs' mentions.

At the same time, fix the outdated text related to
ENABLE_WARN_DEPRECATED that wasn't removed in 3337d5cfe5e08
("configs: get rid of obsolete CONFIG_ENABLE_WARN_DEPRECATED").

Finally, reflow the paragraph.

Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/r/20210105055815.GA5173@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/4.Coding.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst
index c27e59d2f702..0825dc496f22 100644
--- a/Documentation/process/4.Coding.rst
+++ b/Documentation/process/4.Coding.rst
@@ -249,10 +249,8 @@ features; most of these are found in the "kernel hacking" submenu.  Several
 of these options should be turned on for any kernel used for development or
 testing purposes.  In particular, you should turn on:
 
- - ENABLE_MUST_CHECK and FRAME_WARN to get an
-   extra set of warnings for problems like the use of deprecated interfaces
-   or ignoring an important return value from a function.  The output
-   generated by these warnings can be verbose, but one need not worry about
+ - FRAME_WARN to get warnings for stack frames larger than a given amount.
+   The output generated can be verbose, but one need not worry about
    warnings from other parts of the kernel.
 
  - DEBUG_OBJECTS will add code to track the lifetime of various objects

From a734a7235ef3768dd3c9b7034f663ae6b260375f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Fri, 1 Jan 2021 22:14:47 +0100
Subject: [PATCH 479/547] docs: binfmt-misc: Fix .rst formatting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"name below" is not part of the /proc path and should not be formatted
in monospace.

"doesn``t" is rendered in HTML with a double backtick. Revert it back to
"doesn't".

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Link: https://lore.kernel.org/r/20210101211447.1021412-1-j.neuschaefer@gmx.net
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/binfmt-misc.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/binfmt-misc.rst b/Documentation/admin-guide/binfmt-misc.rst
index 7a864131e5ea..59cd902e3549 100644
--- a/Documentation/admin-guide/binfmt-misc.rst
+++ b/Documentation/admin-guide/binfmt-misc.rst
@@ -23,7 +23,7 @@ Here is what the fields mean:
 
 - ``name``
    is an identifier string. A new /proc file will be created with this
-   ``name below /proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for
+   name below ``/proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for
    obvious reasons.
 - ``type``
    is the type of recognition. Give ``M`` for magic and ``E`` for extension.
@@ -83,7 +83,7 @@ Here is what the fields mean:
       ``F`` - fix binary
             The usual behaviour of binfmt_misc is to spawn the
 	    binary lazily when the misc format file is invoked.  However,
-	    this doesn``t work very well in the face of mount namespaces and
+	    this doesn't work very well in the face of mount namespaces and
 	    changeroots, so the ``F`` mode opens the binary as soon as the
 	    emulation is installed and uses the opened image to spawn the
 	    emulator, meaning it is always available once installed,

From 25942e5ecbac33918ec2f0869ca9a374dbb023f2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 31 Dec 2020 20:08:31 -0800
Subject: [PATCH 480/547] Documentation/admin-guide: kernel-parameters:
 hyphenate comma-separated

Hyphenate "comma separated" when it is used as a compound adjective.
hyphenated.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20210101040831.4148-1-rdunlap@infradead.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c722ec19cd00..9e3cdb271d06 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1385,7 +1385,7 @@
 
 	ftrace_filter=[function-list]
 			[FTRACE] Limit the functions traced by the function
-			tracer at boot up. function-list is a comma separated
+			tracer at boot up. function-list is a comma-separated
 			list of functions. This list can be changed at run
 			time by the set_ftrace_filter file in the debugfs
 			tracing directory.
@@ -1399,13 +1399,13 @@
 	ftrace_graph_filter=[function-list]
 			[FTRACE] Limit the top level callers functions traced
 			by the function graph tracer at boot up.
-			function-list is a comma separated list of functions
+			function-list is a comma-separated list of functions
 			that can be changed at run time by the
 			set_graph_function file in the debugfs tracing directory.
 
 	ftrace_graph_notrace=[function-list]
 			[FTRACE] Do not trace from the functions specified in
-			function-list.  This list is a comma separated list of
+			function-list.  This list is a comma-separated list of
 			functions that can be changed at run time by the
 			set_graph_notrace file in the debugfs tracing directory.
 
@@ -2421,7 +2421,7 @@
 			when set.
 			Format: <int>
 
-	libata.force=	[LIBATA] Force configurations.  The format is comma
+	libata.force=	[LIBATA] Force configurations.  The format is comma-
 			separated list of "[ID:]VAL" where ID is
 			PORT[.DEVICE].  PORT and DEVICE are decimal numbers
 			matching port, link or device.  Basically, it matches
@@ -5145,7 +5145,7 @@
 
 	stacktrace_filter=[function-list]
 			[FTRACE] Limit the functions that the stack tracer
-			will trace at boot up. function-list is a comma separated
+			will trace at boot up. function-list is a comma-separated
 			list of functions. This list can be changed at run
 			time by the stack_trace_filter file in the debugfs
 			tracing directory. Note, this enables stack tracing
@@ -5348,7 +5348,7 @@
 	trace_event=[event-list]
 			[FTRACE] Set and start specified trace events in order
 			to facilitate early boot debugging. The event-list is a
-			comma separated list of trace events to enable. See
+			comma-separated list of trace events to enable. See
 			also Documentation/trace/events.rst
 
 	trace_options=[option-list]

From 9d54ee78aef62c29b15ae2f58a70b1d1cd63a8f0 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Thu, 7 Jan 2021 18:26:10 +0530
Subject: [PATCH 481/547] docs: admin-guide: bootconfig: Fix feils to fails

s/feils/fails/p

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20210107125610.1576368-1-unixbhaskar@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/bootconfig.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst
index 9b90efcc3a35..452b7dcd7f6b 100644
--- a/Documentation/admin-guide/bootconfig.rst
+++ b/Documentation/admin-guide/bootconfig.rst
@@ -154,7 +154,7 @@ get the boot configuration data.
 Because of this "piggyback" method, there is no need to change or
 update the boot loader and the kernel image itself as long as the boot
 loader passes the correct initrd file size. If by any chance, the boot
-loader passes a longer size, the kernel feils to find the bootconfig data.
+loader passes a longer size, the kernel fails to find the bootconfig data.
 
 To do this operation, Linux kernel provides "bootconfig" command under
 tools/bootconfig, which allows admin to apply or delete the config file

From bb12433bf56e76789c6b08b36c546f745a6aa6e1 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Wed, 6 Jan 2021 12:34:36 -0800
Subject: [PATCH 482/547] ARC: unbork 5.11 bootup: fix snafu in
 _TIF_NOTIFY_SIGNAL handling

Linux 5.11.rcX was failing to boot on ARC HSDK board. Turns out we have
a couple of issues, this being the first one, and I'm to blame as I
didn't pay attention during review.

TIF_NOTIFY_SIGNAL support requires checking multiple TIF_* bits in
kernel return code path. Old code only needed to check a single bit so
BBIT0 <TIF_SIGPENDING> worked. New code needs to check multiple bits so
AND <bit-mask> instruction. So needs to use bit mask variant _TIF_SIGPENDING

Cc: Jens Axboe <axboe@kernel.dk>
Fixes: 53855e12588743ea128 ("arc: add support for TIF_NOTIFY_SIGNAL")
Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/34
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1f5308abf36d..1743506081da 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -307,7 +307,7 @@ resume_user_mode_begin:
 	mov r0, sp	; pt_regs for arg to do_signal()/do_notify_resume()
 
 	GET_CURR_THR_INFO_FLAGS   r9
-	and.f  0,  r9, TIF_SIGPENDING|TIF_NOTIFY_SIGNAL
+	and.f  0,  r9, _TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL
 	bz .Lchk_notify_resume
 
 	; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs

From 9e7a67dee27902fedab880b9af909bd4acd0fba9 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Jan 2021 00:15:21 +0100
Subject: [PATCH 483/547] selftests: netfilter: add selftest for ipip pmtu
 discovery with enabled connection tracking

Convert Christians bug description into a reproducer.

Cc: Shuah Khan <shuah@kernel.org>
Reported-by: Christian Perle <christian.perle@secunet.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/netfilter/Makefile    |   3 +-
 .../selftests/netfilter/ipip-conntrack-mtu.sh | 206 ++++++++++++++++++
 2 files changed, 208 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh

diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index a374e10ef506..3006a8e5b41a 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -4,7 +4,8 @@
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
 	conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
 	nft_concat_range.sh nft_conntrack_helper.sh \
-	nft_queue.sh nft_meta.sh
+	nft_queue.sh nft_meta.sh \
+	ipip-conntrack-mtu.sh
 
 LDLIBS = -lmnl
 TEST_GEN_FILES =  nf-queue
diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
new file mode 100755
index 000000000000..4a6f5c3b3215
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Conntrack needs to reassemble fragments in order to have complete
+# packets for rule matching.  Reassembly can lead to packet loss.
+
+# Consider the following setup:
+#            +--------+       +---------+       +--------+
+#            |Router A|-------|Wanrouter|-------|Router B|
+#            |        |.IPIP..|         |..IPIP.|        |
+#            +--------+       +---------+       +--------+
+#           /                  mtu 1400                   \
+#          /                                               \
+#+--------+                                                 +--------+
+#|Client A|                                                 |Client B|
+#|        |                                                 |        |
+#+--------+                                                 +--------+
+
+# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
+# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
+# on its interfaces.
+
+rnd=$(mktemp -u XXXXXXXX)
+rx=$(mktemp)
+
+r_a="ns-ra-$rnd"
+r_b="ns-rb-$rnd"
+r_w="ns-rw-$rnd"
+c_a="ns-ca-$rnd"
+c_b="ns-cb-$rnd"
+
+checktool (){
+	if ! $1 > /dev/null 2>&1; then
+		echo "SKIP: Could not $2"
+		exit $ksft_skip
+	fi
+}
+
+checktool "iptables --version" "run test without iptables"
+checktool "ip -Version" "run test without ip tool"
+checktool "which nc" "run test without nc (netcat)"
+checktool "ip netns add ${r_a}" "create net namespace"
+
+for n in ${r_b} ${r_w} ${c_a} ${c_b};do
+	ip netns add ${n}
+done
+
+cleanup() {
+	for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do
+		ip netns del ${n}
+	done
+	rm -f ${rx}
+}
+
+trap cleanup EXIT
+
+test_path() {
+	msg="$1"
+
+	ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null &
+
+	sleep 1
+	for i in 1 2 3; do
+		head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000
+	done
+
+	wait
+
+	bytes=$(wc -c < ${rx})
+
+	if [ $bytes -eq 1400 ];then
+		echo "OK: PMTU $msg connection tracking"
+	else
+		echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
+		exit 1
+	fi
+}
+
+# Detailed setup for Router A
+# ---------------------------
+# Interfaces:
+# eth0: 10.2.2.1/24
+# eth1: 192.168.10.1/24
+# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
+# Routes:
+# 192.168.20.0/24 dev ipip0    (192.168.20.0/24 is subnet of Client B)
+# 10.4.4.1 via 10.2.2.254      (Router B via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w}
+ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a}
+
+l_addr="10.2.2.1"
+r_addr="10.4.4.1"
+ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+    ip -net ${r_a} link set $dev up
+done
+
+ip -net ${r_a} addr add 10.2.2.1/24 dev veth0
+ip -net ${r_a} addr add 192.168.10.1/24 dev veth1
+
+ip -net ${r_a} route add 192.168.20.0/24 dev ipip0
+ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254
+
+ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Detailed setup for Router B
+# ---------------------------
+# Interfaces:
+# eth0: 10.4.4.1/24
+# eth1: 192.168.20.1/24
+# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
+# Routes:
+# 192.168.10.0/24 dev ipip0    (192.168.10.0/24 is subnet of Client A)
+# 10.2.2.1 via 10.4.4.254      (Router A via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w}
+ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b}
+
+l_addr="10.4.4.1"
+r_addr="10.2.2.1"
+
+ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+	ip -net ${r_b} link set $dev up
+done
+
+ip -net ${r_b} addr add 10.4.4.1/24 dev veth0
+ip -net ${r_b} addr add 192.168.20.1/24 dev veth1
+
+ip -net ${r_b} route add 192.168.10.0/24 dev ipip0
+ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Client A
+ip -net ${c_a} addr add 192.168.10.2/24 dev veth0
+ip -net ${c_a} link set dev lo up
+ip -net ${c_a} link set dev veth0 up
+ip -net ${c_a} route add default via 192.168.10.1
+
+# Client A
+ip -net ${c_b} addr add 192.168.20.2/24 dev veth0
+ip -net ${c_b} link set dev veth0 up
+ip -net ${c_b} link set dev lo up
+ip -net ${c_b} route add default via 192.168.20.1
+
+# Wan
+ip -net ${r_w} addr add 10.2.2.254/24 dev veth0
+ip -net ${r_w} addr add 10.4.4.254/24 dev veth1
+
+ip -net ${r_w} link set dev lo up
+ip -net ${r_w} link set dev veth0 up mtu 1400
+ip -net ${r_w} link set dev veth1 up mtu 1400
+
+ip -net ${r_a} link set dev veth0 mtu 1400
+ip -net ${r_b} link set dev veth0 mtu 1400
+
+ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Path MTU discovery
+# ------------------
+# Running tracepath from Client A to Client B shows PMTU discovery is working
+# as expected:
+#
+# clienta:~# tracepath 192.168.20.2
+# 1?: [LOCALHOST]                      pmtu 1500
+# 1:  192.168.10.1                                          0.867ms
+# 1:  192.168.10.1                                          0.302ms
+# 2:  192.168.10.1                                          0.312ms pmtu 1480
+# 2:  no reply
+# 3:  192.168.10.1                                          0.510ms pmtu 1380
+# 3:  192.168.20.2                                          2.320ms reached
+# Resume: pmtu 1380 hops 3 back 3
+
+# ip netns exec ${c_a} traceroute --mtu 192.168.20.2
+
+# Router A has learned PMTU (1400) to Router B from Wanrouter.
+# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
+# from Router A.
+
+#Send large UDP packet
+#---------------------
+#Now we send a 1400 bytes UDP packet from Client A to Client B:
+
+# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000
+test_path "without"
+
+# The IPv4 stack on Client A already knows the PMTU to Client B, so the
+# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
+# fragments between eth1 and ipip0. The fragments fit into the tunnel and
+# reach their destination.
+
+#When sending the large UDP packet again, Router A now reassembles the
+#fragments before routing the packet over ipip0. The resulting IPIP
+#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
+#dropped on Router A before sending.
+
+ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW
+test_path "with"

From 50c661670f6a3908c273503dfa206dfc7aa54c07 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Jan 2021 00:15:22 +0100
Subject: [PATCH 484/547] net: fix pmtu check in nopmtudisc mode

For some reason ip_tunnel insist on setting the DF bit anyway when the
inner header has the DF bit set, EVEN if the tunnel was configured with
'nopmtudisc'.

This means that the script added in the previous commit
cannot be made to work by adding the 'nopmtudisc' flag to the
ip tunnel configuration. Doing so breaks connectivity even for the
without-conntrack/netfilter scenario.

When nopmtudisc is set, the tunnel will skip the mtu check, so no
icmp error is sent to client. Then, because inner header has DF set,
the outer header gets added with DF bit set as well.

IP stack then sends an error to itself because the packet exceeds
the device MTU.

Fixes: 23a3647bc4f93 ("ip_tunnels: Use skb-len to PMTU check.")
Cc: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/ip_tunnel.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ee65c9225178..64594aa755f0 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -759,8 +759,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		goto tx_error;
 	}
 
-	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
-			    0, 0, false)) {
+	df = tnl_params->frag_off;
+	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
+		df |= (inner_iph->frag_off & htons(IP_DF));
+
+	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
 		ip_rt_put(rt);
 		goto tx_error;
 	}
@@ -788,10 +791,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 			ttl = ip4_dst_hoplimit(&rt->dst);
 	}
 
-	df = tnl_params->frag_off;
-	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
-		df |= (inner_iph->frag_off&htons(IP_DF));
-
 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
 	if (max_headroom > dev->needed_headroom)

From bb4cc1a18856a73f0ff5137df0c2a31f4c50f6cf Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Jan 2021 00:15:23 +0100
Subject: [PATCH 485/547] net: ip: always refragment ip defragmented packets

Conntrack reassembly records the largest fragment size seen in IPCB.
However, when this gets forwarded/transmitted, fragmentation will only
be forced if one of the fragmented packets had the DF bit set.

In that case, a flag in IPCB will force fragmentation even if the
MTU is large enough.

This should work fine, but this breaks with ip tunnels.
Consider client that sends a UDP datagram of size X to another host.

The client fragments the datagram, so two packets, of size y and z, are
sent. DF bit is not set on any of these packets.

Middlebox netfilter reassembles those packets back to single size-X
packet, before routing decision.

packet-size-vs-mtu checks in ip_forward are irrelevant, because DF bit
isn't set.  At output time, ip refragmentation is skipped as well
because x is still smaller than the mtu of the output device.

If ttransmit device is an ip tunnel, the packet size increases to
x+overhead.

Also, tunnel might be configured to force DF bit on outer header.

In this case, packet will be dropped (exceeds MTU) and an ICMP error is
generated back to sender.

But sender already respects the announced MTU, all the packets that
it sent did fit the announced mtu.

Force refragmentation as per original sizes unconditionally so ip tunnel
will encapsulate the fragments instead.

The only other solution I see is to place ip refragmentation in
the ip_tunnel code to handle this case.

Fixes: d6b915e29f4ad ("ip_fragment: don't forward defragmented DF packet")
Reported-by: Christian Perle <christian.perle@secunet.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/ip_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 89fff5f59eea..2ed0b01f72f0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -302,7 +302,7 @@ static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *
 	if (skb_is_gso(skb))
 		return ip_finish_output_gso(net, sk, skb, mtu);
 
-	if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
+	if (skb->len > mtu || IPCB(skb)->frag_max_size)
 		return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
 
 	return ip_finish_output2(net, sk, skb);

From 2aa078932ff6c66bf10cc5b3144440dbfa7d813d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Dec 2020 16:31:36 -0800
Subject: [PATCH 486/547] KVM: x86/mmu: Use -1 to flag an undefined spte in
 get_mmio_spte()

Return -1 from the get_walk() helpers if the shadow walk doesn't fill at
least one spte, which can theoretically happen if the walk hits a
not-present PDPTR.  Returning the root level in such a case will cause
get_mmio_spte() to return garbage (uninitialized stack data).  In
practice, such a scenario should be impossible as KVM shouldn't get a
reserved-bit page fault with a not-present PDPTR.

Note, using mmu->root_level in get_walk() is wrong for other reasons,
too, but that's now a moot point.

Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
Cc: Ben Gardon <bgardon@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20201218003139.2167891-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 7 ++++++-
 arch/x86/kvm/mmu/tdp_mmu.c | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7a6ae9e90bd7..a48cd12c01d7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3488,7 +3488,7 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	int leaf = vcpu->arch.mmu->root_level;
+	int leaf = -1;
 	u64 spte;
 
 
@@ -3532,6 +3532,11 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	else
 		leaf = get_walk(vcpu, addr, sptes);
 
+	if (unlikely(leaf < 0)) {
+		*sptep = 0ull;
+		return reserved;
+	}
+
 	rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
 
 	for (level = root; level >= leaf; level--) {
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 84c8f06bec26..50cec7a15ddb 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1152,8 +1152,8 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
 {
 	struct tdp_iter iter;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
-	int leaf = vcpu->arch.mmu->shadow_root_level;
 	gfn_t gfn = addr >> PAGE_SHIFT;
+	int leaf = -1;
 
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;

From 39b4d43e6003cee51cd119596d3c33d0449eb44c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Dec 2020 16:31:37 -0800
Subject: [PATCH 487/547] KVM: x86/mmu: Get root level from walkers when
 retrieving MMIO SPTE

Get the so called "root" level from the low level shadow page table
walkers instead of manually attempting to calculate it higher up the
stack, e.g. in get_mmio_spte().  When KVM is using PAE shadow paging,
the starting level of the walk, from the callers perspective, is not
the CR3 root but rather the PDPTR "root".  Checking for reserved bits
from the CR3 root causes get_mmio_spte() to consume uninitialized stack
data due to indexing into sptes[] for a level that was not filled by
get_walk().  This can result in false positives and/or negatives
depending on what garbage happens to be on the stack.

Opportunistically nuke a few extra newlines.

Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
Reported-by: Richard Herbert <rherbert@sympatico.ca>
Cc: Ben Gardon <bgardon@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20201218003139.2167891-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 15 ++++++---------
 arch/x86/kvm/mmu/tdp_mmu.c |  5 ++++-
 arch/x86/kvm/mmu/tdp_mmu.h |  4 +++-
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a48cd12c01d7..52f36c879086 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3485,16 +3485,16 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
  */
-static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	int leaf = -1;
 	u64 spte;
 
-
 	walk_shadow_page_lockless_begin(vcpu);
 
-	for (shadow_walk_init(&iterator, vcpu, addr);
+	for (shadow_walk_init(&iterator, vcpu, addr),
+	     *root_level = iterator.level;
 	     shadow_walk_okay(&iterator);
 	     __shadow_walk_next(&iterator, spte)) {
 		leaf = iterator.level;
@@ -3504,7 +3504,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
 
 		if (!is_shadow_present_pte(spte))
 			break;
-
 	}
 
 	walk_shadow_page_lockless_end(vcpu);
@@ -3517,9 +3516,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	u64 sptes[PT64_ROOT_MAX_LEVEL];
 	struct rsvd_bits_validate *rsvd_check;
-	int root = vcpu->arch.mmu->shadow_root_level;
-	int leaf;
-	int level;
+	int root, leaf, level;
 	bool reserved = false;
 
 	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
@@ -3528,9 +3525,9 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	}
 
 	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
-		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
+		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
 	else
-		leaf = get_walk(vcpu, addr, sptes);
+		leaf = get_walk(vcpu, addr, sptes, &root);
 
 	if (unlikely(leaf < 0)) {
 		*sptep = 0ull;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 50cec7a15ddb..a4f9447f8327 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1148,13 +1148,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
  */
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			 int *root_level)
 {
 	struct tdp_iter iter;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	gfn_t gfn = addr >> PAGE_SHIFT;
 	int leaf = -1;
 
+	*root_level = vcpu->arch.mmu->shadow_root_level;
+
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;
 		sptes[leaf - 1] = iter.old_spte;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 556e065503f6..cbbdbadd1526 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn);
 
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			 int *root_level);
+
 #endif /* __KVM_X86_MMU_TDP_MMU_H */

From dde81f9477d018a96fba991c5928c6ab8cc109f8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Dec 2020 16:31:38 -0800
Subject: [PATCH 488/547] KVM: x86/mmu: Use raw level to index into MMIO walks'
 sptes array

Bump the size of the sptes array by one and use the raw level of the
SPTE to index into the sptes array.  Using the SPTE level directly
improves readability by eliminating the need to reason out why the level
is being adjusted when indexing the array.  The array is on the stack
and is not explicitly initialized; bumping its size is nothing more than
a superficial adjustment to the stack frame.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20201218003139.2167891-4-seanjc@google.com>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 15 +++++++--------
 arch/x86/kvm/mmu/tdp_mmu.c |  2 +-
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 52f36c879086..4798a4472066 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3500,7 +3500,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 		leaf = iterator.level;
 		spte = mmu_spte_get_lockless(iterator.sptep);
 
-		sptes[leaf - 1] = spte;
+		sptes[leaf] = spte;
 
 		if (!is_shadow_present_pte(spte))
 			break;
@@ -3514,7 +3514,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 /* return true if reserved bit is detected on spte. */
 static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
-	u64 sptes[PT64_ROOT_MAX_LEVEL];
+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
 	struct rsvd_bits_validate *rsvd_check;
 	int root, leaf, level;
 	bool reserved = false;
@@ -3537,16 +3537,15 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
 
 	for (level = root; level >= leaf; level--) {
-		if (!is_shadow_present_pte(sptes[level - 1]))
+		if (!is_shadow_present_pte(sptes[level]))
 			break;
 		/*
 		 * Use a bitwise-OR instead of a logical-OR to aggregate the
 		 * reserved bit and EPT's invalid memtype/XWR checks to avoid
 		 * adding a Jcc in the loop.
 		 */
-		reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) |
-			    __is_rsvd_bits_set(rsvd_check, sptes[level - 1],
-					       level);
+		reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
+			    __is_rsvd_bits_set(rsvd_check, sptes[level], level);
 	}
 
 	if (reserved) {
@@ -3554,10 +3553,10 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		       __func__, addr);
 		for (level = root; level >= leaf; level--)
 			pr_err("------ spte 0x%llx level %d.\n",
-			       sptes[level - 1], level);
+			       sptes[level], level);
 	}
 
-	*sptep = sptes[leaf - 1];
+	*sptep = sptes[leaf];
 
 	return reserved;
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index a4f9447f8327..efef571806ad 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1160,7 +1160,7 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;
-		sptes[leaf - 1] = iter.old_spte;
+		sptes[leaf] = iter.old_spte;
 	}
 
 	return leaf;

From 9aa418792f5f11ef5d6f72265e1f8ae07efd5784 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Dec 2020 16:31:39 -0800
Subject: [PATCH 489/547] KVM: x86/mmu: Optimize not-present/MMIO SPTE check in
 get_mmio_spte()

Check only the terminal leaf for a "!PRESENT || MMIO" SPTE when looking
for reserved bits on valid, non-MMIO SPTEs.  The get_walk() helpers
terminate their walks if a not-present or MMIO SPTE is encountered, i.e.
the non-terminal SPTEs have already been verified to be regular SPTEs.
This eliminates an extra check-and-branch in a relatively hot loop.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20201218003139.2167891-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4798a4472066..769855f5f0a1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3511,7 +3511,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 	return leaf;
 }
 
-/* return true if reserved bit is detected on spte. */
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
 static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
@@ -3534,11 +3534,20 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		return reserved;
 	}
 
+	*sptep = sptes[leaf];
+
+	/*
+	 * Skip reserved bits checks on the terminal leaf if it's not a valid
+	 * SPTE.  Note, this also (intentionally) skips MMIO SPTEs, which, by
+	 * design, always have reserved bits set.  The purpose of the checks is
+	 * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs.
+	 */
+	if (!is_shadow_present_pte(sptes[leaf]))
+		leaf++;
+
 	rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
 
-	for (level = root; level >= leaf; level--) {
-		if (!is_shadow_present_pte(sptes[level]))
-			break;
+	for (level = root; level >= leaf; level--)
 		/*
 		 * Use a bitwise-OR instead of a logical-OR to aggregate the
 		 * reserved bit and EPT's invalid memtype/XWR checks to avoid
@@ -3546,7 +3555,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		 */
 		reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
 			    __is_rsvd_bits_set(rsvd_check, sptes[level], level);
-	}
 
 	if (reserved) {
 		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
@@ -3556,8 +3564,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 			       sptes[level], level);
 	}
 
-	*sptep = sptes[leaf];
-
 	return reserved;
 }
 

From f65cf84ee769767536dc367acc9568ddb6e4c9f4 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Fri, 18 Dec 2020 23:37:11 -0700
Subject: [PATCH 490/547] KVM: SVM: Add register operand to vmsave call in
 sev_es_vcpu_load

When using LLVM's integrated assembler (LLVM_IAS=1) while building
x86_64_defconfig + CONFIG_KVM=y + CONFIG_KVM_AMD=y, the following build
error occurs:

 $ make LLVM=1 LLVM_IAS=1 arch/x86/kvm/svm/sev.o
 arch/x86/kvm/svm/sev.c:2004:15: error: too few operands for instruction
         asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory");
                      ^
 arch/x86/kvm/svm/sev.c:28:17: note: expanded from macro '__ex'
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
                 ^
 ./arch/x86/include/asm/kvm_host.h:1646:10: note: expanded from macro '__kvm_handle_fault_on_reboot'
         "666: \n\t"                                                     \
                 ^
 <inline asm>:2:2: note: instantiated into assembly here
         vmsave
         ^
 1 error generated.

This happens because LLVM currently does not support calling vmsave
without the fixed register operand (%rax for 64-bit and %eax for
32-bit). This will be fixed in LLVM 12 but the kernel currently supports
LLVM 10.0.1 and newer so this needs to be handled.

Add the proper register using the _ASM_AX macro, which matches the
vmsave call in vmenter.S.

Fixes: 861377730aa9 ("KVM: SVM: Provide support for SEV-ES vCPU loading")
Link: https://reviews.llvm.org/D93524
Link: https://github.com/ClangBuiltLinux/linux/issues/1216
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Message-Id: <20201219063711.3526947-1-natechancellor@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 9858d5ae9ddd..563ced07b0b8 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2001,7 +2001,7 @@ void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu)
 	 * of which one step is to perform a VMLOAD. Since hardware does not
 	 * perform a VMSAVE on VMRUN, the host savearea must be updated.
 	 */
-	asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory");
+	asm volatile(__ex("vmsave %0") : : "a" (__sme_page_pa(sd->save_area)) : "memory");
 
 	/*
 	 * Certain MSRs are restored on VMEXIT, only save ones that aren't

From 52782d5b63725a6c4bf642557c83507430064110 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Sun, 20 Dec 2020 21:03:39 +0100
Subject: [PATCH 491/547] KVM/SVM: Remove leftover __svm_vcpu_run prototype
 from svm.c

Commit 16809ecdc1e8a moved __svm_vcpu_run the prototype to svm.h,
but forgot to remove the original from svm.c.

Fixes: 16809ecdc1e8a ("KVM: SVM: Provide an updated VMRUN invocation for SEV-ES guests")
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Message-Id: <20201220200339.65115-1-ubizjak@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index cce0143a6f80..6824d611dc5d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3677,8 +3677,6 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 	return EXIT_FASTPATH_NONE;
 }
 
-void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
-
 static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 					struct vcpu_svm *svm)
 {

From e42ac777d661e878c3b9bac56df11e226cab3010 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Fri, 18 Dec 2020 15:17:32 +0100
Subject: [PATCH 492/547] KVM: selftests: Factor out guest mode code

demand_paging_test, dirty_log_test, and dirty_log_perf_test have
redundant guest mode code. Factor it out.

Also, while adding a new include, remove the ones we don't need.

Reviewed-by: Ben Gardon <bgardon@google.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201218141734.54359-2-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile          |   2 +-
 .../selftests/kvm/demand_paging_test.c        | 105 ++++-----------
 .../selftests/kvm/dirty_log_perf_test.c       | 121 +++++------------
 tools/testing/selftests/kvm/dirty_log_test.c  | 125 ++++++------------
 .../selftests/kvm/include/guest_modes.h       |  21 +++
 tools/testing/selftests/kvm/lib/guest_modes.c |  70 ++++++++++
 6 files changed, 188 insertions(+), 256 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/include/guest_modes.h
 create mode 100644 tools/testing/selftests/kvm/lib/guest_modes.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index c7ca4faba272..a7286a08c3ae 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -33,7 +33,7 @@ ifeq ($(ARCH),s390)
 	UNAME_M := s390x
 endif
 
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 3d96a7bfaff3..946161a9ce2d 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -7,23 +7,20 @@
  * Copyright (C) 2019, Google, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
+#define _GNU_SOURCE /* for program_invocation_name and pipe2 */
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-#include <asm/unistd.h>
 #include <time.h>
 #include <poll.h>
 #include <pthread.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
 #include <linux/userfaultfd.h>
+#include <sys/syscall.h>
 
-#include "perf_test_util.h"
-#include "processor.h"
+#include "kvm_util.h"
 #include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
 
 #ifdef __NR_userfaultfd
 
@@ -248,9 +245,14 @@ static int setup_demand_paging(struct kvm_vm *vm,
 	return 0;
 }
 
-static void run_test(enum vm_guest_mode mode, bool use_uffd,
-		     useconds_t uffd_delay)
+struct test_params {
+	bool use_uffd;
+	useconds_t uffd_delay;
+};
+
+static void run_test(enum vm_guest_mode mode, void *arg)
 {
+	struct test_params *p = arg;
 	pthread_t *vcpu_threads;
 	pthread_t *uffd_handler_threads = NULL;
 	struct uffd_handler_args *uffd_args = NULL;
@@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 
 	add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
 
-	if (use_uffd) {
+	if (p->use_uffd) {
 		uffd_handler_threads =
 			malloc(nr_vcpus * sizeof(*uffd_handler_threads));
 		TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
@@ -308,7 +310,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 			r = setup_demand_paging(vm,
 						&uffd_handler_threads[vcpu_id],
 						pipefds[vcpu_id * 2],
-						uffd_delay, &uffd_args[vcpu_id],
+						p->uffd_delay, &uffd_args[vcpu_id],
 						vcpu_hva, guest_percpu_mem_size);
 			if (r < 0)
 				exit(-r);
@@ -339,7 +341,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 
 	pr_info("All vCPU threads joined\n");
 
-	if (use_uffd) {
+	if (p->use_uffd) {
 		char c;
 
 		/* Tell the user fault fd handler threads to quit */
@@ -362,38 +364,19 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 
 	free(guest_data_prototype);
 	free(vcpu_threads);
-	if (use_uffd) {
+	if (p->use_uffd) {
 		free(uffd_handler_threads);
 		free(uffd_args);
 		free(pipefds);
 	}
 }
 
-struct guest_mode {
-	bool supported;
-	bool enabled;
-};
-static struct guest_mode guest_modes[NUM_VM_MODES];
-
-#define guest_mode_init(mode, supported, enabled) ({ \
-	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
-})
-
 static void help(char *name)
 {
-	int i;
-
 	puts("");
 	printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
 	       "          [-b memory] [-v vcpus]\n", name);
-	printf(" -m: specify the guest mode ID to test\n"
-	       "     (default: test all supported modes)\n"
-	       "     This option may be used multiple times.\n"
-	       "     Guest mode IDs:\n");
-	for (i = 0; i < NUM_VM_MODES; ++i) {
-		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
-		       guest_modes[i].supported ? " (supported)" : "");
-	}
+	guest_modes_help();
 	printf(" -u: use User Fault FD to handle vCPU page\n"
 	       "     faults.\n");
 	printf(" -d: add a delay in usec to the User Fault\n"
@@ -410,53 +393,22 @@ static void help(char *name)
 int main(int argc, char *argv[])
 {
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
-	bool mode_selected = false;
-	unsigned int mode;
-	int opt, i;
-	bool use_uffd = false;
-	useconds_t uffd_delay = 0;
+	struct test_params p = {};
+	int opt;
 
-#ifdef __x86_64__
-	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
-#endif
-#ifdef __aarch64__
-	guest_mode_init(VM_MODE_P40V48_4K, true, true);
-	guest_mode_init(VM_MODE_P40V48_64K, true, true);
-	{
-		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
-
-		if (limit >= 52)
-			guest_mode_init(VM_MODE_P52V48_64K, true, true);
-		if (limit >= 48) {
-			guest_mode_init(VM_MODE_P48V48_4K, true, true);
-			guest_mode_init(VM_MODE_P48V48_64K, true, true);
-		}
-	}
-#endif
-#ifdef __s390x__
-	guest_mode_init(VM_MODE_P40V48_4K, true, true);
-#endif
+	guest_modes_append_default();
 
 	while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) {
 		switch (opt) {
 		case 'm':
-			if (!mode_selected) {
-				for (i = 0; i < NUM_VM_MODES; ++i)
-					guest_modes[i].enabled = false;
-				mode_selected = true;
-			}
-			mode = strtoul(optarg, NULL, 10);
-			TEST_ASSERT(mode < NUM_VM_MODES,
-				    "Guest mode ID %d too big", mode);
-			guest_modes[mode].enabled = true;
+			guest_modes_cmdline(optarg);
 			break;
 		case 'u':
-			use_uffd = true;
+			p.use_uffd = true;
 			break;
 		case 'd':
-			uffd_delay = strtoul(optarg, NULL, 0);
-			TEST_ASSERT(uffd_delay >= 0,
-				    "A negative UFFD delay is not supported.");
+			p.uffd_delay = strtoul(optarg, NULL, 0);
+			TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
 			break;
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
@@ -473,14 +425,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	for (i = 0; i < NUM_VM_MODES; ++i) {
-		if (!guest_modes[i].enabled)
-			continue;
-		TEST_ASSERT(guest_modes[i].supported,
-			    "Guest mode ID %d (%s) not supported.",
-			    i, vm_guest_mode_string(i));
-		run_test(i, use_uffd, uffd_delay);
-	}
+	for_each_guest_mode(run_test, &p);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 9c6a7be31e03..506741eb5d7f 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -12,16 +12,14 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
 #include <time.h>
 #include <pthread.h>
 #include <linux/bitmap.h>
-#include <linux/bitops.h>
 
 #include "kvm_util.h"
-#include "perf_test_util.h"
-#include "processor.h"
 #include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
 
 /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
 #define TEST_HOST_LOOP_N		2UL
@@ -89,9 +87,15 @@ static void *vcpu_worker(void *data)
 	return NULL;
 }
 
-static void run_test(enum vm_guest_mode mode, unsigned long iterations,
-		     uint64_t phys_offset, int wr_fract)
+struct test_params {
+	unsigned long iterations;
+	uint64_t phys_offset;
+	int wr_fract;
+};
+
+static void run_test(enum vm_guest_mode mode, void *arg)
 {
+	struct test_params *p = arg;
 	pthread_t *vcpu_threads;
 	struct kvm_vm *vm;
 	unsigned long *bmap;
@@ -108,7 +112,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
 	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
 
-	perf_test_args.wr_fract = wr_fract;
+	perf_test_args.wr_fract = p->wr_fract;
 
 	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
 	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
@@ -156,7 +160,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
-	while (iteration < iterations) {
+	while (iteration < p->iterations) {
 		/*
 		 * Incrementing the iteration number will start the vCPUs
 		 * dirtying memory again.
@@ -210,15 +214,15 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
-	avg = timespec_div(get_dirty_log_total, iterations);
+	avg = timespec_div(get_dirty_log_total, p->iterations);
 	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
-		iterations, get_dirty_log_total.tv_sec,
+		p->iterations, get_dirty_log_total.tv_sec,
 		get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
 
 	if (dirty_log_manual_caps) {
-		avg = timespec_div(clear_dirty_log_total, iterations);
+		avg = timespec_div(clear_dirty_log_total, p->iterations);
 		pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
-			iterations, clear_dirty_log_total.tv_sec,
+			p->iterations, clear_dirty_log_total.tv_sec,
 			clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
 	}
 
@@ -228,20 +232,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	kvm_vm_free(vm);
 }
 
-struct guest_mode {
-	bool supported;
-	bool enabled;
-};
-static struct guest_mode guest_modes[NUM_VM_MODES];
-
-#define guest_mode_init(mode, supported, enabled) ({ \
-	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
-})
-
 static void help(char *name)
 {
-	int i;
-
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-p offset] "
 	       "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name);
@@ -250,14 +242,7 @@ static void help(char *name)
 	       TEST_HOST_LOOP_N);
 	printf(" -p: specify guest physical test memory offset\n"
 	       "     Warning: a low offset can conflict with the loaded test code.\n");
-	printf(" -m: specify the guest mode ID to test "
-	       "(default: test all supported modes)\n"
-	       "     This option may be used multiple times.\n"
-	       "     Guest mode IDs:\n");
-	for (i = 0; i < NUM_VM_MODES; ++i) {
-		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
-		       guest_modes[i].supported ? " (supported)" : "");
-	}
+	guest_modes_help();
 	printf(" -b: specify the size of the memory region which should be\n"
 	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
 	       "     (default: 1G)\n");
@@ -272,74 +257,43 @@ static void help(char *name)
 
 int main(int argc, char *argv[])
 {
-	unsigned long iterations = TEST_HOST_LOOP_N;
-	bool mode_selected = false;
-	uint64_t phys_offset = 0;
-	unsigned int mode;
-	int opt, i;
-	int wr_fract = 1;
+	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+	struct test_params p = {
+		.iterations = TEST_HOST_LOOP_N,
+		.wr_fract = 1,
+	};
+	int opt;
 
 	dirty_log_manual_caps =
 		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
 	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
 				  KVM_DIRTY_LOG_INITIALLY_SET);
 
-#ifdef __x86_64__
-	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
-#endif
-#ifdef __aarch64__
-	guest_mode_init(VM_MODE_P40V48_4K, true, true);
-	guest_mode_init(VM_MODE_P40V48_64K, true, true);
-
-	{
-		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
-
-		if (limit >= 52)
-			guest_mode_init(VM_MODE_P52V48_64K, true, true);
-		if (limit >= 48) {
-			guest_mode_init(VM_MODE_P48V48_4K, true, true);
-			guest_mode_init(VM_MODE_P48V48_64K, true, true);
-		}
-	}
-#endif
-#ifdef __s390x__
-	guest_mode_init(VM_MODE_P40V48_4K, true, true);
-#endif
+	guest_modes_append_default();
 
 	while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) {
 		switch (opt) {
 		case 'i':
-			iterations = strtol(optarg, NULL, 10);
+			p.iterations = strtol(optarg, NULL, 10);
 			break;
 		case 'p':
-			phys_offset = strtoull(optarg, NULL, 0);
+			p.phys_offset = strtoull(optarg, NULL, 0);
 			break;
 		case 'm':
-			if (!mode_selected) {
-				for (i = 0; i < NUM_VM_MODES; ++i)
-					guest_modes[i].enabled = false;
-				mode_selected = true;
-			}
-			mode = strtoul(optarg, NULL, 10);
-			TEST_ASSERT(mode < NUM_VM_MODES,
-				    "Guest mode ID %d too big", mode);
-			guest_modes[mode].enabled = true;
+			guest_modes_cmdline(optarg);
 			break;
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
 		case 'f':
-			wr_fract = atoi(optarg);
-			TEST_ASSERT(wr_fract >= 1,
+			p.wr_fract = atoi(optarg);
+			TEST_ASSERT(p.wr_fract >= 1,
 				    "Write fraction cannot be less than one");
 			break;
 		case 'v':
 			nr_vcpus = atoi(optarg);
-			TEST_ASSERT(nr_vcpus > 0,
-				    "Must have a positive number of vCPUs");
-			TEST_ASSERT(nr_vcpus <= MAX_VCPUS,
-				    "This test does not currently support\n"
-				    "more than %d vCPUs.", MAX_VCPUS);
+			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
 		case 'h':
 		default:
@@ -348,18 +302,11 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	TEST_ASSERT(iterations >= 2, "The test should have at least two iterations");
+	TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");
 
-	pr_info("Test iterations: %"PRIu64"\n",	iterations);
+	pr_info("Test iterations: %"PRIu64"\n",	p.iterations);
 
-	for (i = 0; i < NUM_VM_MODES; ++i) {
-		if (!guest_modes[i].enabled)
-			continue;
-		TEST_ASSERT(guest_modes[i].supported,
-			    "Guest mode ID %d (%s) not supported.",
-			    i, vm_guest_mode_string(i));
-		run_test(i, iterations, phys_offset, wr_fract);
-	}
+	for_each_guest_mode(run_test, &p);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 471baecb7772..bb2752d78fe3 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -9,8 +9,6 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
-#include <time.h>
 #include <pthread.h>
 #include <semaphore.h>
 #include <sys/types.h>
@@ -20,8 +18,9 @@
 #include <linux/bitops.h>
 #include <asm/barrier.h>
 
-#include "test_util.h"
 #include "kvm_util.h"
+#include "test_util.h"
+#include "guest_modes.h"
 #include "processor.h"
 
 #define VCPU_ID				1
@@ -673,9 +672,15 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 #define DIRTY_MEM_BITS 30 /* 1G */
 #define PAGE_SHIFT_4K  12
 
-static void run_test(enum vm_guest_mode mode, unsigned long iterations,
-		     unsigned long interval, uint64_t phys_offset)
+struct test_params {
+	unsigned long iterations;
+	unsigned long interval;
+	uint64_t phys_offset;
+};
+
+static void run_test(enum vm_guest_mode mode, void *arg)
 {
+	struct test_params *p = arg;
 	struct kvm_vm *vm;
 	unsigned long *bmap;
 
@@ -709,12 +714,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	host_page_size = getpagesize();
 	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
 
-	if (!phys_offset) {
+	if (!p->phys_offset) {
 		guest_test_phys_mem = (vm_get_max_gfn(vm) -
 				       guest_num_pages) * guest_page_size;
 		guest_test_phys_mem &= ~(host_page_size - 1);
 	} else {
-		guest_test_phys_mem = phys_offset;
+		guest_test_phys_mem = p->phys_offset;
 	}
 
 #ifdef __s390x__
@@ -758,9 +763,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
 	pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
 
-	while (iteration < iterations) {
+	while (iteration < p->iterations) {
 		/* Give the vcpu thread some time to dirty some pages */
-		usleep(interval * 1000);
+		usleep(p->interval * 1000);
 		log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
 					     bmap, host_num_pages);
 		vm_dirty_log_verify(mode, bmap);
@@ -783,20 +788,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	kvm_vm_free(vm);
 }
 
-struct guest_mode {
-	bool supported;
-	bool enabled;
-};
-static struct guest_mode guest_modes[NUM_VM_MODES];
-
-#define guest_mode_init(mode, supported, enabled) ({ \
-	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
-})
-
 static void help(char *name)
 {
-	int i;
-
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-I interval] "
 	       "[-p offset] [-m mode]\n", name);
@@ -813,51 +806,23 @@ static void help(char *name)
 	printf(" -M: specify the host logging mode "
 	       "(default: run all log modes).  Supported modes: \n\t");
 	log_modes_dump();
-	printf(" -m: specify the guest mode ID to test "
-	       "(default: test all supported modes)\n"
-	       "     This option may be used multiple times.\n"
-	       "     Guest mode IDs:\n");
-	for (i = 0; i < NUM_VM_MODES; ++i) {
-		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
-		       guest_modes[i].supported ? " (supported)" : "");
-	}
+	guest_modes_help();
 	puts("");
 	exit(0);
 }
 
 int main(int argc, char *argv[])
 {
-	unsigned long iterations = TEST_HOST_LOOP_N;
-	unsigned long interval = TEST_HOST_LOOP_INTERVAL;
-	bool mode_selected = false;
-	uint64_t phys_offset = 0;
-	unsigned int mode;
-	int opt, i, j;
+	struct test_params p = {
+		.iterations = TEST_HOST_LOOP_N,
+		.interval = TEST_HOST_LOOP_INTERVAL,
+	};
+	int opt, i;
 
 	sem_init(&dirty_ring_vcpu_stop, 0, 0);
 	sem_init(&dirty_ring_vcpu_cont, 0, 0);
 
-#ifdef __x86_64__
-	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
-#endif
-#ifdef __aarch64__
-	guest_mode_init(VM_MODE_P40V48_4K, true, true);
-	guest_mode_init(VM_MODE_P40V48_64K, true, true);
-
-	{
-		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
-
-		if (limit >= 52)
-			guest_mode_init(VM_MODE_P52V48_64K, true, true);
-		if (limit >= 48) {
-			guest_mode_init(VM_MODE_P48V48_4K, true, true);
-			guest_mode_init(VM_MODE_P48V48_64K, true, true);
-		}
-	}
-#endif
-#ifdef __s390x__
-	guest_mode_init(VM_MODE_P40V48_4K, true, true);
-#endif
+	guest_modes_append_default();
 
 	while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) {
 		switch (opt) {
@@ -865,24 +830,16 @@ int main(int argc, char *argv[])
 			test_dirty_ring_count = strtol(optarg, NULL, 10);
 			break;
 		case 'i':
-			iterations = strtol(optarg, NULL, 10);
+			p.iterations = strtol(optarg, NULL, 10);
 			break;
 		case 'I':
-			interval = strtol(optarg, NULL, 10);
+			p.interval = strtol(optarg, NULL, 10);
 			break;
 		case 'p':
-			phys_offset = strtoull(optarg, NULL, 0);
+			p.phys_offset = strtoull(optarg, NULL, 0);
 			break;
 		case 'm':
-			if (!mode_selected) {
-				for (i = 0; i < NUM_VM_MODES; ++i)
-					guest_modes[i].enabled = false;
-				mode_selected = true;
-			}
-			mode = strtoul(optarg, NULL, 10);
-			TEST_ASSERT(mode < NUM_VM_MODES,
-				    "Guest mode ID %d too big", mode);
-			guest_modes[mode].enabled = true;
+			guest_modes_cmdline(optarg);
 			break;
 		case 'M':
 			if (!strcmp(optarg, "all")) {
@@ -911,32 +868,24 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
-	TEST_ASSERT(interval > 0, "Interval must be greater than zero");
+	TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+	TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
 
 	pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
-		iterations, interval);
+		p.iterations, p.interval);
 
 	srandom(time(0));
 
-	for (i = 0; i < NUM_VM_MODES; ++i) {
-		if (!guest_modes[i].enabled)
-			continue;
-		TEST_ASSERT(guest_modes[i].supported,
-			    "Guest mode ID %d (%s) not supported.",
-			    i, vm_guest_mode_string(i));
-		if (host_log_mode_option == LOG_MODE_ALL) {
-			/* Run each log mode */
-			for (j = 0; j < LOG_MODE_NUM; j++) {
-				pr_info("Testing Log Mode '%s'\n",
-					log_modes[j].name);
-				host_log_mode = j;
-				run_test(i, iterations, interval, phys_offset);
-			}
-		} else {
-			host_log_mode = host_log_mode_option;
-			run_test(i, iterations, interval, phys_offset);
+	if (host_log_mode_option == LOG_MODE_ALL) {
+		/* Run each log mode */
+		for (i = 0; i < LOG_MODE_NUM; i++) {
+			pr_info("Testing Log Mode '%s'\n", log_modes[i].name);
+			host_log_mode = i;
+			for_each_guest_mode(run_test, &p);
 		}
+	} else {
+		host_log_mode = host_log_mode_option;
+		for_each_guest_mode(run_test, &p);
 	}
 
 	return 0;
diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h
new file mode 100644
index 000000000000..b691df33e64e
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/guest_modes.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+struct guest_mode {
+	bool supported;
+	bool enabled;
+};
+
+extern struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_append(mode, supported, enabled) ({ \
+	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+void guest_modes_append_default(void);
+void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg);
+void guest_modes_help(void);
+void guest_modes_cmdline(const char *arg);
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
new file mode 100644
index 000000000000..25bff307c71f
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include "guest_modes.h"
+
+struct guest_mode guest_modes[NUM_VM_MODES];
+
+void guest_modes_append_default(void)
+{
+	guest_mode_append(VM_MODE_DEFAULT, true, true);
+
+#ifdef __aarch64__
+	guest_mode_append(VM_MODE_P40V48_64K, true, true);
+	{
+		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+		if (limit >= 52)
+			guest_mode_append(VM_MODE_P52V48_64K, true, true);
+		if (limit >= 48) {
+			guest_mode_append(VM_MODE_P48V48_4K, true, true);
+			guest_mode_append(VM_MODE_P48V48_64K, true, true);
+		}
+	}
+#endif
+}
+
+void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
+{
+	int i;
+
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		if (!guest_modes[i].enabled)
+			continue;
+		TEST_ASSERT(guest_modes[i].supported,
+			    "Guest mode ID %d (%s) not supported.",
+			    i, vm_guest_mode_string(i));
+		func(i, arg);
+	}
+}
+
+void guest_modes_help(void)
+{
+	int i;
+
+	printf(" -m: specify the guest mode ID to test\n"
+	       "     (default: test all supported modes)\n"
+	       "     This option may be used multiple times.\n"
+	       "     Guest mode IDs:\n");
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
+		       guest_modes[i].supported ? " (supported)" : "");
+	}
+}
+
+void guest_modes_cmdline(const char *arg)
+{
+	static bool mode_selected;
+	unsigned int mode;
+	int i;
+
+	if (!mode_selected) {
+		for (i = 0; i < NUM_VM_MODES; ++i)
+			guest_modes[i].enabled = false;
+		mode_selected = true;
+	}
+
+	mode = strtoul(optarg, NULL, 10);
+	TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode);
+	guest_modes[mode].enabled = true;
+}

From 1133e17ea7c9929ff7b90e81d8926f9e870748e9 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Fri, 18 Dec 2020 15:17:33 +0100
Subject: [PATCH 493/547] KVM: selftests: Use vm_create_with_vcpus in create_vm

Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201218141734.54359-3-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/demand_paging_test.c        |  2 +-
 .../selftests/kvm/dirty_log_perf_test.c       |  2 -
 .../testing/selftests/kvm/include/kvm_util.h  |  8 ++++
 .../selftests/kvm/include/perf_test_util.h    | 47 +++++--------------
 tools/testing/selftests/kvm/lib/kvm_util.c    |  9 +---
 5 files changed, 21 insertions(+), 47 deletions(-)

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 946161a9ce2d..b0c41de32e9b 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -7,7 +7,7 @@
  * Copyright (C) 2019, Google, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name and pipe2 */
+#define _GNU_SOURCE /* for pipe2 */
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 506741eb5d7f..36bea75a8d6f 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -8,8 +8,6 @@
  * Copyright (C) 2020, Google, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index dfa9d369e8fc..149766ecd68b 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -70,6 +70,14 @@ enum vm_guest_mode {
 #define vm_guest_mode_string(m) vm_guest_mode_string[m]
 extern const char * const vm_guest_mode_string[];
 
+struct vm_guest_mode_params {
+	unsigned int pa_bits;
+	unsigned int va_bits;
+	unsigned int page_size;
+	unsigned int page_shift;
+};
+extern const struct vm_guest_mode_params vm_guest_mode_params[];
+
 enum vm_mem_backing_src_type {
 	VM_MEM_SRC_ANONYMOUS,
 	VM_MEM_SRC_ANONYMOUS_THP,
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 239421e4f6b8..cd4c258f458d 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -13,9 +13,6 @@
 
 #define MAX_VCPUS 512
 
-#define PAGE_SHIFT_4K  12
-#define PTES_PER_4K_PT 512
-
 #define TEST_MEM_SLOT_INDEX		1
 
 /* Default guest test virtual memory offset */
@@ -94,41 +91,26 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
 				uint64_t vcpu_memory_bytes)
 {
 	struct kvm_vm *vm;
-	uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
 	uint64_t guest_num_pages;
 
-	/* Account for a few pages per-vCPU for stacks */
-	pages += DEFAULT_STACK_PGS * vcpus;
-
-	/*
-	 * Reserve twice the ammount of memory needed to map the test region and
-	 * the page table / stacks region, at 4k, for page tables. Do the
-	 * calculation with 4K page size: the smallest of all archs. (e.g., 64K
-	 * page size guest will need even less memory for page tables).
-	 */
-	pages += (2 * pages) / PTES_PER_4K_PT;
-	pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
-		 PTES_PER_4K_PT;
-	pages = vm_adjust_num_guest_pages(mode, pages);
-
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
-	vm = vm_create(mode, pages, O_RDWR);
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-#ifdef __x86_64__
-	vm_create_irqchip(vm);
-#endif
-
-	perf_test_args.vm = vm;
-	perf_test_args.guest_page_size = vm_get_page_size(vm);
 	perf_test_args.host_page_size = getpagesize();
+	perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size;
 
+	guest_num_pages = vm_adjust_num_guest_pages(mode,
+				(vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size);
+
+	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+		    "Guest memory size is not host page size aligned.");
 	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
 		    "Guest memory size is not guest page size aligned.");
 
-	guest_num_pages = (vcpus * vcpu_memory_bytes) /
-			  perf_test_args.guest_page_size;
-	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+	vm = vm_create_with_vcpus(mode, vcpus,
+				  (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
+				  0, guest_code, NULL);
+
+	perf_test_args.vm = vm;
 
 	/*
 	 * If there should be more memory in the guest test region than there
@@ -140,18 +122,13 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
 		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
 		    vcpu_memory_bytes);
 
-	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
-		    "Guest memory size is not host page size aligned.");
-
 	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
 			      perf_test_args.guest_page_size;
 	guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
-
 #ifdef __s390x__
 	/* Align to 1M (segment size) */
 	guest_test_phys_mem &= ~((1 << 20) - 1);
 #endif
-
 	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
 
 	/* Add an extra memory slot for testing */
@@ -177,8 +154,6 @@ static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
 	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
 		vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
 
-		vm_vcpu_add_default(vm, vcpu_id, guest_code);
-
 		vcpu_args->vcpu_id = vcpu_id;
 		vcpu_args->gva = guest_test_virt_mem +
 				 (vcpu_id * vcpu_memory_bytes);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 88ef7067f1e6..fa5a90e6c6f0 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -153,14 +153,7 @@ const char * const vm_guest_mode_string[] = {
 _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
 	       "Missing new mode strings?");
 
-struct vm_guest_mode_params {
-	unsigned int pa_bits;
-	unsigned int va_bits;
-	unsigned int page_size;
-	unsigned int page_shift;
-};
-
-static const struct vm_guest_mode_params vm_guest_mode_params[] = {
+const struct vm_guest_mode_params vm_guest_mode_params[] = {
 	{ 52, 48,  0x1000, 12 },
 	{ 52, 48, 0x10000, 16 },
 	{ 48, 48,  0x1000, 12 },

From b268b6f0bd36322358accb15c45683a9e1220231 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Fri, 18 Dec 2020 15:17:34 +0100
Subject: [PATCH 494/547] KVM: selftests: Implement perf_test_util more
 conventionally

It's not conventional C to put non-inline functions in header
files. Create a source file for the functions instead. Also
reduce the amount of globals and rename the functions to
something less generic.

Reviewed-by: Ben Gardon <bgardon@google.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201218141734.54359-4-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile          |   2 +-
 .../selftests/kvm/demand_paging_test.c        |  11 +-
 .../selftests/kvm/dirty_log_perf_test.c       |  22 +--
 .../testing/selftests/kvm/include/kvm_util.h  |   1 +
 .../selftests/kvm/include/perf_test_util.h    | 142 ++----------------
 .../selftests/kvm/lib/perf_test_util.c        | 134 +++++++++++++++++
 6 files changed, 166 insertions(+), 146 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/lib/perf_test_util.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a7286a08c3ae..fe41c6a0fa67 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -33,7 +33,7 @@ ifeq ($(ARCH),s390)
 	UNAME_M := s390x
 endif
 
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index b0c41de32e9b..cdad1eca72f7 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -36,12 +36,14 @@
 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
 #endif
 
+static int nr_vcpus = 1;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 static char *guest_data_prototype;
 
 static void *vcpu_worker(void *data)
 {
 	int ret;
-	struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+	struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
 	int vcpu_id = vcpu_args->vcpu_id;
 	struct kvm_vm *vm = perf_test_args.vm;
 	struct kvm_run *run;
@@ -263,7 +265,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	int vcpu_id;
 	int r;
 
-	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size);
 
 	perf_test_args.wr_fract = 1;
 
@@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
 	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
-	add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
 
 	if (p->use_uffd) {
 		uffd_handler_threads =
@@ -359,8 +361,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		perf_test_args.vcpu_args[0].pages * nr_vcpus /
 		((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
 
-	ucall_uninit(vm);
-	kvm_vm_free(vm);
+	perf_test_destroy_vm(vm);
 
 	free(guest_data_prototype);
 	free(vcpu_threads);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 36bea75a8d6f..2283a0ec74a9 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -22,11 +22,14 @@
 /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
 #define TEST_HOST_LOOP_N		2UL
 
+static int nr_vcpus = 1;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
 /* Host variables */
 static u64 dirty_log_manual_caps;
 static bool host_quit;
 static uint64_t iteration;
-static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];
+static uint64_t vcpu_last_completed_iteration[KVM_MAX_VCPUS];
 
 static void *vcpu_worker(void *data)
 {
@@ -38,7 +41,7 @@ static void *vcpu_worker(void *data)
 	struct timespec ts_diff;
 	struct timespec total = (struct timespec){0};
 	struct timespec avg;
-	struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+	struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
 	int vcpu_id = vcpu_args->vcpu_id;
 
 	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
@@ -108,7 +111,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct kvm_enable_cap cap = {};
 	struct timespec clear_dirty_log_total = (struct timespec){0};
 
-	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size);
 
 	perf_test_args.wr_fract = p->wr_fract;
 
@@ -126,7 +129,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
 	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
-	add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
 
 	sync_global_to_guest(vm, perf_test_args);
 
@@ -152,7 +155,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	/* Enable dirty logging */
 	clock_gettime(CLOCK_MONOTONIC, &start);
-	vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
+	vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX,
 				KVM_MEM_LOG_DIRTY_PAGES);
 	ts_diff = timespec_diff_now(start);
 	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
@@ -179,7 +182,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
 		clock_gettime(CLOCK_MONOTONIC, &start);
-		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+		kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap);
 
 		ts_diff = timespec_diff_now(start);
 		get_dirty_log_total = timespec_add(get_dirty_log_total,
@@ -189,7 +192,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 		if (dirty_log_manual_caps) {
 			clock_gettime(CLOCK_MONOTONIC, &start);
-			kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+			kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0,
 					       host_num_pages);
 
 			ts_diff = timespec_diff_now(start);
@@ -207,7 +210,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	/* Disable dirty logging */
 	clock_gettime(CLOCK_MONOTONIC, &start);
-	vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
+	vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0);
 	ts_diff = timespec_diff_now(start);
 	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -226,8 +229,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	free(bmap);
 	free(vcpu_threads);
-	ucall_uninit(vm);
-	kvm_vm_free(vm);
+	perf_test_destroy_vm(vm);
 }
 
 static void help(char *name)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 149766ecd68b..5cbb861525ed 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -16,6 +16,7 @@
 
 #include "sparsebit.h"
 
+#define KVM_MAX_VCPUS 512
 
 /*
  * Callers of kvm_util only have an incomplete/opaque description of the
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index cd4c258f458d..b1188823c31b 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -9,35 +9,15 @@
 #define SELFTEST_KVM_PERF_TEST_UTIL_H
 
 #include "kvm_util.h"
-#include "processor.h"
-
-#define MAX_VCPUS 512
-
-#define TEST_MEM_SLOT_INDEX		1
 
 /* Default guest test virtual memory offset */
 #define DEFAULT_GUEST_TEST_MEM		0xc0000000
 
 #define DEFAULT_PER_VCPU_MEM_SIZE	(1 << 30) /* 1G */
 
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-static uint64_t guest_test_phys_mem;
+#define PERF_TEST_MEM_SLOT_INDEX	1
 
-/*
- * Guest virtual memory offset of the testing memory slot.
- * Must not conflict with identity mapped test code.
- */
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
-
-/* Number of VCPUs for the test */
-static int nr_vcpus = 1;
-
-struct vcpu_args {
+struct perf_test_vcpu_args {
 	uint64_t gva;
 	uint64_t pages;
 
@@ -51,119 +31,21 @@ struct perf_test_args {
 	uint64_t guest_page_size;
 	int wr_fract;
 
-	struct vcpu_args vcpu_args[MAX_VCPUS];
+	struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
 };
 
-static struct perf_test_args perf_test_args;
+extern struct perf_test_args perf_test_args;
 
 /*
- * Continuously write to the first 8 bytes of each page in the
- * specified region.
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
  */
-static void guest_code(uint32_t vcpu_id)
-{
-	struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
-	uint64_t gva;
-	uint64_t pages;
-	int i;
+extern uint64_t guest_test_phys_mem;
 
-	/* Make sure vCPU args data structure is not corrupt. */
-	GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
-
-	gva = vcpu_args->gva;
-	pages = vcpu_args->pages;
-
-	while (true) {
-		for (i = 0; i < pages; i++) {
-			uint64_t addr = gva + (i * perf_test_args.guest_page_size);
-
-			if (i % perf_test_args.wr_fract == 0)
-				*(uint64_t *)addr = 0x0123456789ABCDEF;
-			else
-				READ_ONCE(*(uint64_t *)addr);
-		}
-
-		GUEST_SYNC(1);
-	}
-}
-
-static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
-				uint64_t vcpu_memory_bytes)
-{
-	struct kvm_vm *vm;
-	uint64_t guest_num_pages;
-
-	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
-	perf_test_args.host_page_size = getpagesize();
-	perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size;
-
-	guest_num_pages = vm_adjust_num_guest_pages(mode,
-				(vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size);
-
-	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
-		    "Guest memory size is not host page size aligned.");
-	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
-		    "Guest memory size is not guest page size aligned.");
-
-	vm = vm_create_with_vcpus(mode, vcpus,
-				  (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
-				  0, guest_code, NULL);
-
-	perf_test_args.vm = vm;
-
-	/*
-	 * If there should be more memory in the guest test region than there
-	 * can be pages in the guest, it will definitely cause problems.
-	 */
-	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
-		    "Requested more guest memory than address space allows.\n"
-		    "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
-		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
-		    vcpu_memory_bytes);
-
-	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
-			      perf_test_args.guest_page_size;
-	guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
-#ifdef __s390x__
-	/* Align to 1M (segment size) */
-	guest_test_phys_mem &= ~((1 << 20) - 1);
-#endif
-	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
-
-	/* Add an extra memory slot for testing */
-	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-				    guest_test_phys_mem,
-				    TEST_MEM_SLOT_INDEX,
-				    guest_num_pages, 0);
-
-	/* Do mapping for the demand paging memory slot */
-	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
-
-	ucall_init(vm, NULL);
-
-	return vm;
-}
-
-static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
-{
-	vm_paddr_t vcpu_gpa;
-	struct vcpu_args *vcpu_args;
-	int vcpu_id;
-
-	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
-		vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
-
-		vcpu_args->vcpu_id = vcpu_id;
-		vcpu_args->gva = guest_test_virt_mem +
-				 (vcpu_id * vcpu_memory_bytes);
-		vcpu_args->pages = vcpu_memory_bytes /
-				   perf_test_args.guest_page_size;
-
-		vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
-		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-			 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
-	}
-}
+struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+				uint64_t vcpu_memory_bytes);
+void perf_test_destroy_vm(struct kvm_vm *vm);
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes);
 
 #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
new file mode 100644
index 000000000000..9be1944c2d1c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "processor.h"
+
+struct perf_test_args perf_test_args;
+
+uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+/*
+ * Continuously write to the first 8 bytes of each page in the
+ * specified region.
+ */
+static void guest_code(uint32_t vcpu_id)
+{
+	struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+	uint64_t gva;
+	uint64_t pages;
+	int i;
+
+	/* Make sure vCPU args data structure is not corrupt. */
+	GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
+
+	gva = vcpu_args->gva;
+	pages = vcpu_args->pages;
+
+	while (true) {
+		for (i = 0; i < pages; i++) {
+			uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+
+			if (i % perf_test_args.wr_fract == 0)
+				*(uint64_t *)addr = 0x0123456789ABCDEF;
+			else
+				READ_ONCE(*(uint64_t *)addr);
+		}
+
+		GUEST_SYNC(1);
+	}
+}
+
+struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+				   uint64_t vcpu_memory_bytes)
+{
+	struct kvm_vm *vm;
+	uint64_t guest_num_pages;
+
+	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+	perf_test_args.host_page_size = getpagesize();
+	perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size;
+
+	guest_num_pages = vm_adjust_num_guest_pages(mode,
+				(vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size);
+
+	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+		    "Guest memory size is not host page size aligned.");
+	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
+		    "Guest memory size is not guest page size aligned.");
+
+	vm = vm_create_with_vcpus(mode, vcpus,
+				  (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
+				  0, guest_code, NULL);
+
+	perf_test_args.vm = vm;
+
+	/*
+	 * If there should be more memory in the guest test region than there
+	 * can be pages in the guest, it will definitely cause problems.
+	 */
+	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+		    "Requested more guest memory than address space allows.\n"
+		    "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
+		    vcpu_memory_bytes);
+
+	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
+			      perf_test_args.guest_page_size;
+	guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
+#ifdef __s390x__
+	/* Align to 1M (segment size) */
+	guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+
+	/* Add an extra memory slot for testing */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    guest_test_phys_mem,
+				    PERF_TEST_MEM_SLOT_INDEX,
+				    guest_num_pages, 0);
+
+	/* Do mapping for the demand paging memory slot */
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+
+	ucall_init(vm, NULL);
+
+	return vm;
+}
+
+void perf_test_destroy_vm(struct kvm_vm *vm)
+{
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+}
+
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
+{
+	vm_paddr_t vcpu_gpa;
+	struct perf_test_vcpu_args *vcpu_args;
+	int vcpu_id;
+
+	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+		vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+
+		vcpu_args->vcpu_id = vcpu_id;
+		vcpu_args->gva = guest_test_virt_mem +
+				 (vcpu_id * vcpu_memory_bytes);
+		vcpu_args->pages = vcpu_memory_bytes /
+				   perf_test_args.guest_page_size;
+
+		vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
+		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+			 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+	}
+}

From 2f80d502d627f30257ba7e3655e71c373b7d1a5a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 22 Dec 2020 05:20:43 -0500
Subject: [PATCH 495/547] KVM: x86: fix shift out of bounds reported by UBSAN

Since we know that e >= s, we can reassociate the left shift,
changing the shifted number from 1 to 2 in exchange for
decreasing the right hand side by 1.

Reported-by: syzbot+e87846c48bf72bc85311@syzkaller.appspotmail.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 9c4a9c8e43d9..581925e476d6 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -49,7 +49,7 @@ static inline u64 rsvd_bits(int s, int e)
 	if (e < s)
 		return 0;
 
-	return ((1ULL << (e - s + 1)) - 1) << s;
+	return ((2ULL << (e - s)) - 1) << s;
 }
 
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask);

From 7f0c1f1a8277de906a242a6ef907476149f006de Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 6 Jan 2021 10:29:16 -0800
Subject: [PATCH 496/547] MAINTAINERS: Really update email address for Sean
 Christopherson

Use my @google.com address in MAINTAINERS, somehow only the .mailmap
entry was added when the original update patch was applied.

Fixes: c2b1209d852f ("MAINTAINERS: Update email address for Sean Christopherson")
Cc: kvm@vger.kernel.org
Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210106182916.331743-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 281de213ef47..9d8b77332ae3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9672,7 +9672,7 @@ F:	tools/testing/selftests/kvm/s390x/
 
 KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
 M:	Paolo Bonzini <pbonzini@redhat.com>
-R:	Sean Christopherson <sean.j.christopherson@intel.com>
+R:	Sean Christopherson <seanjc@google.com>
 R:	Vitaly Kuznetsov <vkuznets@redhat.com>
 R:	Wanpeng Li <wanpengli@tencent.com>
 R:	Jim Mattson <jmattson@google.com>

From de7860c8a388e4cb757c7da26889b9e2641ffcfe Mon Sep 17 00:00:00 2001
From: Stephen Zhang <stephenzhangzsd@gmail.com>
Date: Fri, 18 Dec 2020 15:51:37 +0800
Subject: [PATCH 497/547] KVM: x86: change in pv_eoi_get_pending() to make code
 more readable

Signed-off-by: Stephen Zhang <stephenzhangzsd@gmail.com>
Message-Id: <1608277897-1932-1-git-send-email-stephenzhangzsd@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3136e05831cf..78823227c592 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -674,7 +674,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 		return false;
 	}
-	return val & 0x1;
+	return val & KVM_PV_EOI_ENABLED;
 }
 
 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)

From 88bf56d04bc3564542049ec4ec168a8b60d0b48c Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@linux.alibaba.com>
Date: Thu, 17 Dec 2020 23:41:18 +0800
Subject: [PATCH 498/547] kvm: check tlbs_dirty directly

In kvm_mmu_notifier_invalidate_range_start(), tlbs_dirty is used as:
        need_tlb_flush |= kvm->tlbs_dirty;
with need_tlb_flush's type being int and tlbs_dirty's type being long.

It means that tlbs_dirty is always used as int and the higher 32 bits
is useless.  We need to check tlbs_dirty in a correct way and this
change checks it directly without propagating it to need_tlb_flush.

Note: it's _extremely_ unlikely this neglecting of higher 32 bits can
cause problems in practice.  It would require encountering tlbs_dirty
on a 4 billion count boundary, and KVM would need to be using shadow
paging or be running a nested guest.

Cc: stable@vger.kernel.org
Fixes: a4ee1ca4a36e ("KVM: MMU: delay flush all tlbs on sync_page path")
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
Message-Id: <20201217154118.16497-1-jiangshanlai@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3abcb2ce5b7d..19dae28904f7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -485,9 +485,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	kvm->mmu_notifier_count++;
 	need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
 					     range->flags);
-	need_tlb_flush |= kvm->tlbs_dirty;
 	/* we've to flush the tlb before the pages can be freed */
-	if (need_tlb_flush)
+	if (need_tlb_flush || kvm->tlbs_dirty)
 		kvm_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);

From a889ea54b3daa63ee1463dc19ed699407d61458b Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Wed, 6 Jan 2021 16:19:34 -0800
Subject: [PATCH 499/547] KVM: x86/mmu: Ensure TDP MMU roots are freed after
 yield

Many TDP MMU functions which need to perform some action on all TDP MMU
roots hold a reference on that root so that they can safely drop the MMU
lock in order to yield to other threads. However, when releasing the
reference on the root, there is a bug: the root will not be freed even
if its reference count (root_count) is reduced to 0.

To simplify acquiring and releasing references on TDP MMU root pages, and
to ensure that these roots are properly freed, move the get/put operations
into another TDP MMU root iterator macro.

Moving the get/put operations into an iterator macro also helps
simplify control flow when a root does need to be freed. Note that using
the list_for_each_entry_safe macro would not have been appropriate in
this situation because it could keep a pointer to the next root across
an MMU lock release + reacquire, during which time that root could be
freed.

Reported-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU")
Fixes: 063afacd8730 ("kvm: x86/mmu: Support invalidate range MMU notifier for TDP MMU")
Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU")
Fixes: 14881998566d ("kvm: x86/mmu: Support disabling dirty logging for the tdp MMU")
Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210107001935.3732070-1-bgardon@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 104 +++++++++++++++++--------------------
 1 file changed, 48 insertions(+), 56 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6574af2d0994..2ef8615f9dba 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -44,7 +44,48 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
 	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
 }
 
-#define for_each_tdp_mmu_root(_kvm, _root)			    \
+static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
+{
+	if (kvm_mmu_put_root(kvm, root))
+		kvm_tdp_mmu_free_root(kvm, root);
+}
+
+static inline bool tdp_mmu_next_root_valid(struct kvm *kvm,
+					   struct kvm_mmu_page *root)
+{
+	lockdep_assert_held(&kvm->mmu_lock);
+
+	if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link))
+		return false;
+
+	kvm_mmu_get_root(kvm, root);
+	return true;
+
+}
+
+static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
+						     struct kvm_mmu_page *root)
+{
+	struct kvm_mmu_page *next_root;
+
+	next_root = list_next_entry(root, link);
+	tdp_mmu_put_root(kvm, root);
+	return next_root;
+}
+
+/*
+ * Note: this iterator gets and puts references to the roots it iterates over.
+ * This makes it safe to release the MMU lock and yield within the loop, but
+ * if exiting the loop early, the caller must drop the reference to the most
+ * recent root. (Unless keeping a live reference is desirable.)
+ */
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root)				\
+	for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots,	\
+				      typeof(*_root), link);		\
+	     tdp_mmu_next_root_valid(_kvm, _root);			\
+	     _root = tdp_mmu_next_root(_kvm, _root))
+
+#define for_each_tdp_mmu_root(_kvm, _root)				\
 	list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
 
 bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
@@ -447,18 +488,9 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
 	struct kvm_mmu_page *root;
 	bool flush = false;
 
-	for_each_tdp_mmu_root(kvm, root) {
-		/*
-		 * Take a reference on the root so that it cannot be freed if
-		 * this thread releases the MMU lock and yields in this loop.
-		 */
-		kvm_mmu_get_root(kvm, root);
-
+	for_each_tdp_mmu_root_yield_safe(kvm, root)
 		flush |= zap_gfn_range(kvm, root, start, end, true);
 
-		kvm_mmu_put_root(kvm, root);
-	}
-
 	return flush;
 }
 
@@ -619,13 +651,7 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
 	int ret = 0;
 	int as_id;
 
-	for_each_tdp_mmu_root(kvm, root) {
-		/*
-		 * Take a reference on the root so that it cannot be freed if
-		 * this thread releases the MMU lock and yields in this loop.
-		 */
-		kvm_mmu_get_root(kvm, root);
-
+	for_each_tdp_mmu_root_yield_safe(kvm, root) {
 		as_id = kvm_mmu_page_as_id(root);
 		slots = __kvm_memslots(kvm, as_id);
 		kvm_for_each_memslot(memslot, slots) {
@@ -647,8 +673,6 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
 			ret |= handler(kvm, memslot, root, gfn_start,
 				       gfn_end, data);
 		}
-
-		kvm_mmu_put_root(kvm, root);
 	}
 
 	return ret;
@@ -838,21 +862,13 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
 	int root_as_id;
 	bool spte_set = false;
 
-	for_each_tdp_mmu_root(kvm, root) {
+	for_each_tdp_mmu_root_yield_safe(kvm, root) {
 		root_as_id = kvm_mmu_page_as_id(root);
 		if (root_as_id != slot->as_id)
 			continue;
 
-		/*
-		 * Take a reference on the root so that it cannot be freed if
-		 * this thread releases the MMU lock and yields in this loop.
-		 */
-		kvm_mmu_get_root(kvm, root);
-
 		spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
 			     slot->base_gfn + slot->npages, min_level);
-
-		kvm_mmu_put_root(kvm, root);
 	}
 
 	return spte_set;
@@ -906,21 +922,13 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 	int root_as_id;
 	bool spte_set = false;
 
-	for_each_tdp_mmu_root(kvm, root) {
+	for_each_tdp_mmu_root_yield_safe(kvm, root) {
 		root_as_id = kvm_mmu_page_as_id(root);
 		if (root_as_id != slot->as_id)
 			continue;
 
-		/*
-		 * Take a reference on the root so that it cannot be freed if
-		 * this thread releases the MMU lock and yields in this loop.
-		 */
-		kvm_mmu_get_root(kvm, root);
-
 		spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
 				slot->base_gfn + slot->npages);
-
-		kvm_mmu_put_root(kvm, root);
 	}
 
 	return spte_set;
@@ -1029,21 +1037,13 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
 	int root_as_id;
 	bool spte_set = false;
 
-	for_each_tdp_mmu_root(kvm, root) {
+	for_each_tdp_mmu_root_yield_safe(kvm, root) {
 		root_as_id = kvm_mmu_page_as_id(root);
 		if (root_as_id != slot->as_id)
 			continue;
 
-		/*
-		 * Take a reference on the root so that it cannot be freed if
-		 * this thread releases the MMU lock and yields in this loop.
-		 */
-		kvm_mmu_get_root(kvm, root);
-
 		spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
 				slot->base_gfn + slot->npages);
-
-		kvm_mmu_put_root(kvm, root);
 	}
 	return spte_set;
 }
@@ -1089,21 +1089,13 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	struct kvm_mmu_page *root;
 	int root_as_id;
 
-	for_each_tdp_mmu_root(kvm, root) {
+	for_each_tdp_mmu_root_yield_safe(kvm, root) {
 		root_as_id = kvm_mmu_page_as_id(root);
 		if (root_as_id != slot->as_id)
 			continue;
 
-		/*
-		 * Take a reference on the root so that it cannot be freed if
-		 * this thread releases the MMU lock and yields in this loop.
-		 */
-		kvm_mmu_get_root(kvm, root);
-
 		zap_collapsible_spte_range(kvm, root, slot->base_gfn,
 					   slot->base_gfn + slot->npages);
-
-		kvm_mmu_put_root(kvm, root);
 	}
 }
 

From c0dba6e46825716db15c4b3a8f05c85b4a59edda Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Wed, 6 Jan 2021 16:19:35 -0800
Subject: [PATCH 500/547] KVM: x86/mmu: Clarify TDP MMU page list invariants

The tdp_mmu_roots and tdp_mmu_pages in struct kvm_arch should only contain
pages with tdp_mmu_page set to true. tdp_mmu_pages should not contain any
pages with a non-zero root_count and tdp_mmu_roots should only contain
pages with a positive root_count, unless a thread holds the MMU lock and
is in the process of modifying the list. Various functions expect these
invariants to be maintained, but they are not explictily documented. Add
to the comments on both fields to document the above invariants.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210107001935.3732070-2-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3ab7b46087b7..afed3da3b3a0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1010,9 +1010,21 @@ struct kvm_arch {
 	 */
 	bool tdp_mmu_enabled;
 
-	/* List of struct tdp_mmu_pages being used as roots */
+	/*
+	 * List of struct kvmp_mmu_pages being used as roots.
+	 * All struct kvm_mmu_pages in the list should have
+	 * tdp_mmu_page set.
+	 * All struct kvm_mmu_pages in the list should have a positive
+	 * root_count except when a thread holds the MMU lock and is removing
+	 * an entry from the list.
+	 */
 	struct list_head tdp_mmu_roots;
-	/* List of struct tdp_mmu_pages not being used as roots */
+
+	/*
+	 * List of struct kvmp_mmu_pages not being used as roots.
+	 * All struct kvm_mmu_pages in the list should have
+	 * tdp_mmu_page set and a root_count of 0.
+	 */
 	struct list_head tdp_mmu_pages;
 };
 

From 81f76adad560dfc39cb9625cf1e00a7e2b7b88df Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Thu, 7 Jan 2021 11:38:52 +0200
Subject: [PATCH 501/547] KVM: nSVM: correctly restore nested_run_pending on
 migration

The code to store it on the migration exists, but no code was restoring it.

One of the side effects of fixing this is that L1->L2 injected events
are no longer lost when migration happens with nested run pending.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210107093854.882483-3-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index b0b667456b2e..a466336aab43 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1194,6 +1194,10 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	 * in the registers, the save area of the nested state instead
 	 * contains saved L1 state.
 	 */
+
+	svm->nested.nested_run_pending =
+		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
 	copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
 	hsave->save = *save;
 

From 56fe28de8c4f0167275c411c0daa5709e9a47bd7 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Thu, 7 Jan 2021 11:38:54 +0200
Subject: [PATCH 502/547] KVM: nSVM: mark vmcb as dirty when forcingly leaving
 the guest mode

We overwrite most of vmcb fields while doing so, so we must
mark it as dirty.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210107093854.882483-5-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index a466336aab43..a622e63739b4 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -754,6 +754,7 @@ void svm_leave_nested(struct vcpu_svm *svm)
 		leave_guest_mode(&svm->vcpu);
 		copy_vmcb_control_area(&vmcb->control, &hsave->control);
 		nested_svm_uninit_mmu_context(&svm->vcpu);
+		vmcb_mark_all_dirty(svm->vmcb);
 	}
 
 	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);

From f2c7ef3ba9556d62a7e2bb23b563c6510007d55c Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Thu, 7 Jan 2021 11:38:51 +0200
Subject: [PATCH 503/547] KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on
 nested vmexit

It is possible to exit the nested guest mode, entered by
svm_set_nested_state prior to first vm entry to it (e.g due to pending event)
if the nested run was not pending during the migration.

In this case we must not switch to the nested msr permission bitmap.
Also add a warning to catch similar cases in the future.

Fixes: a7d5c7ce41ac1 ("KVM: nSVM: delay MSR permission processing to first nested VM run")

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210107093854.882483-2-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 3 +++
 arch/x86/kvm/vmx/nested.c | 2 ++
 arch/x86/kvm/x86.c        | 4 +++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index a622e63739b4..cb4c6ee10029 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -199,6 +199,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+
 	if (!nested_svm_vmrun_msrpm(svm)) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror =
@@ -595,6 +596,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 	svm->nested.vmcb12_gpa = 0;
 	WARN_ON_ONCE(svm->nested.nested_run_pending);
 
+	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
+
 	/* in case we halted in L2 */
 	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e2f26564a12d..0fbb46990dfc 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4442,6 +4442,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 	/* trying to cancel vmlaunch/vmresume is a bug */
 	WARN_ON_ONCE(vmx->nested.nested_run_pending);
 
+	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+
 	/* Service the TLB flush request for L2 before switching to L1. */
 	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
 		kvm_vcpu_flush_tlb_current(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e28ab76b80dc..f6e7b25c40e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8789,7 +8789,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	if (kvm_request_pending(vcpu)) {
 		if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
-			if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
+			if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
+				;
+			else if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
 				r = 0;
 				goto out;
 			}

From 647daca25d24fb6eadc7b6cd680ad3e6eed0f3d5 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 4 Jan 2021 14:20:01 -0600
Subject: [PATCH 504/547] KVM: SVM: Add support for booting APs in an SEV-ES
 guest

Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.

Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.

First AP boot (first INIT-SIPI-SIPI sequence):
  Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
  support. It is up to the guest to transfer control of the AP to the
  proper location.

Subsequent AP boot:
  KVM will expect to receive an AP Reset Hold exit event indicating that
  the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
  awaken it. When the AP Reset Hold exit event is received, KVM will place
  the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
  sequence, KVM will make the vCPU runnable. It is again up to the guest
  to then transfer control of the AP to the proper location.

  To differentiate between an actual HLT and an AP Reset Hold, a new MP
  state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is
  placed in upon receiving the AP Reset Hold exit event. Additionally, to
  communicate the AP Reset Hold exit event up to userspace (if needed), a
  new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD.

A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order
to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the
original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds
a new function that, for non SEV-ES guests, invokes the original SIPI
delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests,
implements the logic above.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <e8fbebe8eb161ceaabdad7c01a5859a78b424d5e.1609791600.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/lapic.c            |  2 +-
 arch/x86/kvm/svm/sev.c          | 22 ++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c          | 10 ++++++++++
 arch/x86/kvm/svm/svm.h          |  2 ++
 arch/x86/kvm/vmx/vmx.c          |  2 ++
 arch/x86/kvm/x86.c              | 26 +++++++++++++++++++++-----
 include/uapi/linux/kvm.h        |  2 ++
 8 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index afed3da3b3a0..3d6616f6f6ef 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1299,6 +1299,8 @@ struct kvm_x86_ops {
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
 	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
+
+	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 };
 
 struct kvm_x86_nested_ops {
@@ -1480,6 +1482,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
 int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 78823227c592..43cceadd073e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2898,7 +2898,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 			/* evaluate pending_events before reading the vector */
 			smp_rmb();
 			sipi_vector = apic->sipi_vector;
-			kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
+			kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector);
 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		}
 	}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 563ced07b0b8..c8ffdbc81709 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1563,6 +1563,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 			goto vmgexit_err;
 		break;
 	case SVM_VMGEXIT_NMI_COMPLETE:
+	case SVM_VMGEXIT_AP_HLT_LOOP:
 	case SVM_VMGEXIT_AP_JUMP_TABLE:
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		break;
@@ -1888,6 +1889,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
 	case SVM_VMGEXIT_NMI_COMPLETE:
 		ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
 		break;
+	case SVM_VMGEXIT_AP_HLT_LOOP:
+		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
+		break;
 	case SVM_VMGEXIT_AP_JUMP_TABLE: {
 		struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
 
@@ -2040,3 +2044,21 @@ void sev_es_vcpu_put(struct vcpu_svm *svm)
 		wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
 	}
 }
+
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	/* First SIPI: Use the values as initially set by the VMM */
+	if (!svm->received_first_sipi) {
+		svm->received_first_sipi = true;
+		return;
+	}
+
+	/*
+	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
+	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
+	 * non-zero value.
+	 */
+	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6824d611dc5d..7ef171790d02 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4382,6 +4382,14 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
 		   (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
 }
 
+static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+	if (!sev_es_guest(vcpu->kvm))
+		return kvm_vcpu_deliver_sipi_vector(vcpu, vector);
+
+	sev_vcpu_deliver_sipi_vector(vcpu, vector);
+}
+
 static void svm_vm_destroy(struct kvm *kvm)
 {
 	avic_vm_destroy(kvm);
@@ -4524,6 +4532,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.msr_filter_changed = svm_msr_filter_changed,
 	.complete_emulated_msr = svm_complete_emulated_msr,
+
+	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5431e6335e2e..0fe874ae5498 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -185,6 +185,7 @@ struct vcpu_svm {
 	struct vmcb_save_area *vmsa;
 	struct ghcb *ghcb;
 	struct kvm_host_map ghcb_map;
+	bool received_first_sipi;
 
 	/* SEV-ES scratch area support */
 	void *ghcb_sa;
@@ -591,6 +592,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
 void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
 void sev_es_vcpu_put(struct vcpu_svm *svm);
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 
 /* vmenter.S */
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 75c9c6a0a3a4..2af05d3b0590 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7707,6 +7707,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.msr_filter_changed = vmx_msr_filter_changed,
 	.complete_emulated_msr = kvm_complete_insn_gp,
 	.cpu_dirty_log_size = vmx_cpu_dirty_log_size,
+
+	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f6e7b25c40e2..0287840b93e0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7976,17 +7976,22 @@ void kvm_arch_exit(void)
 	kmem_cache_destroy(x86_fpu_cache);
 }
 
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
 {
 	++vcpu->stat.halt_exits;
 	if (lapic_in_kernel(vcpu)) {
-		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+		vcpu->arch.mp_state = state;
 		return 1;
 	} else {
-		vcpu->run->exit_reason = KVM_EXIT_HLT;
+		vcpu->run->exit_reason = reason;
 		return 0;
 	}
 }
+
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+{
+	return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
+}
 EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -8000,6 +8005,14 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
+{
+	int ret = kvm_skip_emulated_instruction(vcpu);
+
+	return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
+
 #ifdef CONFIG_X86_64
 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
 			        unsigned long clock_type)
@@ -9096,6 +9109,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 	kvm_apic_accept_events(vcpu);
 	switch(vcpu->arch.mp_state) {
 	case KVM_MP_STATE_HALTED:
+	case KVM_MP_STATE_AP_RESET_HOLD:
 		vcpu->arch.pv.pv_unhalted = false;
 		vcpu->arch.mp_state =
 			KVM_MP_STATE_RUNNABLE;
@@ -9522,8 +9536,9 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 		kvm_load_guest_fpu(vcpu);
 
 	kvm_apic_accept_events(vcpu);
-	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
-					vcpu->arch.pv.pv_unhalted)
+	if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
+	     vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
+	    vcpu->arch.pv.pv_unhalted)
 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
 	else
 		mp_state->mp_state = vcpu->arch.mp_state;
@@ -10154,6 +10169,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
 	kvm_rip_write(vcpu, 0);
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
 
 int kvm_arch_hardware_enable(void)
 {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 886802b8ffba..374c67875cdb 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -251,6 +251,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_X86_RDMSR        29
 #define KVM_EXIT_X86_WRMSR        30
 #define KVM_EXIT_DIRTY_RING_FULL  31
+#define KVM_EXIT_AP_RESET_HOLD    32
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -573,6 +574,7 @@ struct kvm_vapic_addr {
 #define KVM_MP_STATE_CHECK_STOP        6
 #define KVM_MP_STATE_OPERATING         7
 #define KVM_MP_STATE_LOAD              8
+#define KVM_MP_STATE_AP_RESET_HOLD     9
 
 struct kvm_mp_state {
 	__u32 mp_state;

From 717df0f4cdc9044c415431a3522b3e9ccca5b4a3 Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Wed, 6 Jan 2021 09:59:06 +0530
Subject: [PATCH 505/547] chtls: Fix hardware tid leak

send_abort_rpl() is not calculating cpl_abort_req_rss offset and
ends up sending wrong TID with abort_rpl WR causng tid leaks.
Replaced send_abort_rpl() with chtls_send_abort_rpl() as it is
redundant.

Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition")
Signed-off-by: Rohit Maheshwari <rohitm@chelsio.com>
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../chelsio/inline_crypto/chtls/chtls_cm.c    | 39 ++-----------------
 1 file changed, 3 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index a0e0d8a83681..561b5f2273af 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -1997,39 +1997,6 @@ static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev,
 	spin_unlock_bh(&cdev->deferq.lock);
 }
 
-static void send_abort_rpl(struct sock *sk, struct sk_buff *skb,
-			   struct chtls_dev *cdev, int status, int queue)
-{
-	struct cpl_abort_req_rss *req = cplhdr(skb);
-	struct sk_buff *reply_skb;
-	struct chtls_sock *csk;
-
-	csk = rcu_dereference_sk_user_data(sk);
-
-	reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
-			      GFP_KERNEL);
-
-	if (!reply_skb) {
-		req->status = (queue << 1);
-		t4_defer_reply(skb, cdev, send_defer_abort_rpl);
-		return;
-	}
-
-	set_abort_rpl_wr(reply_skb, GET_TID(req), status);
-	kfree_skb(skb);
-
-	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
-	if (csk_conn_inline(csk)) {
-		struct l2t_entry *e = csk->l2t_entry;
-
-		if (e && sk->sk_state != TCP_SYN_RECV) {
-			cxgb4_l2t_send(csk->egress_dev, reply_skb, e);
-			return;
-		}
-	}
-	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
-}
-
 static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb,
 				 struct chtls_dev *cdev,
 				 int status, int queue)
@@ -2079,8 +2046,8 @@ static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb)
 
 	skb->sk	= NULL;
 	do_abort_syn_rcv(child, lsk);
-	send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
-		       CPL_ABORT_NO_RST, queue);
+	chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
+			     CPL_ABORT_NO_RST, queue);
 }
 
 static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
@@ -2111,7 +2078,7 @@ static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
 		int queue = csk->txq_idx;
 
 		do_abort_syn_rcv(sk, psk);
-		send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);
+		chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);
 	} else {
 		skb->sk = sk;
 		BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv;

From 827d329105bfde6701f0077e34a09c4a86e27145 Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Wed, 6 Jan 2021 09:59:07 +0530
Subject: [PATCH 506/547] chtls: Remove invalid set_tcb call

At the time of SYN_RECV, connection information is not
initialized at FW, updating tcb flag over uninitialized
connection causes adapter crash. We don't need to
update the flag during SYN_RECV state, so avoid this.

Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition")
Signed-off-by: Rohit Maheshwari <rohitm@chelsio.com>
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 561b5f2273af..431d1e3844ab 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -2096,9 +2096,6 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
 	int queue = csk->txq_idx;
 
 	if (is_neg_adv(req->status)) {
-		if (sk->sk_state == TCP_SYN_RECV)
-			chtls_set_tcb_tflag(sk, 0, 0);
-
 		kfree_skb(skb);
 		return;
 	}

From 5a5fac9966bb6d513198634b0b1357be7e8447d2 Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Wed, 6 Jan 2021 09:59:08 +0530
Subject: [PATCH 507/547] chtls: Fix panic when route to peer not configured

If route to peer is not configured, we might get non tls
devices from dst_neigh_lookup() which is invalid, adding a
check to avoid it.

Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition")
Signed-off-by: Rohit Maheshwari <rohitm@chelsio.com>
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../chelsio/inline_crypto/chtls/chtls_cm.c         | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 431d1e3844ab..04a8bd5af3b9 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -1109,6 +1109,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 				    const struct cpl_pass_accept_req *req,
 				    struct chtls_dev *cdev)
 {
+	struct adapter *adap = pci_get_drvdata(cdev->pdev);
 	struct neighbour *n = NULL;
 	struct inet_sock *newinet;
 	const struct iphdr *iph;
@@ -1118,9 +1119,10 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 	struct dst_entry *dst;
 	struct tcp_sock *tp;
 	struct sock *newsk;
+	bool found = false;
 	u16 port_id;
 	int rxq_idx;
-	int step;
+	int step, i;
 
 	iph = (const struct iphdr *)network_hdr;
 	newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb);
@@ -1152,7 +1154,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 		n = dst_neigh_lookup(dst, &ip6h->saddr);
 #endif
 	}
-	if (!n)
+	if (!n || !n->dev)
 		goto free_sk;
 
 	ndev = n->dev;
@@ -1161,6 +1163,13 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 	if (is_vlan_dev(ndev))
 		ndev = vlan_dev_real_dev(ndev);
 
+	for_each_port(adap, i)
+		if (cdev->ports[i] == ndev)
+			found = true;
+
+	if (!found)
+		goto free_dst;
+
 	port_id = cxgb4_port_idx(ndev);
 
 	csk = chtls_sock_create(cdev);
@@ -1238,6 +1247,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 free_csk:
 	chtls_sock_release(&csk->kref);
 free_dst:
+	neigh_release(n);
 	dst_release(dst);
 free_sk:
 	inet_csk_prepare_forced_close(newsk);

From f8d15d29d6e6b32704c8fce9229716ca145a0de2 Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Wed, 6 Jan 2021 09:59:09 +0530
Subject: [PATCH 508/547] chtls: Avoid unnecessary freeing of oreq pointer

In chtls_pass_accept_request(), removing the chtls_reqsk_free()
call to avoid oreq freeing twice. Here oreq is the pointer to
struct request_sock.

Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition")
Signed-off-by: Rohit Maheshwari <rohitm@chelsio.com>
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 04a8bd5af3b9..3022c802d09a 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -1397,7 +1397,7 @@ static void chtls_pass_accept_request(struct sock *sk,
 
 	newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev);
 	if (!newsk)
-		goto free_oreq;
+		goto reject;
 
 	if (chtls_get_module(newsk))
 		goto reject;
@@ -1413,8 +1413,6 @@ static void chtls_pass_accept_request(struct sock *sk,
 	kfree_skb(skb);
 	return;
 
-free_oreq:
-	chtls_reqsk_free(oreq);
 reject:
 	mk_tid_release(reply_skb, 0, tid);
 	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);

From a84b2c0d5fa23da6d6c8c0d5f5c93184a2744d3e Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Wed, 6 Jan 2021 09:59:10 +0530
Subject: [PATCH 509/547] chtls: Replace skb_dequeue with skb_peek

The skb is unlinked twice, one in __skb_dequeue in function
chtls_reset_synq() and another in cleanup_syn_rcv_conn().
So in this patch using skb_peek() instead of __skb_dequeue(),
so that unlink will be handled only in cleanup_syn_rcv_conn().

Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition")
Signed-off-by: Vinay Kumar Yadav <vinay.yadav@chelsio.com>
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 3022c802d09a..ff3969a24d74 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -621,7 +621,7 @@ static void chtls_reset_synq(struct listen_ctx *listen_ctx)
 
 	while (!skb_queue_empty(&listen_ctx->synq)) {
 		struct chtls_sock *csk =
-			container_of((struct synq *)__skb_dequeue
+			container_of((struct synq *)skb_peek
 				(&listen_ctx->synq), struct chtls_sock, synq);
 		struct sock *child = csk->sk;
 

From eade1e0a4fb31d48eeb1589d9bb859ae4dd6181d Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Wed, 6 Jan 2021 09:59:11 +0530
Subject: [PATCH 510/547] chtls: Added a check to avoid NULL pointer
 dereference

In case of server removal lookup_stid() may return NULL pointer, which
is used as listen_ctx. So added a check before accessing this pointer.

Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition")
Signed-off-by: Vinay Kumar Yadav <vinay.yadav@chelsio.com>
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index ff3969a24d74..1c6d3c93a0c8 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -1597,6 +1597,11 @@ static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb)
 			sk_wake_async(sk, 0, POLL_OUT);
 
 		data = lookup_stid(cdev->tids, stid);
+		if (!data) {
+			/* listening server close */
+			kfree_skb(skb);
+			goto unlock;
+		}
 		lsk = ((struct listen_ctx *)data)->lsk;
 
 		bh_lock_sock(lsk);

From 15ef6b0e30b354253e2c10b3836bc59767eb162b Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Wed, 6 Jan 2021 09:59:12 +0530
Subject: [PATCH 511/547] chtls: Fix chtls resources release sequence

CPL_ABORT_RPL is sent after releasing the resources by calling
chtls_release_resources(sk); and chtls_conn_done(sk);
eventually causing kernel panic. Fixing it by calling release
in appropriate order.

Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition")
Signed-off-by: Vinay Kumar Yadav <vinay.yadav@chelsio.com>
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c   | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 1c6d3c93a0c8..51dd030b3b36 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -2058,9 +2058,9 @@ static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb)
 	queue = csk->txq_idx;
 
 	skb->sk	= NULL;
-	do_abort_syn_rcv(child, lsk);
 	chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
 			     CPL_ABORT_NO_RST, queue);
+	do_abort_syn_rcv(child, lsk);
 }
 
 static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
@@ -2090,8 +2090,8 @@ static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
 	if (!sock_owned_by_user(psk)) {
 		int queue = csk->txq_idx;
 
-		do_abort_syn_rcv(sk, psk);
 		chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);
+		do_abort_syn_rcv(sk, psk);
 	} else {
 		skb->sk = sk;
 		BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv;
@@ -2135,12 +2135,12 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb))
 			return;
 
-		chtls_release_resources(sk);
-		chtls_conn_done(sk);
 	}
 
 	chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev,
 			     rst_status, queue);
+	chtls_release_resources(sk);
+	chtls_conn_done(sk);
 }
 
 static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb)

From ac7996d680d8b4a51bb99bbdcee3dc838b985498 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 7 Jan 2021 12:39:16 +0000
Subject: [PATCH 512/547] octeontx2-af: fix memory leak of lmac and lmac->name

Currently the error return paths don't kfree lmac and lmac->name
leading to some memory leaks.  Fix this by adding two error return
paths that kfree these objects

Addresses-Coverity: ("Resource leak")
Fixes: 1463f382f58d ("octeontx2-af: Add support for CGX link management")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20210107123916.189748-1-colin.king@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 7d0f96290943..1a8f5a039d50 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -871,8 +871,10 @@ static int cgx_lmac_init(struct cgx *cgx)
 		if (!lmac)
 			return -ENOMEM;
 		lmac->name = kcalloc(1, sizeof("cgx_fwi_xxx_yyy"), GFP_KERNEL);
-		if (!lmac->name)
-			return -ENOMEM;
+		if (!lmac->name) {
+			err = -ENOMEM;
+			goto err_lmac_free;
+		}
 		sprintf(lmac->name, "cgx_fwi_%d_%d", cgx->cgx_id, i);
 		lmac->lmac_id = i;
 		lmac->cgx = cgx;
@@ -883,7 +885,7 @@ static int cgx_lmac_init(struct cgx *cgx)
 						 CGX_LMAC_FWI + i * 9),
 				   cgx_fwi_event_handler, 0, lmac->name, lmac);
 		if (err)
-			return err;
+			goto err_irq;
 
 		/* Enable interrupt */
 		cgx_write(cgx, lmac->lmac_id, CGXX_CMRX_INT_ENA_W1S,
@@ -895,6 +897,12 @@ static int cgx_lmac_init(struct cgx *cgx)
 	}
 
 	return cgx_lmac_verify_fwi_version(cgx);
+
+err_irq:
+	kfree(lmac->name);
+err_lmac_free:
+	kfree(lmac);
+	return err;
 }
 
 static int cgx_lmac_exit(struct cgx *cgx)

From 07e61a979ca4dddb3661f59328b3cd109f6b0070 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 7 Jan 2021 16:48:21 +0200
Subject: [PATCH 513/547] nexthop: Fix off-by-one error in error path

A reference was not taken for the current nexthop entry, so do not try
to put it in the error path.

Fixes: 430a049190de ("nexthop: Add support for nexthop groups")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/nexthop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5e1b22d4f939..f8035cfa9c20 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1459,7 +1459,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
 	return nh;
 
 out_no_nh:
-	for (; i >= 0; --i)
+	for (i--; i >= 0; --i)
 		nexthop_put(nhg->nh_entries[i].nh);
 
 	kfree(nhg->spare);

From 7b01e53eee6dce7a8a6736e06b99b68cd0cc7a27 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 7 Jan 2021 16:48:22 +0200
Subject: [PATCH 514/547] nexthop: Unlink nexthop group entry in error path

In case of error, remove the nexthop group entry from the list to which
it was previously added.

Fixes: 430a049190de ("nexthop: Add support for nexthop groups")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/nexthop.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f8035cfa9c20..712cdc061cde 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1459,8 +1459,10 @@ static struct nexthop *nexthop_create_group(struct net *net,
 	return nh;
 
 out_no_nh:
-	for (i--; i >= 0; --i)
+	for (i--; i >= 0; --i) {
+		list_del(&nhg->nh_entries[i].nh_list);
 		nexthop_put(nhg->nh_entries[i].nh);
+	}
 
 	kfree(nhg->spare);
 	kfree(nhg);

From b19218b27f3477316d296e8bcf4446aaf017aa69 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 7 Jan 2021 16:48:23 +0200
Subject: [PATCH 515/547] nexthop: Bounce NHA_GATEWAY in FDB nexthop groups

The function nh_check_attr_group() is called to validate nexthop groups.
The intention of that code seems to have been to bounce all attributes
above NHA_GROUP_TYPE except for NHA_FDB. However instead it bounces all
these attributes except when NHA_FDB attribute is present--then it accepts
them.

NHA_FDB validation that takes place before, in rtm_to_nh_config(), already
bounces NHA_OIF, NHA_BLACKHOLE, NHA_ENCAP and NHA_ENCAP_TYPE. Yet further
back, NHA_GROUPS and NHA_MASTER are bounced unconditionally.

But that still leaves NHA_GATEWAY as an attribute that would be accepted in
FDB nexthop groups (with no meaning), so long as it keeps the address
family as unspecified:

 # ip nexthop add id 1 fdb via 127.0.0.1
 # ip nexthop add id 10 fdb via default group 1

The nexthop code is still relatively new and likely not used very broadly,
and the FDB bits are newer still. Even though there is a reproducer out
there, it relies on an improbable gateway arguments "via default", "via
all" or "via any". Given all this, I believe it is OK to reformulate the
condition to do the right thing and bounce NHA_GATEWAY.

Fixes: 38428d68719c ("nexthop: support for fdb ecmp nexthops")
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/nexthop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 712cdc061cde..e53e43aef785 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -627,7 +627,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
 	for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) {
 		if (!tb[i])
 			continue;
-		if (tb[NHA_FDB])
+		if (i == NHA_FDB)
 			continue;
 		NL_SET_ERR_MSG(extack,
 			       "No other attributes can be set in nexthop groups");

From a5c9ca76a1c61fb5e4c35de8eb25aa925b03c9e4 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 7 Jan 2021 16:48:24 +0200
Subject: [PATCH 516/547] selftests: fib_nexthops: Fix wrong mausezahn
 invocation

For IPv6 traffic, mausezahn needs to be invoked with '-6'. Otherwise an
error is returned:

 # ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn"
 Failed to set source IPv4 address. Please check if source is set to a valid IPv4 address.
  Invalid command line parameters!

Fixes: 7c741868ceab ("selftests: Add torture tests to nexthop tests")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/fib_nexthops.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index eb693a3b7b4a..4c7d33618437 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -869,7 +869,7 @@ ipv6_torture()
 	pid3=$!
 	ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
 	pid4=$!
-	ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
 	pid5=$!
 
 	sleep 300

From 0b9902c1fcc59ba75268386c0420a554f8844168 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Thu, 7 Jan 2021 18:24:40 +0100
Subject: [PATCH 517/547] s390/qeth: fix deadlock during recovery

When qeth_dev_layer2_store() - holding the discipline_mutex - waits
inside qeth_l*_remove_device() for a qeth_do_reset() thread to complete,
we can hit a deadlock if qeth_do_reset() concurrently calls
qeth_set_online() and thus tries to aquire the discipline_mutex.

Move the discipline_mutex locking outside of qeth_set_online() and
qeth_set_offline(), and turn the discipline into a parameter so that
callers understand the dependency.

To fix the deadlock, we can now relax the locking:
As already established, qeth_l*_remove_device() waits for
qeth_do_reset() to complete. So qeth_do_reset() itself is under no risk
of having card->discipline ripped out while it's running, and thus
doesn't need to take the discipline_mutex.

Fixes: 9dc48ccc68b9 ("qeth: serialize sysfs-triggered device configurations")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/s390/net/qeth_core.h      |  3 ++-
 drivers/s390/net/qeth_core_main.c | 35 +++++++++++++++++++------------
 drivers/s390/net/qeth_l2_main.c   |  7 +++++--
 drivers/s390/net/qeth_l3_main.c   |  7 +++++--
 4 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 6f5ddc3eab8c..28f637042d44 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1079,7 +1079,8 @@ struct qeth_card *qeth_get_card_by_busid(char *bus_id);
 void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads,
 			      int clear_start_mask);
 int qeth_threads_running(struct qeth_card *, unsigned long);
-int qeth_set_offline(struct qeth_card *card, bool resetting);
+int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc,
+		     bool resetting);
 
 int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
 		  int (*reply_cb)
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index f4b60294a969..d45e223fc521 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5507,12 +5507,12 @@ out:
 	return rc;
 }
 
-static int qeth_set_online(struct qeth_card *card)
+static int qeth_set_online(struct qeth_card *card,
+			   const struct qeth_discipline *disc)
 {
 	bool carrier_ok;
 	int rc;
 
-	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
 	QETH_CARD_TEXT(card, 2, "setonlin");
 
@@ -5529,7 +5529,7 @@ static int qeth_set_online(struct qeth_card *card)
 		/* no need for locking / error handling at this early stage: */
 		qeth_set_real_num_tx_queues(card, qeth_tx_actual_queues(card));
 
-	rc = card->discipline->set_online(card, carrier_ok);
+	rc = disc->set_online(card, carrier_ok);
 	if (rc)
 		goto err_online;
 
@@ -5537,7 +5537,6 @@ static int qeth_set_online(struct qeth_card *card)
 	kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE);
 
 	mutex_unlock(&card->conf_mutex);
-	mutex_unlock(&card->discipline_mutex);
 	return 0;
 
 err_online:
@@ -5552,15 +5551,14 @@ err_hardsetup:
 	qdio_free(CARD_DDEV(card));
 
 	mutex_unlock(&card->conf_mutex);
-	mutex_unlock(&card->discipline_mutex);
 	return rc;
 }
 
-int qeth_set_offline(struct qeth_card *card, bool resetting)
+int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc,
+		     bool resetting)
 {
 	int rc, rc2, rc3;
 
-	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
 	QETH_CARD_TEXT(card, 3, "setoffl");
 
@@ -5581,7 +5579,7 @@ int qeth_set_offline(struct qeth_card *card, bool resetting)
 
 	cancel_work_sync(&card->rx_mode_work);
 
-	card->discipline->set_offline(card);
+	disc->set_offline(card);
 
 	qeth_qdio_clear_card(card, 0);
 	qeth_drain_output_queues(card);
@@ -5602,16 +5600,19 @@ int qeth_set_offline(struct qeth_card *card, bool resetting)
 	kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE);
 
 	mutex_unlock(&card->conf_mutex);
-	mutex_unlock(&card->discipline_mutex);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(qeth_set_offline);
 
 static int qeth_do_reset(void *data)
 {
+	const struct qeth_discipline *disc;
 	struct qeth_card *card = data;
 	int rc;
 
+	/* Lock-free, other users will block until we are done. */
+	disc = card->discipline;
+
 	QETH_CARD_TEXT(card, 2, "recover1");
 	if (!qeth_do_run_thread(card, QETH_RECOVER_THREAD))
 		return 0;
@@ -5619,8 +5620,8 @@ static int qeth_do_reset(void *data)
 	dev_warn(&card->gdev->dev,
 		 "A recovery process has been started for the device\n");
 
-	qeth_set_offline(card, true);
-	rc = qeth_set_online(card);
+	qeth_set_offline(card, disc, true);
+	rc = qeth_set_online(card, disc);
 	if (!rc) {
 		dev_info(&card->gdev->dev,
 			 "Device successfully recovered!\n");
@@ -6647,7 +6648,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev)
 		}
 	}
 
-	rc = qeth_set_online(card);
+	mutex_lock(&card->discipline_mutex);
+	rc = qeth_set_online(card, card->discipline);
+	mutex_unlock(&card->discipline_mutex);
+
 err:
 	return rc;
 }
@@ -6655,8 +6659,13 @@ err:
 static int qeth_core_set_offline(struct ccwgroup_device *gdev)
 {
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+	int rc;
 
-	return qeth_set_offline(card, false);
+	mutex_lock(&card->discipline_mutex);
+	rc = qeth_set_offline(card, card->discipline, false);
+	mutex_unlock(&card->discipline_mutex);
+
+	return rc;
 }
 
 static void qeth_core_shutdown(struct ccwgroup_device *gdev)
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 4ed0fb0705a5..37279b1e29f6 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -2207,8 +2207,11 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
 	qeth_set_allowed_threads(card, 0, 1);
 	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
-	if (gdev->state == CCWGROUP_ONLINE)
-		qeth_set_offline(card, false);
+	if (gdev->state == CCWGROUP_ONLINE) {
+		mutex_lock(&card->discipline_mutex);
+		qeth_set_offline(card, card->discipline, false);
+		mutex_unlock(&card->discipline_mutex);
+	}
 
 	cancel_work_sync(&card->close_dev_work);
 	if (card->dev->reg_state == NETREG_REGISTERED)
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index d138ac432d01..8d474179ce98 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1970,8 +1970,11 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
 	qeth_set_allowed_threads(card, 0, 1);
 	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
-	if (cgdev->state == CCWGROUP_ONLINE)
-		qeth_set_offline(card, false);
+	if (cgdev->state == CCWGROUP_ONLINE) {
+		mutex_lock(&card->discipline_mutex);
+		qeth_set_offline(card, card->discipline, false);
+		mutex_unlock(&card->discipline_mutex);
+	}
 
 	cancel_work_sync(&card->close_dev_work);
 	if (card->dev->reg_state == NETREG_REGISTERED)

From b41b554c1ee75070a14c02a88496b1f231c7eacc Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Thu, 7 Jan 2021 18:24:41 +0100
Subject: [PATCH 518/547] s390/qeth: fix locking for discipline setup / removal

Due to insufficient locking, qeth_core_set_online() and
qeth_dev_layer2_store() can run in parallel, both attempting to load &
setup the discipline (and stepping on each other toes along the way).
A similar race can also occur between qeth_core_remove_device() and
qeth_dev_layer2_store().

Access to .discipline is meant to be protected by the discipline_mutex,
so add/expand the locking in qeth_core_remove_device() and
qeth_core_set_online().
Adjust the locking in qeth_l*_remove_device() accordingly, as it's now
handled by the callers in a consistent manner.

Based on an initial patch by Ursula Braun.

Fixes: 9dc48ccc68b9 ("qeth: serialize sysfs-triggered device configurations")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/s390/net/qeth_core_main.c | 7 +++++--
 drivers/s390/net/qeth_l2_main.c   | 5 +----
 drivers/s390/net/qeth_l3_main.c   | 5 +----
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index d45e223fc521..cf18d87da41e 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -6585,6 +6585,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
 		break;
 	default:
 		card->info.layer_enforced = true;
+		/* It's so early that we don't need the discipline_mutex yet. */
 		rc = qeth_core_load_discipline(card, enforced_disc);
 		if (rc)
 			goto err_load;
@@ -6617,10 +6618,12 @@ static void qeth_core_remove_device(struct ccwgroup_device *gdev)
 
 	QETH_CARD_TEXT(card, 2, "removedv");
 
+	mutex_lock(&card->discipline_mutex);
 	if (card->discipline) {
 		card->discipline->remove(gdev);
 		qeth_core_free_discipline(card);
 	}
+	mutex_unlock(&card->discipline_mutex);
 
 	qeth_free_qdio_queues(card);
 
@@ -6635,6 +6638,7 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev)
 	int rc = 0;
 	enum qeth_discipline_id def_discipline;
 
+	mutex_lock(&card->discipline_mutex);
 	if (!card->discipline) {
 		def_discipline = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 :
 						QETH_DISCIPLINE_LAYER2;
@@ -6648,11 +6652,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev)
 		}
 	}
 
-	mutex_lock(&card->discipline_mutex);
 	rc = qeth_set_online(card, card->discipline);
-	mutex_unlock(&card->discipline_mutex);
 
 err:
+	mutex_unlock(&card->discipline_mutex);
 	return rc;
 }
 
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 37279b1e29f6..4254caf1d9b6 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -2207,11 +2207,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
 	qeth_set_allowed_threads(card, 0, 1);
 	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
-	if (gdev->state == CCWGROUP_ONLINE) {
-		mutex_lock(&card->discipline_mutex);
+	if (gdev->state == CCWGROUP_ONLINE)
 		qeth_set_offline(card, card->discipline, false);
-		mutex_unlock(&card->discipline_mutex);
-	}
 
 	cancel_work_sync(&card->close_dev_work);
 	if (card->dev->reg_state == NETREG_REGISTERED)
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 8d474179ce98..6970597bc885 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1970,11 +1970,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
 	qeth_set_allowed_threads(card, 0, 1);
 	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
-	if (cgdev->state == CCWGROUP_ONLINE) {
-		mutex_lock(&card->discipline_mutex);
+	if (cgdev->state == CCWGROUP_ONLINE)
 		qeth_set_offline(card, card->discipline, false);
-		mutex_unlock(&card->discipline_mutex);
-	}
 
 	cancel_work_sync(&card->close_dev_work);
 	if (card->dev->reg_state == NETREG_REGISTERED)

From f9c4845385c8f6631ebd5dddfb019ea7a285fba4 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Thu, 7 Jan 2021 18:24:42 +0100
Subject: [PATCH 519/547] s390/qeth: fix L2 header access in
 qeth_l3_osa_features_check()

ip_finish_output_gso() may call .ndo_features_check() even before the
skb has a L2 header. This conflicts with qeth_get_ip_version()'s attempt
to inspect the L2 header via vlan_eth_hdr().

Switch to vlan_get_protocol(), as already used further down in the
common qeth_features_check() path.

Fixes: f13ade199391 ("s390/qeth: run non-offload L3 traffic over common xmit path")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/s390/net/qeth_l3_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 6970597bc885..4c2cae7ae9a7 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1813,7 +1813,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb,
 						    struct net_device *dev,
 						    netdev_features_t features)
 {
-	if (qeth_get_ip_version(skb) != 4)
+	if (vlan_get_protocol(skb) != htons(ETH_P_IP))
 		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
 	return qeth_features_check(skb, dev, features);
 }

From 3545454c7801e391b0d966f82c98614d45394770 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Thu, 7 Jan 2021 20:58:18 +0100
Subject: [PATCH 520/547] net: dsa: lantiq_gswip: Exclude RMII from modes that
 report 1 GbE

Exclude RMII from modes that report 1 GbE support. Reduced MII supports
up to 100 MbE.

Fixes: 14fceff4771e ("net: dsa: Add Lantiq / Intel DSA driver for vrx200")
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20210107195818.3878-1-olek2@wp.pl
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/lantiq_gswip.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 4b36d89bec06..662e68a0e7e6 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1436,11 +1436,12 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port,
 	phylink_set(mask, Pause);
 	phylink_set(mask, Asym_Pause);
 
-	/* With the exclusion of MII and Reverse MII, we support Gigabit,
-	 * including Half duplex
+	/* With the exclusion of MII, Reverse MII and Reduced MII, we
+	 * support Gigabit, including Half duplex
 	 */
 	if (state->interface != PHY_INTERFACE_MODE_MII &&
-	    state->interface != PHY_INTERFACE_MODE_REVMII) {
+	    state->interface != PHY_INTERFACE_MODE_REVMII &&
+	    state->interface != PHY_INTERFACE_MODE_RMII) {
 		phylink_set(mask, 1000baseT_Full);
 		phylink_set(mask, 1000baseT_Half);
 	}

From 2d2f6f1b4799428d160c021dd652bc3e3593945e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 7 Jan 2021 19:36:40 +0100
Subject: [PATCH 521/547] block: pre-initialize struct block_device in
 bdev_alloc_inode

bdev_evict_inode and bdev_free_inode are also called for the root inode
of bdevfs, for which bdev_alloc is never called.  Move the zeroing o
f struct block_device and the initialization of the bd_bdi field into
bdev_alloc_inode to make sure they are initialized for the root inode
as well.

Fixes: e6cb53827ed6 ("block: initialize struct block_device in bdev_alloc")
Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index e454c5a81043..3b8963e228a1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -776,8 +776,11 @@ static struct kmem_cache * bdev_cachep __read_mostly;
 static struct inode *bdev_alloc_inode(struct super_block *sb)
 {
 	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
+
 	if (!ei)
 		return NULL;
+	memset(&ei->bdev, 0, sizeof(ei->bdev));
+	ei->bdev.bd_bdi = &noop_backing_dev_info;
 	return &ei->vfs_inode;
 }
 
@@ -871,14 +874,12 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	mapping_set_gfp_mask(&inode->i_data, GFP_USER);
 
 	bdev = I_BDEV(inode);
-	memset(bdev, 0, sizeof(*bdev));
 	mutex_init(&bdev->bd_mutex);
 	mutex_init(&bdev->bd_fsfreeze_mutex);
 	spin_lock_init(&bdev->bd_size_lock);
 	bdev->bd_disk = disk;
 	bdev->bd_partno = partno;
 	bdev->bd_inode = inode;
-	bdev->bd_bdi = &noop_backing_dev_info;
 #ifdef CONFIG_SYSFS
 	INIT_LIST_HEAD(&bdev->bd_holder_disks);
 #endif

From ae28d1aae48a1258bd09a6f707ebb4231d79a761 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Thu, 17 Dec 2020 14:31:18 -0800
Subject: [PATCH 522/547] x86/resctrl: Use an IPI instead of task_work_add() to
 update PQR_ASSOC MSR

Currently, when moving a task to a resource group the PQR_ASSOC MSR is
updated with the new closid and rmid in an added task callback. If the
task is running, the work is run as soon as possible. If the task is not
running, the work is executed later in the kernel exit path when the
kernel returns to the task again.

Updating the PQR_ASSOC MSR as soon as possible on the CPU a moved task
is running is the right thing to do. Queueing work for a task that is
not running is unnecessary (the PQR_ASSOC MSR is already updated when
the task is scheduled in) and causing system resource waste with the way
in which it is implemented: Work to update the PQR_ASSOC register is
queued every time the user writes a task id to the "tasks" file, even if
the task already belongs to the resource group.

This could result in multiple pending work items associated with a
single task even if they are all identical and even though only a single
update with most recent values is needed. Specifically, even if a task
is moved between different resource groups while it is sleeping then it
is only the last move that is relevant but yet a work item is queued
during each move.

This unnecessary queueing of work items could result in significant
system resource waste, especially on tasks sleeping for a long time.
For example, as demonstrated by Shakeel Butt in [1] writing the same
task id to the "tasks" file can quickly consume significant memory. The
same problem (wasted system resources) occurs when moving a task between
different resource groups.

As pointed out by Valentin Schneider in [2] there is an additional issue
with the way in which the queueing of work is done in that the task_struct
update is currently done after the work is queued, resulting in a race with
the register update possibly done before the data needed by the update is
available.

To solve these issues, update the PQR_ASSOC MSR in a synchronous way
right after the new closid and rmid are ready during the task movement,
only if the task is running. If a moved task is not running nothing
is done since the PQR_ASSOC MSR will be updated next time the task is
scheduled. This is the same way used to update the register when tasks
are moved as part of resource group removal.

[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/
[2] https://lore.kernel.org/lkml/20201123022433.17905-1-valentin.schneider@arm.com

 [ bp: Massage commit message and drop the two update_task_closid_rmid()
   variants. ]

Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
Reported-by: Shakeel Butt <shakeelb@google.com>
Reported-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: James Morse <james.morse@arm.com>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/17aa2fb38fc12ce7bb710106b3e7c7b45acb9e94.1608243147.git.reinette.chatre@intel.com
---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 112 ++++++++++---------------
 1 file changed, 43 insertions(+), 69 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 29ffb95b25ff..1c6f8a60ac52 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -525,89 +525,63 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
 	kfree(rdtgrp);
 }
 
-struct task_move_callback {
-	struct callback_head	work;
-	struct rdtgroup		*rdtgrp;
-};
-
-static void move_myself(struct callback_head *head)
+static void _update_task_closid_rmid(void *task)
 {
-	struct task_move_callback *callback;
-	struct rdtgroup *rdtgrp;
-
-	callback = container_of(head, struct task_move_callback, work);
-	rdtgrp = callback->rdtgrp;
-
 	/*
-	 * If resource group was deleted before this task work callback
-	 * was invoked, then assign the task to root group and free the
-	 * resource group.
+	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
+	 * Otherwise, the MSR is updated when the task is scheduled in.
 	 */
-	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
-	    (rdtgrp->flags & RDT_DELETED)) {
-		current->closid = 0;
-		current->rmid = 0;
-		rdtgroup_remove(rdtgrp);
-	}
+	if (task == current)
+		resctrl_sched_in();
+}
 
-	if (unlikely(current->flags & PF_EXITING))
-		goto out;
-
-	preempt_disable();
-	/* update PQR_ASSOC MSR to make resource group go into effect */
-	resctrl_sched_in();
-	preempt_enable();
-
-out:
-	kfree(callback);
+static void update_task_closid_rmid(struct task_struct *t)
+{
+	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
+		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
+	else
+		_update_task_closid_rmid(t);
 }
 
 static int __rdtgroup_move_task(struct task_struct *tsk,
 				struct rdtgroup *rdtgrp)
 {
-	struct task_move_callback *callback;
-	int ret;
-
-	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
-	if (!callback)
-		return -ENOMEM;
-	callback->work.func = move_myself;
-	callback->rdtgrp = rdtgrp;
-
 	/*
-	 * Take a refcount, so rdtgrp cannot be freed before the
-	 * callback has been invoked.
+	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
+	 * updated by them.
+	 *
+	 * For ctrl_mon groups, move both closid and rmid.
+	 * For monitor groups, can move the tasks only from
+	 * their parent CTRL group.
 	 */
-	atomic_inc(&rdtgrp->waitcount);
-	ret = task_work_add(tsk, &callback->work, TWA_RESUME);
-	if (ret) {
-		/*
-		 * Task is exiting. Drop the refcount and free the callback.
-		 * No need to check the refcount as the group cannot be
-		 * deleted before the write function unlocks rdtgroup_mutex.
-		 */
-		atomic_dec(&rdtgrp->waitcount);
-		kfree(callback);
-		rdt_last_cmd_puts("Task exited\n");
-	} else {
-		/*
-		 * For ctrl_mon groups move both closid and rmid.
-		 * For monitor groups, can move the tasks only from
-		 * their parent CTRL group.
-		 */
-		if (rdtgrp->type == RDTCTRL_GROUP) {
-			tsk->closid = rdtgrp->closid;
+
+	if (rdtgrp->type == RDTCTRL_GROUP) {
+		tsk->closid = rdtgrp->closid;
+		tsk->rmid = rdtgrp->mon.rmid;
+	} else if (rdtgrp->type == RDTMON_GROUP) {
+		if (rdtgrp->mon.parent->closid == tsk->closid) {
 			tsk->rmid = rdtgrp->mon.rmid;
-		} else if (rdtgrp->type == RDTMON_GROUP) {
-			if (rdtgrp->mon.parent->closid == tsk->closid) {
-				tsk->rmid = rdtgrp->mon.rmid;
-			} else {
-				rdt_last_cmd_puts("Can't move task to different control group\n");
-				ret = -EINVAL;
-			}
+		} else {
+			rdt_last_cmd_puts("Can't move task to different control group\n");
+			return -EINVAL;
 		}
 	}
-	return ret;
+
+	/*
+	 * Ensure the task's closid and rmid are written before determining if
+	 * the task is current that will decide if it will be interrupted.
+	 */
+	barrier();
+
+	/*
+	 * By now, the task's closid and rmid are set. If the task is current
+	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
+	 * group go into effect. If the task is not current, the MSR will be
+	 * updated when the task is scheduled in.
+	 */
+	update_task_closid_rmid(tsk);
+
+	return 0;
 }
 
 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)

From a0195f314a25582b38993bf30db11c300f4f4611 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Thu, 17 Dec 2020 14:31:19 -0800
Subject: [PATCH 523/547] x86/resctrl: Don't move a task to the same resource
 group

Shakeel Butt reported in [1] that a user can request a task to be moved
to a resource group even if the task is already in the group. It just
wastes time to do the move operation which could be costly to send IPI
to a different CPU.

Add a sanity check to ensure that the move operation only happens when
the task is not already in the resource group.

[1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/

Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
Reported-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/962ede65d8e95be793cb61102cca37f7bb018e66.1608243147.git.reinette.chatre@intel.com
---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 1c6f8a60ac52..460f3e0df106 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -546,6 +546,13 @@ static void update_task_closid_rmid(struct task_struct *t)
 static int __rdtgroup_move_task(struct task_struct *tsk,
 				struct rdtgroup *rdtgrp)
 {
+	/* If the task is already in rdtgrp, no need to move the task. */
+	if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
+	     tsk->rmid == rdtgrp->mon.rmid) ||
+	    (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
+	     tsk->closid == rdtgrp->mon.parent->closid))
+		return 0;
+
 	/*
 	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
 	 * updated by them.

From 872f36eb0b0f4f0e3a81ea1e51a6bdf58ccfdc6e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Jan 2021 05:54:44 -0500
Subject: [PATCH 524/547] KVM: x86: __kvm_vcpu_halt can be static

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0287840b93e0..a480804ae27a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7976,7 +7976,7 @@ void kvm_arch_exit(void)
 	kmem_cache_destroy(x86_fpu_cache);
 }
 
-int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
+static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
 {
 	++vcpu->stat.halt_exits;
 	if (lapic_in_kernel(vcpu)) {

From 74acfa996b2aec2a4ea8587104c7e2f8d4c6aec2 Mon Sep 17 00:00:00 2001
From: Jack Wang <jinpu.wang@cloud.ionos.com>
Date: Fri, 8 Jan 2021 15:36:30 +0100
Subject: [PATCH 525/547] block/rnbd: Select SG_POOL for RNBD_CLIENT

lkp reboot following build error:
 drivers/block/rnbd/rnbd-clt.c: In function 'rnbd_softirq_done_fn':
>> drivers/block/rnbd/rnbd-clt.c:387:2: error: implicit declaration of function 'sg_free_table_chained' [-Werror=implicit-function-declaration]
     387 |  sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT);
         |  ^~~~~~~~~~~~~~~~~~~~~

The reason is CONFIG_SG_POOL is not enabled in the config, to
avoid such failure, select SG_POOL in Kconfig for RNBD_CLIENT.

Fixes: 5a1328d0c3a7 ("block/rnbd-clt: Dynamically allocate sglist for rnbd_iu")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/rnbd/Kconfig b/drivers/block/rnbd/Kconfig
index 4b6d3d816d1f..2ff05a0d2646 100644
--- a/drivers/block/rnbd/Kconfig
+++ b/drivers/block/rnbd/Kconfig
@@ -7,6 +7,7 @@ config BLK_DEV_RNBD_CLIENT
 	tristate "RDMA Network Block Device driver client"
 	depends on INFINIBAND_RTRS_CLIENT
 	select BLK_DEV_RNBD
+	select SG_POOL
 	help
 	  RNBD client is a network block device driver using rdma transport.
 

From 1a84e7c629f8f288e02236bc799f9b0be1cab4a7 Mon Sep 17 00:00:00 2001
From: Jack Wang <jinpu.wang@cloud.ionos.com>
Date: Fri, 8 Jan 2021 15:36:31 +0100
Subject: [PATCH 526/547] block/rnbd-srv: Fix use after free in
 rnbd_srv_sess_dev_force_close

KASAN detect following BUG:
[  778.215311] ==================================================================
[  778.216696] BUG: KASAN: use-after-free in rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server]
[  778.219037] Read of size 8 at addr ffff88b1d6516c28 by task tee/8842

[  778.220500] CPU: 37 PID: 8842 Comm: tee Kdump: loaded Not tainted 5.10.0-pserver #5.10.0-1+feature+linux+next+20201214.1025+0910d71
[  778.220529] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 3.3 02/21/2020
[  778.220555] Call Trace:
[  778.220609]  dump_stack+0x99/0xcb
[  778.220667]  ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server]
[  778.220715]  print_address_description.constprop.7+0x1e/0x230
[  778.220750]  ? freeze_kernel_threads+0x73/0x73
[  778.220896]  ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server]
[  778.220932]  ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server]
[  778.220994]  kasan_report.cold.9+0x37/0x7c
[  778.221066]  ? kobject_put+0x80/0x270
[  778.221102]  ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server]
[  778.221184]  rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server]
[  778.221240]  rnbd_srv_dev_session_force_close_store+0x6a/0xc0 [rnbd_server]
[  778.221304]  ? sysfs_file_ops+0x90/0x90
[  778.221353]  kernfs_fop_write+0x141/0x240
[  778.221451]  vfs_write+0x142/0x4d0
[  778.221553]  ksys_write+0xc0/0x160
[  778.221602]  ? __ia32_sys_read+0x50/0x50
[  778.221684]  ? lockdep_hardirqs_on_prepare+0x13d/0x210
[  778.221718]  ? syscall_enter_from_user_mode+0x1c/0x50
[  778.221821]  do_syscall_64+0x33/0x40
[  778.221862]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  778.221896] RIP: 0033:0x7f4affdd9504
[  778.221928] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 48 8d 05 f9 61 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53
[  778.221956] RSP: 002b:00007fffebb36b28 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  778.222011] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f4affdd9504
[  778.222038] RDX: 0000000000000002 RSI: 00007fffebb36c50 RDI: 0000000000000003
[  778.222066] RBP: 00007fffebb36c50 R08: 0000556a151aa600 R09: 00007f4affeb1540
[  778.222094] R10: fffffffffffffc19 R11: 0000000000000246 R12: 0000556a151aa520
[  778.222121] R13: 0000000000000002 R14: 00007f4affea6760 R15: 0000000000000002

[  778.222764] Allocated by task 3212:
[  778.223285]  kasan_save_stack+0x19/0x40
[  778.223316]  __kasan_kmalloc.constprop.7+0xc1/0xd0
[  778.223347]  kmem_cache_alloc_trace+0x186/0x350
[  778.223382]  rnbd_srv_rdma_ev+0xf16/0x1690 [rnbd_server]
[  778.223422]  process_io_req+0x4d1/0x670 [rtrs_server]
[  778.223573]  __ib_process_cq+0x10a/0x350 [ib_core]
[  778.223709]  ib_cq_poll_work+0x31/0xb0 [ib_core]
[  778.223743]  process_one_work+0x521/0xa90
[  778.223773]  worker_thread+0x65/0x5b0
[  778.223802]  kthread+0x1f2/0x210
[  778.223833]  ret_from_fork+0x22/0x30

[  778.224296] Freed by task 8842:
[  778.224800]  kasan_save_stack+0x19/0x40
[  778.224829]  kasan_set_track+0x1c/0x30
[  778.224860]  kasan_set_free_info+0x1b/0x30
[  778.224889]  __kasan_slab_free+0x108/0x150
[  778.224919]  slab_free_freelist_hook+0x64/0x190
[  778.224947]  kfree+0xe2/0x650
[  778.224982]  rnbd_destroy_sess_dev+0x2fa/0x3b0 [rnbd_server]
[  778.225011]  kobject_put+0xda/0x270
[  778.225046]  rnbd_srv_sess_dev_force_close+0x30/0x60 [rnbd_server]
[  778.225081]  rnbd_srv_dev_session_force_close_store+0x6a/0xc0 [rnbd_server]
[  778.225111]  kernfs_fop_write+0x141/0x240
[  778.225140]  vfs_write+0x142/0x4d0
[  778.225169]  ksys_write+0xc0/0x160
[  778.225198]  do_syscall_64+0x33/0x40
[  778.225227]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[  778.226506] The buggy address belongs to the object at ffff88b1d6516c00
                which belongs to the cache kmalloc-512 of size 512
[  778.227464] The buggy address is located 40 bytes inside of
                512-byte region [ffff88b1d6516c00, ffff88b1d6516e00)

The problem is in the sess_dev release function we call
rnbd_destroy_sess_dev, and could free the sess_dev already, but we still
set the keep_id in rnbd_srv_sess_dev_force_close, which lead to use
after free.

To fix it, move the keep_id before the sysfs removal, and cache the
rnbd_srv_session for lock accessing,

Fixes: 786998050cbc ("block/rnbd-srv: close a mapped device from server side.")
Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Reviewed-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-srv.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index b8e44331e494..a6a68d44f517 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -338,10 +338,12 @@ static int rnbd_srv_link_ev(struct rtrs_srv *rtrs,
 
 void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev)
 {
-	mutex_lock(&sess_dev->sess->lock);
-	rnbd_srv_destroy_dev_session_sysfs(sess_dev);
-	mutex_unlock(&sess_dev->sess->lock);
+	struct rnbd_srv_session	*sess = sess_dev->sess;
+
 	sess_dev->keep_id = true;
+	mutex_lock(&sess->lock);
+	rnbd_srv_destroy_dev_session_sysfs(sess_dev);
+	mutex_unlock(&sess->lock);
 }
 
 static int process_msg_close(struct rtrs_srv *rtrs,

From 80f99093d81370c5cec37fca3b5a6bdf6bddf0f6 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Date: Fri, 8 Jan 2021 15:36:32 +0100
Subject: [PATCH 527/547] block/rnbd-clt: Fix sg table use after free

Since dynamically allocate sglist is used for rnbd_iu, we can't free sg
table after send_usr_msg since the callback function (cqe.done) could
still access the sglist.

Otherwise KASAN reports UAF issue:

[ 4856.600257] BUG: KASAN: use-after-free in dma_direct_unmap_sg+0x53/0x290
[ 4856.600772] Read of size 4 at addr ffff888206af3a98 by task swapper/1/0

[ 4856.601729] CPU: 1 PID: 0 Comm: swapper/1 Kdump: loaded Tainted: G        W         5.10.0-pserver #5.10.0-1+feature+linux+next+20201214.1025+0910d71
[ 4856.601748] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 3.3 02/21/2020
[ 4856.601766] Call Trace:
[ 4856.601785]  <IRQ>
[ 4856.601822]  dump_stack+0x99/0xcb
[ 4856.601856]  ? dma_direct_unmap_sg+0x53/0x290
[ 4856.601888]  print_address_description.constprop.7+0x1e/0x230
[ 4856.601913]  ? freeze_kernel_threads+0x73/0x73
[ 4856.601965]  ? mark_held_locks+0x29/0xa0
[ 4856.602019]  ? dma_direct_unmap_sg+0x53/0x290
[ 4856.602039]  ? dma_direct_unmap_sg+0x53/0x290
[ 4856.602079]  kasan_report.cold.9+0x37/0x7c
[ 4856.602188]  ? mlx5_ib_post_recv+0x430/0x520 [mlx5_ib]
[ 4856.602209]  ? dma_direct_unmap_sg+0x53/0x290
[ 4856.602256]  dma_direct_unmap_sg+0x53/0x290
[ 4856.602366]  complete_rdma_req+0x188/0x4b0 [rtrs_client]
[ 4856.602451]  ? rtrs_clt_close+0x80/0x80 [rtrs_client]
[ 4856.602535]  ? mlx5_ib_poll_cq+0x48b/0x16e0 [mlx5_ib]
[ 4856.602589]  ? radix_tree_insert+0x3a0/0x3a0
[ 4856.602610]  ? do_raw_spin_lock+0x119/0x1d0
[ 4856.602647]  ? rwlock_bug.part.1+0x60/0x60
[ 4856.602740]  rtrs_clt_rdma_done+0x3f7/0x670 [rtrs_client]
[ 4856.602804]  ? rtrs_clt_rdma_cm_handler+0xda0/0xda0 [rtrs_client]
[ 4856.602857]  ? check_flags.part.31+0x6c/0x1f0
[ 4856.602927]  ? rcu_read_lock_sched_held+0xaf/0xe0
[ 4856.602963]  ? rcu_read_lock_bh_held+0xc0/0xc0
[ 4856.603137]  __ib_process_cq+0x10a/0x350 [ib_core]
[ 4856.603309]  ib_poll_handler+0x41/0x1c0 [ib_core]
[ 4856.603358]  irq_poll_softirq+0xe6/0x280
[ 4856.603392]  ? lockdep_hardirqs_on_prepare+0x111/0x210
[ 4856.603446]  __do_softirq+0x10d/0x646
[ 4856.603540]  asm_call_irq_on_stack+0x12/0x20
[ 4856.603563]  </IRQ>

[ 4856.605096] Allocated by task 8914:
[ 4856.605510]  kasan_save_stack+0x19/0x40
[ 4856.605532]  __kasan_kmalloc.constprop.7+0xc1/0xd0
[ 4856.605552]  __kmalloc+0x155/0x320
[ 4856.605574]  __sg_alloc_table+0x155/0x1c0
[ 4856.605594]  sg_alloc_table+0x1f/0x50
[ 4856.605620]  send_msg_sess_info+0x119/0x2e0 [rnbd_client]
[ 4856.605646]  remap_devs+0x71/0x210 [rnbd_client]
[ 4856.605676]  init_sess+0xad8/0xe10 [rtrs_client]
[ 4856.605706]  rtrs_clt_reconnect_work+0xd6/0x170 [rtrs_client]
[ 4856.605728]  process_one_work+0x521/0xa90
[ 4856.605748]  worker_thread+0x65/0x5b0
[ 4856.605769]  kthread+0x1f2/0x210
[ 4856.605789]  ret_from_fork+0x22/0x30

[ 4856.606159] Freed by task 8914:
[ 4856.606559]  kasan_save_stack+0x19/0x40
[ 4856.606580]  kasan_set_track+0x1c/0x30
[ 4856.606601]  kasan_set_free_info+0x1b/0x30
[ 4856.606622]  __kasan_slab_free+0x108/0x150
[ 4856.606642]  slab_free_freelist_hook+0x64/0x190
[ 4856.606661]  kfree+0xe2/0x650
[ 4856.606681]  __sg_free_table+0xa4/0x100
[ 4856.606707]  send_msg_sess_info+0x1d6/0x2e0 [rnbd_client]
[ 4856.606733]  remap_devs+0x71/0x210 [rnbd_client]
[ 4856.606763]  init_sess+0xad8/0xe10 [rtrs_client]
[ 4856.606792]  rtrs_clt_reconnect_work+0xd6/0x170 [rtrs_client]
[ 4856.606813]  process_one_work+0x521/0xa90
[ 4856.606833]  worker_thread+0x65/0x5b0
[ 4856.606853]  kthread+0x1f2/0x210
[ 4856.606872]  ret_from_fork+0x22/0x30

The solution is to free iu's sgtable after the iu is not used anymore.
And also move sg_alloc_table into rnbd_get_iu accordingly.

Fixes: 5a1328d0c3a7 ("block/rnbd-clt: Dynamically allocate sglist for rnbd_iu")
Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-clt.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 96e3f9fe8241..506e9094487f 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -375,12 +375,19 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess,
 	init_waitqueue_head(&iu->comp.wait);
 	iu->comp.errno = INT_MAX;
 
+	if (sg_alloc_table(&iu->sgt, 1, GFP_KERNEL)) {
+		rnbd_put_permit(sess, permit);
+		kfree(iu);
+		return NULL;
+	}
+
 	return iu;
 }
 
 static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu)
 {
 	if (atomic_dec_and_test(&iu->refcount)) {
+		sg_free_table(&iu->sgt);
 		rnbd_put_permit(sess, iu->permit);
 		kfree(iu);
 	}
@@ -487,8 +494,6 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait)
 	iu->buf = NULL;
 	iu->dev = dev;
 
-	sg_alloc_table(&iu->sgt, 1, GFP_KERNEL);
-
 	msg.hdr.type	= cpu_to_le16(RNBD_MSG_CLOSE);
 	msg.device_id	= cpu_to_le32(device_id);
 
@@ -502,7 +507,6 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait)
 		err = errno;
 	}
 
-	sg_free_table(&iu->sgt);
 	rnbd_put_iu(sess, iu);
 	return err;
 }
@@ -575,7 +579,6 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
 	iu->buf = rsp;
 	iu->dev = dev;
 
-	sg_alloc_table(&iu->sgt, 1, GFP_KERNEL);
 	sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));
 
 	msg.hdr.type	= cpu_to_le16(RNBD_MSG_OPEN);
@@ -594,7 +597,6 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
 		err = errno;
 	}
 
-	sg_free_table(&iu->sgt);
 	rnbd_put_iu(sess, iu);
 	return err;
 }
@@ -622,8 +624,6 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait)
 
 	iu->buf = rsp;
 	iu->sess = sess;
-
-	sg_alloc_table(&iu->sgt, 1, GFP_KERNEL);
 	sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));
 
 	msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO);
@@ -650,7 +650,6 @@ put_iu:
 	} else {
 		err = errno;
 	}
-	sg_free_table(&iu->sgt);
 	rnbd_put_iu(sess, iu);
 	return err;
 }

From ef8048dd2345d070c41bc7df16763fd4d8fac296 Mon Sep 17 00:00:00 2001
From: Swapnil Ingle <ingleswapnil@gmail.com>
Date: Fri, 8 Jan 2021 15:36:33 +0100
Subject: [PATCH 528/547] block/rnbd: Adding name to the Contributors List

Adding name to the Contributors List

Signed-off-by: Swapnil Ingle <ingleswapnil@gmail.com>
Acked-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Acked-by: Danil Kipnis <danil.kipnis@cloud.ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/README | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/rnbd/README b/drivers/block/rnbd/README
index 1773c0aa0bd4..080f58a5400a 100644
--- a/drivers/block/rnbd/README
+++ b/drivers/block/rnbd/README
@@ -90,3 +90,4 @@ Kleber Souza <kleber.souza@profitbricks.com>
 Lutz Pogrell <lutz.pogrell@cloud.ionos.com>
 Milind Dumbare <Milind.dumbare@gmail.com>
 Roman Penyaev <roman.penyaev@profitbricks.com>
+Swapnil Ingle <ingleswapnil@gmail.com>

From 3a21777c6ee99749bac10727b3c17e5bcfebe5c1 Mon Sep 17 00:00:00 2001
From: Jack Wang <jinpu.wang@cloud.ionos.com>
Date: Fri, 8 Jan 2021 15:36:34 +0100
Subject: [PATCH 529/547] block/rnbd-clt: avoid module unload race with close
 confirmation

We had kernel panic, it is caused by unload module and last
close confirmation.

call trace:
[1196029.743127]  free_sess+0x15/0x50 [rtrs_client]
[1196029.743128]  rtrs_clt_close+0x4c/0x70 [rtrs_client]
[1196029.743129]  ? rnbd_clt_unmap_device+0x1b0/0x1b0 [rnbd_client]
[1196029.743130]  close_rtrs+0x25/0x50 [rnbd_client]
[1196029.743131]  rnbd_client_exit+0x93/0xb99 [rnbd_client]
[1196029.743132]  __x64_sys_delete_module+0x190/0x260

And in the crashdump confirmation kworker is also running.
PID: 6943   TASK: ffff9e2ac8098000  CPU: 4   COMMAND: "kworker/4:2"
 #0 [ffffb206cf337c30] __schedule at ffffffff9f93f891
 #1 [ffffb206cf337cc8] schedule at ffffffff9f93fe98
 #2 [ffffb206cf337cd0] schedule_timeout at ffffffff9f943938
 #3 [ffffb206cf337d50] wait_for_completion at ffffffff9f9410a7
 #4 [ffffb206cf337da0] __flush_work at ffffffff9f08ce0e
 #5 [ffffb206cf337e20] rtrs_clt_close_conns at ffffffffc0d5f668 [rtrs_client]
 #6 [ffffb206cf337e48] rtrs_clt_close at ffffffffc0d5f801 [rtrs_client]
 #7 [ffffb206cf337e68] close_rtrs at ffffffffc0d26255 [rnbd_client]
 #8 [ffffb206cf337e78] free_sess at ffffffffc0d262ad [rnbd_client]
 #9 [ffffb206cf337e88] rnbd_clt_put_dev at ffffffffc0d266a7 [rnbd_client]

The problem is both code path try to close same session, which lead to
panic.

To fix it, just skip the sess if the refcount already drop to 0.

Fixes: f7a7a5c228d4 ("block/rnbd: client: main functionality")
Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Reviewed-by: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-clt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 506e9094487f..45a470076652 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1697,7 +1697,8 @@ static void rnbd_destroy_sessions(void)
 	 */
 
 	list_for_each_entry_safe(sess, sn, &sess_list, list) {
-		WARN_ON(!rnbd_clt_get_sess(sess));
+		if (!rnbd_clt_get_sess(sess))
+			continue;
 		close_rtrs(sess);
 		list_for_each_entry_safe(dev, tn, &sess->devs_list, list) {
 			/*

From 02f938e9fed1681791605ca8b96c2d9da9355f6a Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 8 Jan 2021 16:55:37 +0800
Subject: [PATCH 530/547] blk-mq-debugfs: Add decode for
 BLK_MQ_F_TAG_HCTX_SHARED

Showing the hctx flags for when BLK_MQ_F_TAG_HCTX_SHARED is set gives
something like:

root@debian:/home/john# more /sys/kernel/debug/block/sda/hctx0/flags
alloc_policy=FIFO SHOULD_MERGE|TAG_QUEUE_SHARED|3

Add the decoding for that flag.

Fixes: 32bc15afed04b ("blk-mq: Facilitate a shared sbitmap per tagset")
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index e21eed20a155..712a95f74ccb 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -246,6 +246,7 @@ static const char *const hctx_flag_name[] = {
 	HCTX_FLAG_NAME(BLOCKING),
 	HCTX_FLAG_NAME(NO_SCHED),
 	HCTX_FLAG_NAME(STACKING),
+	HCTX_FLAG_NAME(TAG_HCTX_SHARED),
 };
 #undef HCTX_FLAG_NAME
 

From bac717171971176b78c72d15a8b6961764ab197f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 30 Dec 2020 16:20:05 +0100
Subject: [PATCH 531/547] ARM: picoxcell: fix missing interrupt-parent
 properties

dtc points out that the interrupts for some devices are not parsable:

picoxcell-pc3x2.dtsi:45.19-49.5: Warning (interrupts_property): /paxi/gem@30000: Missing interrupt-parent
picoxcell-pc3x2.dtsi:51.21-55.5: Warning (interrupts_property): /paxi/dmac@40000: Missing interrupt-parent
picoxcell-pc3x2.dtsi:57.21-61.5: Warning (interrupts_property): /paxi/dmac@50000: Missing interrupt-parent
picoxcell-pc3x2.dtsi:233.21-237.5: Warning (interrupts_property): /rwid-axi/axi2pico@c0000000: Missing interrupt-parent

There are two VIC instances, so it's not clear which one needs to be
used. I found the BSP sources that reference VIC0, so use that:

https://github.com/r1mikey/meta-picoxcell/blob/master/recipes-kernel/linux/linux-picochip-3.0/0001-picoxcell-support-for-Picochip-picoXcell-SoC.patch

Acked-by: Jamie Iles <jamie@jamieiles.com>
Link: https://lore.kernel.org/r/20201230152010.3914962-1-arnd@kernel.org'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/boot/dts/picoxcell-pc3x2.dtsi | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi
index c4c6c7e9e37b..5898879a3038 100644
--- a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi
+++ b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi
@@ -45,18 +45,21 @@
 		emac: gem@30000 {
 			compatible = "cadence,gem";
 			reg = <0x30000 0x10000>;
+			interrupt-parent = <&vic0>;
 			interrupts = <31>;
 		};
 
 		dmac1: dmac@40000 {
 			compatible = "snps,dw-dmac";
 			reg = <0x40000 0x10000>;
+			interrupt-parent = <&vic0>;
 			interrupts = <25>;
 		};
 
 		dmac2: dmac@50000 {
 			compatible = "snps,dw-dmac";
 			reg = <0x50000 0x10000>;
+			interrupt-parent = <&vic0>;
 			interrupts = <26>;
 		};
 
@@ -233,6 +236,7 @@
 		axi2pico@c0000000 {
 			compatible = "picochip,axi2pico-pc3x2";
 			reg = <0xc0000000 0x10000>;
+			interrupt-parent = <&vic0>;
 			interrupts = <13 14 15 16 17 18 19 20 21>;
 		};
 	};

From 84e261553e6f919bf0b4d65244599ab2b41f1da5 Mon Sep 17 00:00:00 2001
From: David Arcari <darcari@redhat.com>
Date: Thu, 7 Jan 2021 09:47:07 -0500
Subject: [PATCH 532/547] hwmon: (amd_energy) fix allocation of
 hwmon_channel_info config

hwmon, specifically hwmon_num_channel_attrs, expects the config
array in the hwmon_channel_info structure to be terminated by
a zero entry.  amd_energy does not honor this convention.  As
result, a KASAN warning is possible.  Fix this by adding an
additional entry and setting it to zero.

Fixes: 8abee9566b7e ("hwmon: Add amd_energy driver to report energy counters")

Signed-off-by: David Arcari <darcari@redhat.com>
Cc: Naveen Krishna Chatradhi <nchatrad@amd.com>
Cc: Jean Delvare <jdelvare@suse.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: David Arcari <darcari@redhat.com>
Acked-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
Link: https://lore.kernel.org/r/20210107144707.6927-1-darcari@redhat.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/amd_energy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c
index 9b306448b7a0..822c2e74b98d 100644
--- a/drivers/hwmon/amd_energy.c
+++ b/drivers/hwmon/amd_energy.c
@@ -222,7 +222,7 @@ static int amd_create_sensor(struct device *dev,
 	 */
 	cpus = num_present_cpus() / num_siblings;
 
-	s_config = devm_kcalloc(dev, cpus + sockets,
+	s_config = devm_kcalloc(dev, cpus + sockets + 1,
 				sizeof(u32), GFP_KERNEL);
 	if (!s_config)
 		return -ENOMEM;
@@ -254,6 +254,7 @@ static int amd_create_sensor(struct device *dev,
 			scnprintf(label_l[i], 10, "Esocket%u", (i - cpus));
 	}
 
+	s_config[i] = 0;
 	return 0;
 }
 

From a91bd6223ecd46addc71ee6fcd432206d39365d2 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Fri, 8 Jan 2021 12:48:47 +0100
Subject: [PATCH 533/547] Revert "init/console: Use ttynull as a fallback when
 there is no console"

This reverts commit 757055ae8dedf5333af17b3b5b4b70ba9bc9da4e.

The commit caused that ttynull was used as the default console
on several systems[1][2][3]. As a result, the console was
blank even when a better alternative existed.

It happened when there was no console configured
on the command line and ttynull_init() was the first initcall
calling register_console().

Or it happened when /dev/ did not exist when console_on_rootfs()
was called. It was not able to open /dev/console even though
a console driver was registered. It tried to add ttynull console
but it obviously did not help. But ttynull became the preferred
console and was used by /dev/console when it was available later.

The commit tried to fix a historical problem that have been there
for ages. The primary motivation was the commit 3cffa06aeef7ece30f6
("printk/console: Allow to disable console output by using console=""
 or console=null"). It provided a clean solution for a workaround
 that was widely used and worked only by chance.

This revert causes that the console="" or console=null command line
options will again work only by chance. These options will cause that
a particular console will be preferred and the default (tty) ones
will not get enabled. There will be no console registered at
all. As a result there won't be stdin, stdout, and stderr for
the init process. But it worked exactly this way even before.

The proper solution has to fulfill many conditions:

  + Register ttynull only when explicitly required or as
    the ultimate fallback.

  + ttynull should get associated with /dev/console but it must
    not become preferred console when used as a fallback.
    Especially, it must still be possible to replace it
    by a better console later.

Such a change requires clean up of the register_console() code.
Otherwise, it would be even harder to follow. Especially, the use
of has_preferred_console and CON_CONSDEV flag is tricky. The clean
up is risky. The ordering of consoles is not well defined. And
any changes tend to break existing user settings.

Do the revert at the least risky solution for now.

[1] https://lore.kernel.org/linux-kselftest/20201221144302.GR4077@smile.fi.intel.com/
[2] https://lore.kernel.org/lkml/d2a3b3c0-e548-7dd1-730f-59bc5c04e191@synopsys.com/
[3] https://patchwork.ozlabs.org/project/linux-um/patch/20210105120128.10854-1-thomas@m3y3r.de/

Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reported-by: Vineet Gupta <vgupta@synopsys.com>
Reported-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/Kconfig     | 14 ++++++++++++++
 drivers/tty/Makefile    |  3 ++-
 drivers/tty/ttynull.c   | 18 ------------------
 include/linux/console.h |  3 ---
 init/main.c             | 10 ++--------
 5 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 47a6e42f0d04..e15cd6b5bb99 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -401,6 +401,20 @@ config MIPS_EJTAG_FDC_KGDB_CHAN
 	help
 	  FDC channel number to use for KGDB.
 
+config NULL_TTY
+	tristate "NULL TTY driver"
+	help
+	  Say Y here if you want a NULL TTY which simply discards messages.
+
+	  This is useful to allow userspace applications which expect a console
+	  device to work without modifications even when no console is
+	  available or desired.
+
+	  In order to use this driver, you should redirect the console to this
+	  TTY, or boot the kernel with console=ttynull.
+
+	  If unsure, say N.
+
 config TRACE_ROUTER
 	tristate "Trace data router for MIPI P1149.7 cJTAG standard"
 	depends on TRACE_SINK
diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile
index 3c1c5a9240a7..b3ccae932660 100644
--- a/drivers/tty/Makefile
+++ b/drivers/tty/Makefile
@@ -2,7 +2,7 @@
 obj-$(CONFIG_TTY)		+= tty_io.o n_tty.o tty_ioctl.o tty_ldisc.o \
 				   tty_buffer.o tty_port.o tty_mutex.o \
 				   tty_ldsem.o tty_baudrate.o tty_jobctrl.o \
-				   n_null.o ttynull.o
+				   n_null.o
 obj-$(CONFIG_LEGACY_PTYS)	+= pty.o
 obj-$(CONFIG_UNIX98_PTYS)	+= pty.o
 obj-$(CONFIG_AUDIT)		+= tty_audit.o
@@ -25,6 +25,7 @@ obj-$(CONFIG_ISI)		+= isicom.o
 obj-$(CONFIG_MOXA_INTELLIO)	+= moxa.o
 obj-$(CONFIG_MOXA_SMARTIO)	+= mxser.o
 obj-$(CONFIG_NOZOMI)		+= nozomi.o
+obj-$(CONFIG_NULL_TTY)	        += ttynull.o
 obj-$(CONFIG_ROCKETPORT)	+= rocket.o
 obj-$(CONFIG_SYNCLINK_GT)	+= synclink_gt.o
 obj-$(CONFIG_PPC_EPAPR_HV_BYTECHAN) += ehv_bytechan.o
diff --git a/drivers/tty/ttynull.c b/drivers/tty/ttynull.c
index eced70ec54e1..17f05b7eb6d3 100644
--- a/drivers/tty/ttynull.c
+++ b/drivers/tty/ttynull.c
@@ -2,13 +2,6 @@
 /*
  * Copyright (C) 2019 Axis Communications AB
  *
- * The console is useful for userspace applications which expect a console
- * device to work without modifications even when no console is available
- * or desired.
- *
- * In order to use this driver, you should redirect the console to this
- * TTY, or boot the kernel with console=ttynull.
- *
  * Based on ttyprintk.c:
  *  Copyright (C) 2010 Samo Pogacnik
  */
@@ -66,17 +59,6 @@ static struct console ttynull_console = {
 	.device = ttynull_device,
 };
 
-void __init register_ttynull_console(void)
-{
-	if (!ttynull_driver)
-		return;
-
-	if (add_preferred_console(ttynull_console.name, 0, NULL))
-		return;
-
-	register_console(&ttynull_console);
-}
-
 static int __init ttynull_init(void)
 {
 	struct tty_driver *driver;
diff --git a/include/linux/console.h b/include/linux/console.h
index dbe78e8e2602..20874db50bc8 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -186,12 +186,9 @@ extern int braille_register_console(struct console *, int index,
 extern int braille_unregister_console(struct console *);
 #ifdef CONFIG_TTY
 extern void console_sysfs_notify(void);
-extern void register_ttynull_console(void);
 #else
 static inline void console_sysfs_notify(void)
 { }
-static inline void register_ttynull_console(void)
-{ }
 #endif
 extern bool console_suspend_enabled;
 
diff --git a/init/main.c b/init/main.c
index 421640fca375..c68d784376ca 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1480,14 +1480,8 @@ void __init console_on_rootfs(void)
 	struct file *file = filp_open("/dev/console", O_RDWR, 0);
 
 	if (IS_ERR(file)) {
-		pr_err("Warning: unable to open an initial console. Fallback to ttynull.\n");
-		register_ttynull_console();
-
-		file = filp_open("/dev/console", O_RDWR, 0);
-		if (IS_ERR(file)) {
-			pr_err("Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process!\n");
-			return;
-		}
+		pr_err("Warning: unable to open an initial console.\n");
+		return;
 	}
 	init_dup(file);
 	init_dup(file);

From ef0ba05538299f1391cbe097de36895bb36ecfe6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 7 Jan 2021 09:43:54 -0800
Subject: [PATCH 534/547] poll: fix performance regression due to out-of-line
 __put_user()

The kernel test robot reported a -5.8% performance regression on the
"poll2" test of will-it-scale, and bisected it to commit d55564cfc222
("x86: Make __put_user() generate an out-of-line call").

I didn't expect an out-of-line __put_user() to matter, because no normal
core code should use that non-checking legacy version of user access any
more.  But I had overlooked the very odd poll() usage, which does a
__put_user() to update the 'revents' values of the poll array.

Now, Al Viro correctly points out that instead of updating just the
'revents' field, it would be much simpler to just copy the _whole_
pollfd entry, and then we could just use "copy_to_user()" on the whole
array of entries, the same way we use "copy_from_user()" a few lines
earlier to get the original values.

But that is not what we've traditionally done, and I worry that threaded
applications might be concurrently modifying the other fields of the
pollfd array.  So while Al's suggestion is simpler - and perhaps worth
trying in the future - this instead keeps the "just update revents"
model.

To fix the performance regression, use the modern "unsafe_put_user()"
instead of __put_user(), with the proper "user_write_access_begin()"
guarding in place. This improves code generation enormously.

Link: https://lore.kernel.org/lkml/20210107134723.GA28532@xsang-OptiPlex-9020/
Reported-by: kernel test robot <oliver.sang@intel.com>
Tested-by: Oliver Sang <oliver.sang@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: David Laight <David.Laight@aculab.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/select.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/select.c b/fs/select.c
index ebfebdfe5c69..37aaa8317f3a 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1011,14 +1011,17 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
 	fdcount = do_poll(head, &table, end_time);
 	poll_freewait(&table);
 
+	if (!user_write_access_begin(ufds, nfds * sizeof(*ufds)))
+		goto out_fds;
+
 	for (walk = head; walk; walk = walk->next) {
 		struct pollfd *fds = walk->entries;
 		int j;
 
-		for (j = 0; j < walk->len; j++, ufds++)
-			if (__put_user(fds[j].revents, &ufds->revents))
-				goto out_fds;
+		for (j = walk->len; j; fds++, ufds++, j--)
+			unsafe_put_user(fds->revents, &ufds->revents, Efault);
   	}
+	user_write_access_end();
 
 	err = fdcount;
 out_fds:
@@ -1030,6 +1033,11 @@ out_fds:
 	}
 
 	return err;
+
+Efault:
+	user_write_access_end();
+	err = -EFAULT;
+	goto out_fds;
 }
 
 static long do_restart_poll(struct restart_block *restart_block)

From e8deee4f1543eda9b75278f63322f412cad52f6a Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 8 Jan 2021 13:46:55 -0800
Subject: [PATCH 535/547] ARC: [hsdk]: Enable FPU_SAVE_RESTORE

HSDK has hardware floating point and the common use case is with
glibc+hf so enable that as default.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/plat-hsdk/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig
index 6b5c54576f54..a2d10c29fbcc 100644
--- a/arch/arc/plat-hsdk/Kconfig
+++ b/arch/arc/plat-hsdk/Kconfig
@@ -7,6 +7,7 @@ menuconfig ARC_SOC_HSDK
 	depends on ISA_ARCV2
 	select ARC_HAS_ACCL_REGS
 	select ARC_IRQ_NO_AUTOSAVE
+	select ARC_FPU_SAVE_RESTORE
 	select CLK_HSDK
 	select RESET_CONTROLLER
 	select RESET_HSDK

From e80927079fd97b4d5457e3af2400a0087b561564 Mon Sep 17 00:00:00 2001
From: Yi Li <yili@winhong.com>
Date: Mon, 4 Jan 2021 15:41:18 +0800
Subject: [PATCH 536/547] bcache: set pdev_set_uuid before scond loop iteration

There is no need to reassign pdev_set_uuid in the second loop iteration,
so move it to the place before second loop.

Signed-off-by: Yi Li <yili@winhong.com>
Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index a4752ac410dc..6aa23a6fb394 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2644,8 +2644,8 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
 	}
 
 	list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
+		char *pdev_set_uuid = pdev->dc->sb.set_uuid;
 		list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
-			char *pdev_set_uuid = pdev->dc->sb.set_uuid;
 			char *set_uuid = c->set_uuid;
 
 			if (!memcmp(pdev_set_uuid, set_uuid, 16)) {

From f7b4943dea48a572ad751ce1f18a245d43debe7e Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 4 Jan 2021 15:41:19 +0800
Subject: [PATCH 537/547] bcache: fix typo from SUUP to SUPP in features.h

This patch fixes the following typos,
from BCH_FEATURE_COMPAT_SUUP to BCH_FEATURE_COMPAT_SUPP
from BCH_FEATURE_INCOMPAT_SUUP to BCH_FEATURE_INCOMPAT_SUPP
from BCH_FEATURE_INCOMPAT_SUUP to BCH_FEATURE_RO_COMPAT_SUPP

Fixes: d721a43ff69c ("bcache: increase super block version for cache device and backing device")
Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket")
Signed-off-by: Coly Li <colyli@suse.de>
Cc: stable@vger.kernel.org # 5.9+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/features.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h
index a1653c478041..32c5bbda2f0d 100644
--- a/drivers/md/bcache/features.h
+++ b/drivers/md/bcache/features.h
@@ -15,9 +15,9 @@
 /* Incompat feature set */
 #define BCH_FEATURE_INCOMPAT_LARGE_BUCKET	0x0001 /* 32bit bucket size */
 
-#define BCH_FEATURE_COMPAT_SUUP		0
-#define BCH_FEATURE_RO_COMPAT_SUUP	0
-#define BCH_FEATURE_INCOMPAT_SUUP	BCH_FEATURE_INCOMPAT_LARGE_BUCKET
+#define BCH_FEATURE_COMPAT_SUPP		0
+#define BCH_FEATURE_RO_COMPAT_SUPP	0
+#define BCH_FEATURE_INCOMPAT_SUPP	BCH_FEATURE_INCOMPAT_LARGE_BUCKET
 
 #define BCH_HAS_COMPAT_FEATURE(sb, mask) \
 		((sb)->feature_compat & (mask))

From 1dfc0686c29a9bbd3a446a29f9ccde3dec3bc75a Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 4 Jan 2021 15:41:20 +0800
Subject: [PATCH 538/547] bcache: check unsupported feature sets for bcache
 register

This patch adds the check for features which is incompatible for
current supported feature sets.

Now if the bcache device created by bcache-tools has features that
current kernel doesn't support, read_super() will fail with error
messoage. E.g. if an unsupported incompatible feature detected,
bcache register will fail with dmesg "bcache: register_bcache() error :
Unsupported incompatible feature found".

Fixes: d721a43ff69c ("bcache: increase super block version for cache device and backing device")
Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket")
Signed-off-by: Coly Li <colyli@suse.de>
Cc: stable@vger.kernel.org # 5.9+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/features.h | 15 +++++++++++++++
 drivers/md/bcache/super.c    | 14 ++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h
index 32c5bbda2f0d..e73724c2b49b 100644
--- a/drivers/md/bcache/features.h
+++ b/drivers/md/bcache/features.h
@@ -79,6 +79,21 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
 
 BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET);
 
+static inline bool bch_has_unknown_compat_features(struct cache_sb *sb)
+{
+	return ((sb->feature_compat & ~BCH_FEATURE_COMPAT_SUPP) != 0);
+}
+
+static inline bool bch_has_unknown_ro_compat_features(struct cache_sb *sb)
+{
+	return ((sb->feature_ro_compat & ~BCH_FEATURE_RO_COMPAT_SUPP) != 0);
+}
+
+static inline bool bch_has_unknown_incompat_features(struct cache_sb *sb)
+{
+	return ((sb->feature_incompat & ~BCH_FEATURE_INCOMPAT_SUPP) != 0);
+}
+
 int bch_print_cache_set_feature_compat(struct cache_set *c, char *buf, int size);
 int bch_print_cache_set_feature_ro_compat(struct cache_set *c, char *buf, int size);
 int bch_print_cache_set_feature_incompat(struct cache_set *c, char *buf, int size);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 6aa23a6fb394..f4674a3298af 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -228,6 +228,20 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
 		sb->feature_compat = le64_to_cpu(s->feature_compat);
 		sb->feature_incompat = le64_to_cpu(s->feature_incompat);
 		sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat);
+
+		/* Check incompatible features */
+		err = "Unsupported compatible feature found";
+		if (bch_has_unknown_compat_features(sb))
+			goto err;
+
+		err = "Unsupported read-only compatible feature found";
+		if (bch_has_unknown_ro_compat_features(sb))
+			goto err;
+
+		err = "Unsupported incompatible feature found";
+		if (bch_has_unknown_incompat_features(sb))
+			goto err;
+
 		err = read_super_common(sb, bdev, s);
 		if (err)
 			goto err;

From b16671e8f493e3df40b1fb0dff4078f391c5099a Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 4 Jan 2021 15:41:21 +0800
Subject: [PATCH 539/547] bcache: introduce
 BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE for large bucket

When large bucket feature was added, BCH_FEATURE_INCOMPAT_LARGE_BUCKET
was introduced into the incompat feature set. It used bucket_size_hi
(which was added at the tail of struct cache_sb_disk) to extend current
16bit bucket size to 32bit with existing bucket_size in struct
cache_sb_disk.

This is not a good idea, there are two obvious problems,
- Bucket size is always value power of 2, if store log2(bucket size) in
  existing bucket_size of struct cache_sb_disk, it is unnecessary to add
  bucket_size_hi.
- Macro csum_set() assumes d[SB_JOURNAL_BUCKETS] is the last member in
  struct cache_sb_disk, bucket_size_hi was added after d[] which makes
  csum_set calculate an unexpected super block checksum.

To fix the above problems, this patch introduces a new incompat feature
bit BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE, when this bit is set, it
means bucket_size in struct cache_sb_disk stores the order of power-of-2
bucket size value. When user specifies a bucket size larger than 32768
sectors, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE will be set to
incompat feature set, and bucket_size stores log2(bucket size) more
than store the real bucket size value.

The obsoleted BCH_FEATURE_INCOMPAT_LARGE_BUCKET won't be used anymore,
it is renamed to BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET and still only
recognized by kernel driver for legacy compatible purpose. The previous
bucket_size_hi is renmaed to obso_bucket_size_hi in struct cache_sb_disk
and not used in bcache-tools anymore.

For cache device created with BCH_FEATURE_INCOMPAT_LARGE_BUCKET feature,
bcache-tools and kernel driver still recognize the feature string and
display it as "obso_large_bucket".

With this change, the unnecessary extra space extend of bcache on-disk
super block can be avoided, and csum_set() may generate expected check
sum as well.

Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket")
Signed-off-by: Coly Li <colyli@suse.de>
Cc: stable@vger.kernel.org # 5.9+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/features.c |  2 +-
 drivers/md/bcache/features.h | 11 ++++++++---
 drivers/md/bcache/super.c    | 22 +++++++++++++++++++---
 include/uapi/linux/bcache.h  |  2 +-
 4 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c
index 6469223f0b77..d636b7b2d070 100644
--- a/drivers/md/bcache/features.c
+++ b/drivers/md/bcache/features.c
@@ -17,7 +17,7 @@ struct feature {
 };
 
 static struct feature feature_list[] = {
-	{BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LARGE_BUCKET,
+	{BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE,
 		"large_bucket"},
 	{0, 0, 0 },
 };
diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h
index e73724c2b49b..84fc2c0f0101 100644
--- a/drivers/md/bcache/features.h
+++ b/drivers/md/bcache/features.h
@@ -13,11 +13,15 @@
 
 /* Feature set definition */
 /* Incompat feature set */
-#define BCH_FEATURE_INCOMPAT_LARGE_BUCKET	0x0001 /* 32bit bucket size */
+/* 32bit bucket size, obsoleted */
+#define BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET		0x0001
+/* real bucket size is (1 << bucket_size) */
+#define BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE	0x0002
 
 #define BCH_FEATURE_COMPAT_SUPP		0
 #define BCH_FEATURE_RO_COMPAT_SUPP	0
-#define BCH_FEATURE_INCOMPAT_SUPP	BCH_FEATURE_INCOMPAT_LARGE_BUCKET
+#define BCH_FEATURE_INCOMPAT_SUPP	(BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \
+					 BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE)
 
 #define BCH_HAS_COMPAT_FEATURE(sb, mask) \
 		((sb)->feature_compat & (mask))
@@ -77,7 +81,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
 		~BCH##_FEATURE_INCOMPAT_##flagname; \
 }
 
-BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET);
+BCH_FEATURE_INCOMPAT_FUNCS(obso_large_bucket, OBSO_LARGE_BUCKET);
+BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LOG_LARGE_BUCKET_SIZE);
 
 static inline bool bch_has_unknown_compat_features(struct cache_sb *sb)
 {
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f4674a3298af..3999641f1775 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -64,9 +64,25 @@ static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s
 {
 	unsigned int bucket_size = le16_to_cpu(s->bucket_size);
 
-	if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES &&
-	     bch_has_feature_large_bucket(sb))
-		bucket_size |= le16_to_cpu(s->bucket_size_hi) << 16;
+	if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
+		if (bch_has_feature_large_bucket(sb)) {
+			unsigned int max, order;
+
+			max = sizeof(unsigned int) * BITS_PER_BYTE - 1;
+			order = le16_to_cpu(s->bucket_size);
+			/*
+			 * bcache tool will make sure the overflow won't
+			 * happen, an error message here is enough.
+			 */
+			if (order > max)
+				pr_err("Bucket size (1 << %u) overflows\n",
+					order);
+			bucket_size = 1 << order;
+		} else if (bch_has_feature_obso_large_bucket(sb)) {
+			bucket_size +=
+				le16_to_cpu(s->obso_bucket_size_hi) << 16;
+		}
+	}
 
 	return bucket_size;
 }
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 52e8bcb33981..cf7399f03b71 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -213,7 +213,7 @@ struct cache_sb_disk {
 		__le16		keys;
 	};
 	__le64			d[SB_JOURNAL_BUCKETS];	/* journal buckets */
-	__le16			bucket_size_hi;
+	__le16			obso_bucket_size_hi;	/* obsoleted */
 };
 
 /*

From 5342fd4255021ef0c4ce7be52eea1c4ebda11c63 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 4 Jan 2021 15:41:22 +0800
Subject: [PATCH 540/547] bcache: set bcache device into read-only mode for
 BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET

If BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET is set in incompat feature
set, it means the cache device is created with obsoleted layout with
obso_bucket_site_hi. Now bcache does not support this feature bit, a new
BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE incompat feature bit is added
for a better layout to support large bucket size.

For the legacy compatibility purpose, if a cache device created with
obsoleted BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET feature bit, all bcache
devices attached to this cache set should be set to read-only. Then the
dirty data can be written back to backing device before re-create the
cache device with BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE feature bit
by the latest bcache-tools.

This patch checks BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET feature bit
when running a cache set and attach a bcache device to the cache set. If
this bit is set,
- When run a cache set, print an error kernel message to indicate all
  following attached bcache device will be read-only.
- When attach a bcache device, print an error kernel message to indicate
  the attached bcache device will be read-only, and ask users to update
  to latest bcache-tools.

Such change is only for cache device whose bucket size >= 32MB, this is
for the zoned SSD and almost nobody uses such large bucket size at this
moment. If you don't explicit set a large bucket size for a zoned SSD,
such change is totally transparent to your bcache device.

Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket")
Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/super.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 3999641f1775..2047a9cccdb5 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1332,6 +1332,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
 	bcache_device_link(&dc->disk, c, "bdev");
 	atomic_inc(&c->attached_dev_nr);
 
+	if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) {
+		pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
+		pr_err("Please update to the latest bcache-tools to create the cache device\n");
+		set_disk_ro(dc->disk.disk, 1);
+	}
+
 	/* Allow the writeback thread to proceed */
 	up_write(&dc->writeback_lock);
 
@@ -1554,6 +1560,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
 
 	bcache_device_link(d, c, "volume");
 
+	if (bch_has_feature_obso_large_bucket(&c->cache->sb)) {
+		pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
+		pr_err("Please update to the latest bcache-tools to create the cache device\n");
+		set_disk_ro(d->disk, 1);
+	}
+
 	return 0;
 err:
 	kobject_put(&d->kobj);
@@ -2113,6 +2125,9 @@ static int run_cache_set(struct cache_set *c)
 	c->cache->sb.last_mount = (u32)ktime_get_real_seconds();
 	bcache_write_super(c);
 
+	if (bch_has_feature_obso_large_bucket(&c->cache->sb))
+		pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n");
+
 	list_for_each_entry_safe(dc, t, &uncached_devices, list)
 		bch_cached_dev_attach(dc, c, NULL);
 

From 55e6ac1e1f31c7f678d9f3c8d54c6f102e5f1550 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 8 Jan 2021 20:57:22 +0000
Subject: [PATCH 541/547] io_uring: io_rw_reissue lockdep annotations

We expect io_rw_reissue() to take place only during submission with
uring_lock held. Add a lockdep annotation to check that invariant.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index cb57e0360fcb..55ba1922a349 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2692,6 +2692,8 @@ static bool io_rw_reissue(struct io_kiocb *req, long res)
 	if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
 		return false;
 
+	lockdep_assert_held(&req->ctx->uring_lock);
+
 	ret = io_sq_thread_acquire_mm_files(req->ctx, req);
 
 	if (io_resubmit_prep(req, ret)) {

From 4f793dc40bc605b97624fd36baf085b3c35e8bfd Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 8 Jan 2021 20:57:23 +0000
Subject: [PATCH 542/547] io_uring: inline io_uring_attempt_task_drop()

A simple preparation change inlining io_uring_attempt_task_drop() into
io_uring_flush().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 55ba1922a349..1c931e7a3948 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8964,23 +8964,6 @@ static void io_uring_del_task_file(struct file *file)
 		fput(file);
 }
 
-/*
- * Drop task note for this file if we're the only ones that hold it after
- * pending fput()
- */
-static void io_uring_attempt_task_drop(struct file *file)
-{
-	if (!current->io_uring)
-		return;
-	/*
-	 * fput() is pending, will be 2 if the only other ref is our potential
-	 * task file note. If the task is exiting, drop regardless of count.
-	 */
-	if (fatal_signal_pending(current) || (current->flags & PF_EXITING) ||
-	    atomic_long_read(&file->f_count) == 2)
-		io_uring_del_task_file(file);
-}
-
 static void io_uring_remove_task_files(struct io_uring_task *tctx)
 {
 	struct file *file;
@@ -9072,7 +9055,17 @@ void __io_uring_task_cancel(void)
 
 static int io_uring_flush(struct file *file, void *data)
 {
-	io_uring_attempt_task_drop(file);
+	if (!current->io_uring)
+		return 0;
+
+	/*
+	 * fput() is pending, will be 2 if the only other ref is our potential
+	 * task file note. If the task is exiting, drop regardless of count.
+	 */
+	if (fatal_signal_pending(current) || (current->flags & PF_EXITING) ||
+	    atomic_long_read(&file->f_count) == 2)
+		io_uring_del_task_file(file);
+
 	return 0;
 }
 

From 6b5733eb638b7068ab7cb34e663b55a1d1892d85 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 8 Jan 2021 20:57:24 +0000
Subject: [PATCH 543/547] io_uring: add warn_once for io_uring_flush()

files_cancel() should cancel all relevant requests and drop file notes,
so we should never have file notes after that, including on-exit fput
and flush. Add a WARN_ONCE to be sure.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1c931e7a3948..f39671a0d84f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9055,17 +9055,23 @@ void __io_uring_task_cancel(void)
 
 static int io_uring_flush(struct file *file, void *data)
 {
-	if (!current->io_uring)
+	struct io_uring_task *tctx = current->io_uring;
+
+	if (!tctx)
 		return 0;
 
+	/* we should have cancelled and erased it before PF_EXITING */
+	WARN_ON_ONCE((current->flags & PF_EXITING) &&
+		     xa_load(&tctx->xa, (unsigned long)file));
+
 	/*
 	 * fput() is pending, will be 2 if the only other ref is our potential
 	 * task file note. If the task is exiting, drop regardless of count.
 	 */
-	if (fatal_signal_pending(current) || (current->flags & PF_EXITING) ||
-	    atomic_long_read(&file->f_count) == 2)
-		io_uring_del_task_file(file);
+	if (atomic_long_read(&file->f_count) != 2)
+		return 0;
 
+	io_uring_del_task_file(file);
 	return 0;
 }
 

From d9d05217cb6990b9a56e13b56e7a1b71e2551f6c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 8 Jan 2021 20:57:25 +0000
Subject: [PATCH 544/547] io_uring: stop SQPOLL submit on creator's death

When the creator of SQPOLL io_uring dies (i.e. sqo_task), we don't want
its internals like ->files and ->mm to be poked by the SQPOLL task, it
have never been nice and recently got racy. That can happen when the
owner undergoes destruction and SQPOLL tasks tries to submit new
requests in parallel, and so calls io_sq_thread_acquire*().

That patch halts SQPOLL submissions when sqo_task dies by introducing
sqo_dead flag. Once set, the SQPOLL task must not do any submission,
which is synchronised by uring_lock as well as the new flag.

The tricky part is to make sure that disabling always happens, that
means either the ring is discovered by creator's do_exit() -> cancel,
or if the final close() happens before it's done by the creator. The
last is guaranteed by the fact that for SQPOLL the creator task and only
it holds exactly one file note, so either it pins up to do_exit() or
removed by the creator on the final put in flush. (see comments in
uring_flush() around file->f_count == 2).

One more place that can trigger io_sq_thread_acquire_*() is
__io_req_task_submit(). Shoot off requests on sqo_dead there, even
though actually we don't need to. That's because cancellation of
sqo_task should wait for the request before going any further.

note 1: io_disable_sqo_submit() does io_ring_set_wakeup_flag() so the
caller would enter the ring to get an error, but it still doesn't
guarantee that the flag won't be cleared.

note 2: if final __userspace__ close happens not from the creator
task, the file note will pin the ring until the task dies.

Fixed: b1b6b5a30dce8 ("kernel/io_uring: cancel io_uring before task works")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 62 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f39671a0d84f..2f305c097bd5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -262,6 +262,7 @@ struct io_ring_ctx {
 		unsigned int		drain_next: 1;
 		unsigned int		eventfd_async: 1;
 		unsigned int		restricted: 1;
+		unsigned int		sqo_dead: 1;
 
 		/*
 		 * Ring buffer of indices into array of io_uring_sqe, which is
@@ -2160,12 +2161,11 @@ static void io_req_task_cancel(struct callback_head *cb)
 static void __io_req_task_submit(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	bool fail;
 
-	fail = __io_sq_thread_acquire_mm(ctx) ||
-		__io_sq_thread_acquire_files(ctx);
 	mutex_lock(&ctx->uring_lock);
-	if (!fail)
+	if (!ctx->sqo_dead &&
+	    !__io_sq_thread_acquire_mm(ctx) &&
+	    !__io_sq_thread_acquire_files(ctx))
 		__io_queue_sqe(req, NULL);
 	else
 		__io_req_task_cancel(req, -EFAULT);
@@ -6954,7 +6954,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 		if (!list_empty(&ctx->iopoll_list))
 			io_do_iopoll(ctx, &nr_events, 0);
 
-		if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)))
+		if (to_submit && !ctx->sqo_dead &&
+		    likely(!percpu_ref_is_dying(&ctx->refs)))
 			ret = io_submit_sqes(ctx, to_submit);
 		mutex_unlock(&ctx->uring_lock);
 	}
@@ -8712,6 +8713,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 {
 	mutex_lock(&ctx->uring_lock);
 	percpu_ref_kill(&ctx->refs);
+
+	if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead))
+		ctx->sqo_dead = 1;
+
 	/* if force is set, the ring is going away. always drop after that */
 	ctx->cq_overflow_flushed = 1;
 	if (ctx->rings)
@@ -8874,6 +8879,18 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 	}
 }
 
+static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
+{
+	WARN_ON_ONCE(ctx->sqo_task != current);
+
+	mutex_lock(&ctx->uring_lock);
+	ctx->sqo_dead = 1;
+	mutex_unlock(&ctx->uring_lock);
+
+	/* make sure callers enter the ring to get error */
+	io_ring_set_wakeup_flag(ctx);
+}
+
 /*
  * We need to iteratively cancel requests, in case a request has dependent
  * hard links. These persist even for failure of cancelations, hence keep
@@ -8885,6 +8902,8 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 	struct task_struct *task = current;
 
 	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
+		/* for SQPOLL only sqo_task has task notes */
+		io_disable_sqo_submit(ctx);
 		task = ctx->sq_data->thread;
 		atomic_inc(&task->io_uring->in_idle);
 		io_sq_thread_park(ctx->sq_data);
@@ -9056,6 +9075,7 @@ void __io_uring_task_cancel(void)
 static int io_uring_flush(struct file *file, void *data)
 {
 	struct io_uring_task *tctx = current->io_uring;
+	struct io_ring_ctx *ctx = file->private_data;
 
 	if (!tctx)
 		return 0;
@@ -9071,7 +9091,16 @@ static int io_uring_flush(struct file *file, void *data)
 	if (atomic_long_read(&file->f_count) != 2)
 		return 0;
 
-	io_uring_del_task_file(file);
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		/* there is only one file note, which is owned by sqo_task */
+		WARN_ON_ONCE((ctx->sqo_task == current) ==
+			     !xa_load(&tctx->xa, (unsigned long)file));
+
+		io_disable_sqo_submit(ctx);
+	}
+
+	if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current)
+		io_uring_del_task_file(file);
 	return 0;
 }
 
@@ -9145,8 +9174,9 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
 
 #endif /* !CONFIG_MMU */
 
-static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
+static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 {
+	int ret = 0;
 	DEFINE_WAIT(wait);
 
 	do {
@@ -9155,6 +9185,11 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 
 		prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
 
+		if (unlikely(ctx->sqo_dead)) {
+			ret = -EOWNERDEAD;
+			goto out;
+		}
+
 		if (!io_sqring_full(ctx))
 			break;
 
@@ -9162,6 +9197,8 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 	} while (!signal_pending(current));
 
 	finish_wait(&ctx->sqo_sq_wait, &wait);
+out:
+	return ret;
 }
 
 static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
@@ -9235,10 +9272,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		io_cqring_overflow_flush(ctx, false, NULL, NULL);
 
+		ret = -EOWNERDEAD;
+		if (unlikely(ctx->sqo_dead))
+			goto out;
 		if (flags & IORING_ENTER_SQ_WAKEUP)
 			wake_up(&ctx->sq_data->wait);
-		if (flags & IORING_ENTER_SQ_WAIT)
-			io_sqpoll_wait_sq(ctx);
+		if (flags & IORING_ENTER_SQ_WAIT) {
+			ret = io_sqpoll_wait_sq(ctx);
+			if (ret)
+				goto out;
+		}
 		submitted = to_submit;
 	} else if (to_submit) {
 		ret = io_uring_add_task_file(ctx, f.file);
@@ -9665,6 +9708,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
 	return ret;
 err:
+	io_disable_sqo_submit(ctx);
 	io_ring_ctx_wait_and_kill(ctx);
 	return ret;
 }

From 6bae85bd70d063b63fbe262d943cc321eab31b17 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 8 Jan 2021 22:46:02 -0800
Subject: [PATCH 545/547] maintainers: update my email address

Change my email contact ahead of a likely painful eleven-month migration
to a certain cobalt enteprisey groupware cloud product that will totally
break my workflow.  Some day I may get used to having to email being
sequestered behind both claret and cerulean oath2+sms 2fa layers, but
for now I'll stick with keying in one password to receive an email vs.
the required four.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3d1b8fe97261..292394098e39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9273,7 +9273,7 @@ F:	drivers/net/ethernet/sgi/ioc3-eth.c
 
 IOMAP FILESYSTEM LIBRARY
 M:	Christoph Hellwig <hch@infradead.org>
-M:	Darrick J. Wong <darrick.wong@oracle.com>
+M:	Darrick J. Wong <djwong@kernel.org>
 M:	linux-xfs@vger.kernel.org
 M:	linux-fsdevel@vger.kernel.org
 L:	linux-xfs@vger.kernel.org
@@ -19504,7 +19504,7 @@ F:	arch/x86/xen/*swiotlb*
 F:	drivers/xen/*swiotlb*
 
 XFS FILESYSTEM
-M:	Darrick J. Wong <darrick.wong@oracle.com>
+M:	Darrick J. Wong <djwong@kernel.org>
 M:	linux-xfs@vger.kernel.org
 L:	linux-xfs@vger.kernel.org
 S:	Supported

From 5625dcfbbcf892e40e8d60abbb5f56701a1d031c Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 7 Jan 2021 17:12:08 +0530
Subject: [PATCH 546/547] Documentation: kbuild: Fix section reference

Section 3.11 was incorrectly called 3.9, fix it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Documentation/kbuild/makefiles.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index d36768cf1250..9f6a11881951 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -598,7 +598,7 @@ more details, with real examples.
 	explicitly added to $(targets).
 
 	Assignments to $(targets) are without $(obj)/ prefix. if_changed may be
-	used in conjunction with custom rules as defined in "3.9 Custom Rules".
+	used in conjunction with custom rules as defined in "3.11 Custom Rules".
 
 	Note: It is a typical mistake to forget the FORCE prerequisite.
 	Another common pitfall is that whitespace is sometimes significant; for

From 7c53f6b671f4aba70ff15e1b05148b10d58c2837 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 10 Jan 2021 14:34:50 -0800
Subject: [PATCH 547/547] Linux 5.11-rc3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8b2c3f88ee5e..9e73f82e0d86 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*