From 3b1313eb32c499d46dc4c3e896d19d9564c879c4 Mon Sep 17 00:00:00 2001
From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
Date: Sun, 24 Nov 2019 16:07:31 +0200
Subject: [PATCH 001/146] mips: cacheinfo: report shared CPU map

Report L1 caches as shared per core; L2 - per cluster.

This fixes "perf" that went crazy if shared_cpu_map attribute not
reported on sysfs, in form of

/sys/devices/system/cpu/cpu*/cache/index*/shared_cpu_list
/sys/devices/system/cpu/cpu*/cache/index*/shared_cpu_map

Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
Signed-off-by: Paul Burton <paulburton@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: linux-mips@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/kernel/cacheinfo.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c
index f777e44653d5..47312c529410 100644
--- a/arch/mips/kernel/cacheinfo.c
+++ b/arch/mips/kernel/cacheinfo.c
@@ -50,6 +50,25 @@ static int __init_cache_level(unsigned int cpu)
 	return 0;
 }
 
+static void fill_cpumask_siblings(int cpu, cpumask_t *cpu_map)
+{
+	int cpu1;
+
+	for_each_possible_cpu(cpu1)
+		if (cpus_are_siblings(cpu, cpu1))
+			cpumask_set_cpu(cpu1, cpu_map);
+}
+
+static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map)
+{
+	int cpu1;
+	int cluster = cpu_cluster(&cpu_data[cpu]);
+
+	for_each_possible_cpu(cpu1)
+		if (cpu_cluster(&cpu_data[cpu1]) == cluster)
+			cpumask_set_cpu(cpu1, cpu_map);
+}
+
 static int __populate_cache_leaves(unsigned int cpu)
 {
 	struct cpuinfo_mips *c = &current_cpu_data;
@@ -57,14 +76,20 @@ static int __populate_cache_leaves(unsigned int cpu)
 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
 
 	if (c->icache.waysize) {
+		/* L1 caches are per core */
+		fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map);
 		populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA);
+		fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map);
 		populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST);
 	} else {
 		populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED);
 	}
 
-	if (c->scache.waysize)
+	if (c->scache.waysize) {
+		/* L2 cache is per cluster */
+		fill_cpumask_cluster(cpu, &this_leaf->shared_cpu_map);
 		populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED);
+	}
 
 	if (c->tcache.waysize)
 		populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED);

From 87f67cc4c7b81273fd1d9f775192eeb3124562f9 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 27 Nov 2019 07:26:12 -0800
Subject: [PATCH 002/146] MIPS: Fix boot on Fuloong2 systems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 268a2d60013049 ("MIPS: Loongson64: Rename CPU TYPES") changed
Kconfig symbols as follows:
	CPU_LOONGSON2 to CPU_LOONGSON2EF
	CPU_LOONGSON3 to CPU_LOONGSON64
	SYS_HAS_CPU_LOONGSON3 to SYS_HAS_CPU_LOONGSON64

It did not touch SYS_HAS_CPU_LOONGSON2E or SYS_HAS_CPU_LOONGSON2F.
However, the patch changed a conditional from

 #if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \
     defined(CONFIG_SYS_HAS_CPU_LOONGSON2F)

to

 #if defined(CONFIG_SYS_HAS_CPU_LOONGSON2EF)

SYS_HAS_CPU_LOONGSON2EF does not exist, resulting in boot failures
with the qemu fulong2e emulation. Revert to the original code.

Fixes: 268a2d60013049 ("MIPS: Loongson64: Rename CPU TYPES")
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Paul Burton <paulburton@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: linux-mips@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/include/asm/cpu-type.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index c46c59b0f1b4..49f0061a6051 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -15,7 +15,8 @@
 static inline int __pure __get_cpu_type(const int cpu_type)
 {
 	switch (cpu_type) {
-#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2EF)
+#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \
+    defined(CONFIG_SYS_HAS_CPU_LOONGSON2F)
 	case CPU_LOONGSON2EF:
 #endif
 

From 7d2aa4bb90f5f6f1b8de8848c26042403f2d7bf9 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Fri, 29 Nov 2019 14:36:58 +0000
Subject: [PATCH 003/146] mips: Fix gettimeofday() in the vdso library

The libc provides a discovery mechanism for vDSO library and its
symbols. When a symbol is not exposed by the vDSOs the libc falls back
on the system calls.

With the introduction of the unified vDSO library on mips this behavior
is not honored anymore by the kernel in the case of gettimeofday().

The issue has been noticed and reported due to a dhclient failure on the
CI20 board:

root@letux:~# dhclient
../../../../lib/isc/unix/time.c:200: Operation not permitted
root@letux:~#

Restore the original behavior fixing gettimeofday() in the vDSO library.

Reported-by: H. Nikolaus Schaller <hns@goldelico.com>
Tested-by: H. Nikolaus Schaller <hns@goldelico.com> # CI20 with JZ4780
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Paul Burton <paulburton@kernel.org>
Cc: mips-creator-ci20-dev@googlegroups.com
Cc: letux-kernel@openphoenux.org
Cc: linux-mips@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/include/asm/vdso/gettimeofday.h | 13 -------------
 arch/mips/vdso/vgettimeofday.c            | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h
index b08825531e9f..0ae9b4cbc153 100644
--- a/arch/mips/include/asm/vdso/gettimeofday.h
+++ b/arch/mips/include/asm/vdso/gettimeofday.h
@@ -26,8 +26,6 @@
 
 #define __VDSO_USE_SYSCALL		ULLONG_MAX
 
-#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
-
 static __always_inline long gettimeofday_fallback(
 				struct __kernel_old_timeval *_tv,
 				struct timezone *_tz)
@@ -48,17 +46,6 @@ static __always_inline long gettimeofday_fallback(
 	return error ? -ret : ret;
 }
 
-#else
-
-static __always_inline long gettimeofday_fallback(
-				struct __kernel_old_timeval *_tv,
-				struct timezone *_tz)
-{
-	return -1;
-}
-
-#endif
-
 static __always_inline long clock_gettime_fallback(
 					clockid_t _clkid,
 					struct __kernel_timespec *_ts)
diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c
index 6ebdc37c89fc..6b83b6376a4b 100644
--- a/arch/mips/vdso/vgettimeofday.c
+++ b/arch/mips/vdso/vgettimeofday.c
@@ -17,12 +17,22 @@ int __vdso_clock_gettime(clockid_t clock,
 	return __cvdso_clock_gettime32(clock, ts);
 }
 
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
+
+/*
+ * This is behind the ifdef so that we don't provide the symbol when there's no
+ * possibility of there being a usable clocksource, because there's nothing we
+ * can do without it. When libc fails the symbol lookup it should fall back on
+ * the standard syscall path.
+ */
 int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
 			struct timezone *tz)
 {
 	return __cvdso_gettimeofday(tv, tz);
 }
 
+#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */
+
 int __vdso_clock_getres(clockid_t clock_id,
 			struct old_timespec32 *res)
 {
@@ -43,12 +53,22 @@ int __vdso_clock_gettime(clockid_t clock,
 	return __cvdso_clock_gettime(clock, ts);
 }
 
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
+
+/*
+ * This is behind the ifdef so that we don't provide the symbol when there's no
+ * possibility of there being a usable clocksource, because there's nothing we
+ * can do without it. When libc fails the symbol lookup it should fall back on
+ * the standard syscall path.
+ */
 int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
 			struct timezone *tz)
 {
 	return __cvdso_gettimeofday(tv, tz);
 }
 
+#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */
+
 int __vdso_clock_getres(clockid_t clock_id,
 			struct __kernel_timespec *res)
 {

From a7effde99b6e8119b28c235eda5419c2e03b3089 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 28 Nov 2019 09:13:26 -0800
Subject: [PATCH 004/146] MIPS: Kconfig: Use correct form for 'depends on'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the CONFIG_ prefix from "depends on" as it makes the selection
not possible.

Signed-off-by: Joe Perches <joe@perches.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Paul Burton <paulburton@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: linux-mips@vger.kernel.org
Cc: LKML <linux-kernel@vger.kernel.org>
---
 drivers/platform/mips/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/mips/Kconfig b/drivers/platform/mips/Kconfig
index f4d0a86c00d0..5e77b0dc5fd6 100644
--- a/drivers/platform/mips/Kconfig
+++ b/drivers/platform/mips/Kconfig
@@ -18,7 +18,7 @@ if MIPS_PLATFORM_DEVICES
 
 config CPU_HWMON
 	tristate "Loongson-3 CPU HWMon Driver"
-	depends on CONFIG_MACH_LOONGSON64
+	depends on MACH_LOONGSON64
 	select HWMON
 	default y
 	help

From 0b8d616fb5a8ffa307b1d3af37f55c15dae14f28 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Wed, 9 Oct 2019 13:48:09 +0200
Subject: [PATCH 005/146] taskstats: fix data-race

When assiging and testing taskstats in taskstats_exit() there's a race
when setting up and reading sig->stats when a thread-group with more
than one thread exits:

write to 0xffff8881157bbe10 of 8 bytes by task 7951 on cpu 0:
 taskstats_tgid_alloc kernel/taskstats.c:567 [inline]
 taskstats_exit+0x6b7/0x717 kernel/taskstats.c:596
 do_exit+0x2c2/0x18e0 kernel/exit.c:864
 do_group_exit+0xb4/0x1c0 kernel/exit.c:983
 get_signal+0x2a2/0x1320 kernel/signal.c:2734
 do_signal+0x3b/0xc00 arch/x86/kernel/signal.c:815
 exit_to_usermode_loop+0x250/0x2c0 arch/x86/entry/common.c:159
 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline]
 syscall_return_slowpath arch/x86/entry/common.c:274 [inline]
 do_syscall_64+0x2d7/0x2f0 arch/x86/entry/common.c:299
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

read to 0xffff8881157bbe10 of 8 bytes by task 7949 on cpu 1:
 taskstats_tgid_alloc kernel/taskstats.c:559 [inline]
 taskstats_exit+0xb2/0x717 kernel/taskstats.c:596
 do_exit+0x2c2/0x18e0 kernel/exit.c:864
 do_group_exit+0xb4/0x1c0 kernel/exit.c:983
 __do_sys_exit_group kernel/exit.c:994 [inline]
 __se_sys_exit_group kernel/exit.c:992 [inline]
 __x64_sys_exit_group+0x2e/0x30 kernel/exit.c:992
 do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fix this by using smp_load_acquire() and smp_store_release().

Reported-by: syzbot+c5d03165a1bd1dead0c1@syzkaller.appspotmail.com
Fixes: 34ec12349c8a ("taskstats: cleanup ->signal->stats allocation")
Cc: stable@vger.kernel.org
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Acked-by: Marco Elver <elver@google.com>
Reviewed-by: Will Deacon <will@kernel.org>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Link: https://lore.kernel.org/r/20191009114809.8643-1-christian.brauner@ubuntu.com
---
 kernel/taskstats.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 13a0f2e6ebc2..e2ac0e37c4ae 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -554,25 +554,33 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
-	struct taskstats *stats;
+	struct taskstats *stats_new, *stats;
 
-	if (sig->stats || thread_group_empty(tsk))
-		goto ret;
+	/* Pairs with smp_store_release() below. */
+	stats = smp_load_acquire(&sig->stats);
+	if (stats || thread_group_empty(tsk))
+		return stats;
 
 	/* No problem if kmem_cache_zalloc() fails */
-	stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
+	stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
 
 	spin_lock_irq(&tsk->sighand->siglock);
-	if (!sig->stats) {
-		sig->stats = stats;
-		stats = NULL;
+	stats = sig->stats;
+	if (!stats) {
+		/*
+		 * Pairs with smp_store_release() above and order the
+		 * kmem_cache_zalloc().
+		 */
+		smp_store_release(&sig->stats, stats_new);
+		stats = stats_new;
+		stats_new = NULL;
 	}
 	spin_unlock_irq(&tsk->sighand->siglock);
 
-	if (stats)
-		kmem_cache_free(taskstats_cache, stats);
-ret:
-	return sig->stats;
+	if (stats_new)
+		kmem_cache_free(taskstats_cache, stats_new);
+
+	return stats;
 }
 
 /* Send pid data out on exit */

From 13b0ba33d78b32384ea38abd03bc2b795fcab194 Mon Sep 17 00:00:00 2001
From: Pi-Hsun Shih <pihsun@chromium.org>
Date: Mon, 18 Nov 2019 14:18:05 +0800
Subject: [PATCH 006/146] drm/mediatek: Check return value of
 mtk_drm_ddp_comp_for_plane.

The mtk_drm_ddp_comp_for_plane can return NULL, but the usage doesn't
check for it. Add check for it.

Fixes: d6b53f68356f ("drm/mediatek: Add helper to get component for a plane")
Signed-off-by: Pi-Hsun Shih <pihsun@chromium.org>
Signed-off-by: CK Hu <ck.hu@mediatek.com>
---
 drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index f80a8ba75977..4c4f976c994e 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -310,7 +310,9 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
 
 		plane_state = to_mtk_plane_state(plane->state);
 		comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer);
-		mtk_ddp_comp_layer_config(comp, local_layer, plane_state);
+		if (comp)
+			mtk_ddp_comp_layer_config(comp, local_layer,
+						  plane_state);
 	}
 
 	return 0;
@@ -386,8 +388,9 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
 			comp = mtk_drm_ddp_comp_for_plane(crtc, plane,
 							  &local_layer);
 
-			mtk_ddp_comp_layer_config(comp, local_layer,
-						  plane_state);
+			if (comp)
+				mtk_ddp_comp_layer_config(comp, local_layer,
+							  plane_state);
 			plane_state->pending.config = false;
 		}
 		mtk_crtc->pending_planes = false;
@@ -401,7 +404,9 @@ int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
 	struct mtk_ddp_comp *comp;
 
 	comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer);
-	return mtk_ddp_comp_layer_check(comp, local_layer, state);
+	if (comp)
+		return mtk_ddp_comp_layer_check(comp, local_layer, state);
+	return 0;
 }
 
 static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,

From 92c17f6043647652e1f4a772636e4288d4ecea3e Mon Sep 17 00:00:00 2001
From: Yongqiang Niu <yongqiang.niu@mediatek.com>
Date: Wed, 27 Nov 2019 18:04:19 +0800
Subject: [PATCH 007/146] drm/mediatek: Fix can't get component for external
 display plane.

The original logic is ok for primary display, but will not find out
component for external display.

For example, plane->index is 6 for external display, but there are only
2 layer nr in external display, and this condition will never happen:
if (plane->index < (count + mtk_ddp_comp_layer_nr(comp)))

Fix this by using the offset of the plane to mtk_crtc->planes as index,
instead of plane->index.

Fixes: d6b53f68356f ("drm/mediatek: Add helper to get component for a plane")
Signed-off-by: Yongqiang Niu <yongqiang.niu@mediatek.com>
Signed-off-by: Pi-Hsun Shih <pihsun@chromium.org>
Signed-off-by: CK Hu <ck.hu@mediatek.com>
---
 drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index 4c4f976c994e..3305a94fc930 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -215,11 +215,12 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc,
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
 	struct mtk_ddp_comp *comp;
 	int i, count = 0;
+	unsigned int local_index = plane - mtk_crtc->planes;
 
 	for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
 		comp = mtk_crtc->ddp_comp[i];
-		if (plane->index < (count + mtk_ddp_comp_layer_nr(comp))) {
-			*local_layer = plane->index - count;
+		if (local_index < (count + mtk_ddp_comp_layer_nr(comp))) {
+			*local_layer = local_index - count;
 			return comp;
 		}
 		count += mtk_ddp_comp_layer_nr(comp);

From 53a256a9b925b47c7e67fc1f16ca41561a7b877c Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 5 Dec 2019 12:54:49 +0100
Subject: [PATCH 008/146] dmaengine: Fix access to uninitialized dma_slave_caps

dmaengine_desc_set_reuse() allocates a struct dma_slave_caps on the
stack, populates it using dma_get_slave_caps() and then accesses one
of its members.

However dma_get_slave_caps() may fail and this isn't accounted for,
leading to a legitimate warning of gcc-4.9 (but not newer versions):

   In file included from drivers/spi/spi-bcm2835.c:19:0:
   drivers/spi/spi-bcm2835.c: In function 'dmaengine_desc_set_reuse':
>> include/linux/dmaengine.h:1370:10: warning: 'caps.descriptor_reuse' is used uninitialized in this function [-Wuninitialized]
     if (caps.descriptor_reuse) {

Fix it, thereby also silencing the gcc-4.9 warning.

The issue has been present for 4 years but surfaces only now that
the first caller of dmaengine_desc_set_reuse() has been added in
spi-bcm2835.c. Another user of reusable DMA descriptors has existed
for a while in pxa_camera.c, but it sets the DMA_CTRL_REUSE flag
directly instead of calling dmaengine_desc_set_reuse(). Nevertheless,
tag this commit for stable in case there are out-of-tree users.

Fixes: 272420214d26 ("dmaengine: Add DMA_CTRL_REUSE")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: stable@vger.kernel.org # v4.3+
Link: https://lore.kernel.org/r/ca92998ccc054b4f2bfd60ef3adbab2913171eac.1575546234.git.lukas@wunner.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dmaengine.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8fcdee1c0cf9..dad4a68fa009 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -1364,8 +1364,11 @@ static inline int dma_get_slave_caps(struct dma_chan *chan,
 static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx)
 {
 	struct dma_slave_caps caps;
+	int ret;
 
-	dma_get_slave_caps(tx->chan, &caps);
+	ret = dma_get_slave_caps(tx->chan, &caps);
+	if (ret)
+		return ret;
 
 	if (caps.descriptor_reuse) {
 		tx->flags |= DMA_CTRL_REUSE;

From 6f7c41374b62fd80bbd8aae3536c43688c54d95e Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 25 Nov 2019 10:46:51 +0900
Subject: [PATCH 009/146] tomoyo: Don't use nifty names on sockets.

syzbot is reporting that use of SOCKET_I()->sk from open() can result in
use after free problem [1], for socket's inode is still reachable via
/proc/pid/fd/n despite destruction of SOCKET_I()->sk already completed.

At first I thought that this race condition applies to only open/getattr
permission checks. But James Morris has pointed out that there are more
permission checks where this race condition applies to. Thus, get rid of
tomoyo_get_socket_name() instead of conditionally bypassing permission
checks on sockets. As a side effect of this patch,
"socket:[family=\$:type=\$:protocol=\$]" in the policy files has to be
rewritten to "socket:[\$]".

[1] https://syzkaller.appspot.com/bug?id=73d590010454403d55164cca23bd0565b1eb3b74

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+0341f6a4d729d4e0acf1@syzkaller.appspotmail.com>
Reported-by: James Morris <jmorris@namei.org>
---
 security/tomoyo/realpath.c | 32 +-------------------------------
 1 file changed, 1 insertion(+), 31 deletions(-)

diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index e7832448d721..bf38fc1b59b2 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -217,31 +217,6 @@ out:
 	return ERR_PTR(-ENOMEM);
 }
 
-/**
- * tomoyo_get_socket_name - Get the name of a socket.
- *
- * @path:   Pointer to "struct path".
- * @buffer: Pointer to buffer to return value in.
- * @buflen: Sizeof @buffer.
- *
- * Returns the buffer.
- */
-static char *tomoyo_get_socket_name(const struct path *path, char * const buffer,
-				    const int buflen)
-{
-	struct inode *inode = d_backing_inode(path->dentry);
-	struct socket *sock = inode ? SOCKET_I(inode) : NULL;
-	struct sock *sk = sock ? sock->sk : NULL;
-
-	if (sk) {
-		snprintf(buffer, buflen, "socket:[family=%u:type=%u:protocol=%u]",
-			 sk->sk_family, sk->sk_type, sk->sk_protocol);
-	} else {
-		snprintf(buffer, buflen, "socket:[unknown]");
-	}
-	return buffer;
-}
-
 /**
  * tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root.
  *
@@ -279,12 +254,7 @@ char *tomoyo_realpath_from_path(const struct path *path)
 			break;
 		/* To make sure that pos is '\0' terminated. */
 		buf[buf_len - 1] = '\0';
-		/* Get better name for socket. */
-		if (sb->s_magic == SOCKFS_MAGIC) {
-			pos = tomoyo_get_socket_name(path, buf, buf_len - 1);
-			goto encode;
-		}
-		/* For "pipe:[\$]". */
+		/* For "pipe:[\$]" and "socket:[\$]". */
 		if (dentry->d_op && dentry->d_op->d_dname) {
 			pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1);
 			goto encode;

From a40c94be2336f3002563c9ae16572143ae3422e2 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Tue, 10 Dec 2019 17:55:45 +0100
Subject: [PATCH 010/146] dmaengine: dma-jz4780: Also break descriptor chains
 on JZ4725B

It turns out that the JZ4725B displays the same buggy behaviour as the
JZ4740 that was described in commit f4c255f1a747 ("dmaengine: dma-jz4780:
Break descriptor chains on JZ4740").

Work around it by using the same workaround previously used for the
JZ4740.

Fixes commit f4c255f1a747 ("dmaengine: dma-jz4780: Break descriptor
chains on JZ4740")

Cc: <stable@vger.kernel.org>
Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20191210165545.59690-1-paul@crapouillou.net
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dma-jz4780.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
index fa626acdc9b9..44af435628f8 100644
--- a/drivers/dma/dma-jz4780.c
+++ b/drivers/dma/dma-jz4780.c
@@ -999,7 +999,8 @@ static const struct jz4780_dma_soc_data jz4740_dma_soc_data = {
 static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = {
 	.nb_channels = 6,
 	.transfer_ord_max = 5,
-	.flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
+	.flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC |
+		 JZ_SOC_DATA_BREAK_LINKS,
 };
 
 static const struct jz4780_dma_soc_data jz4770_dma_soc_data = {

From cec935ce69fc386f13959578deb40963ebbb85c3 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 4 Dec 2019 08:52:08 +0100
Subject: [PATCH 011/146] media: cec: CEC 2.0-only bcast messages were ignored

Some messages are allowed to be a broadcast message in CEC 2.0
only, and should be ignored by CEC 1.4 devices.

Unfortunately, the check was wrong, causing such messages to be
marked as invalid under CEC 2.0.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: <stable@vger.kernel.org>      # for v4.10 and up
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/cec/cec-adap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index 9340435a94a0..e90c30dac68b 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -1085,11 +1085,11 @@ void cec_received_msg_ts(struct cec_adapter *adap,
 			valid_la = false;
 		else if (!cec_msg_is_broadcast(msg) && !(dir_fl & DIRECTED))
 			valid_la = false;
-		else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST1_4))
+		else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST))
 			valid_la = false;
 		else if (cec_msg_is_broadcast(msg) &&
-			 adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0 &&
-			 !(dir_fl & BCAST2_0))
+			 adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0 &&
+			 !(dir_fl & BCAST1_4))
 			valid_la = false;
 	}
 	if (valid_la && min_len) {

From 95c29d46ab2a517e4c26d0a07300edca6768db17 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Sat, 7 Dec 2019 23:48:09 +0100
Subject: [PATCH 012/146] media: cec: avoid decrementing transmit_queue_sz if
 it is 0

WARN if transmit_queue_sz is 0 but do not decrement it.
The CEC adapter will become unresponsive if it goes below
0 since then it thinks there are 4 billion messages in the
queue.

Obviously this should not happen, but a driver bug could
cause this.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: <stable@vger.kernel.org>      # for v4.12 and up
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/cec/cec-adap.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index e90c30dac68b..1060e633b623 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -380,7 +380,8 @@ static void cec_data_cancel(struct cec_data *data, u8 tx_status)
 	} else {
 		list_del_init(&data->list);
 		if (!(data->msg.tx_status & CEC_TX_STATUS_OK))
-			data->adap->transmit_queue_sz--;
+			if (!WARN_ON(!data->adap->transmit_queue_sz))
+				data->adap->transmit_queue_sz--;
 	}
 
 	if (data->msg.tx_status & CEC_TX_STATUS_OK) {
@@ -432,6 +433,14 @@ static void cec_flush(struct cec_adapter *adap)
 		 * need to do anything special in that case.
 		 */
 	}
+	/*
+	 * If something went wrong and this counter isn't what it should
+	 * be, then this will reset it back to 0. Warn if it is not 0,
+	 * since it indicates a bug, either in this framework or in a
+	 * CEC driver.
+	 */
+	if (WARN_ON(adap->transmit_queue_sz))
+		adap->transmit_queue_sz = 0;
 }
 
 /*
@@ -522,7 +531,8 @@ int cec_thread_func(void *_adap)
 		data = list_first_entry(&adap->transmit_queue,
 					struct cec_data, list);
 		list_del_init(&data->list);
-		adap->transmit_queue_sz--;
+		if (!WARN_ON(!data->adap->transmit_queue_sz))
+			adap->transmit_queue_sz--;
 
 		/* Make this the current transmitting message */
 		adap->transmitting = data;

From ac479b51f3f4aaa852b5d3f00ecfb9290230cf64 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 11 Dec 2019 12:47:57 +0100
Subject: [PATCH 013/146] media: cec: check 'transmit_in_progress', not
 'transmitting'

Currently wait_event_interruptible_timeout is called in cec_thread_func()
when adap->transmitting is set. But if the adapter is unconfigured
while transmitting, then adap->transmitting is set to NULL. But the
hardware is still actually transmitting the message, and that's
indicated by adap->transmit_in_progress and we should wait until that
is finished or times out before transmitting new messages.

As the original commit says: adap->transmitting is the userspace view,
adap->transmit_in_progress reflects the hardware state.

However, if adap->transmitting is NULL and adap->transmit_in_progress
is true, then wait_event_interruptible is called (no timeout), which
can get stuck indefinitely if the CEC driver is flaky and never marks
the transmit-in-progress as 'done'.

So test against transmit_in_progress when deciding whether to use
the timeout variant or not, instead of testing against adap->transmitting.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Fixes: 32804fcb612b ("media: cec: keep track of outstanding transmits")
Cc: <stable@vger.kernel.org>      # for v4.19 and up
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/cec/cec-adap.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index 1060e633b623..6c95dc471d4c 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -465,7 +465,7 @@ int cec_thread_func(void *_adap)
 		bool timeout = false;
 		u8 attempts;
 
-		if (adap->transmitting) {
+		if (adap->transmit_in_progress) {
 			int err;
 
 			/*
@@ -500,7 +500,7 @@ int cec_thread_func(void *_adap)
 			goto unlock;
 		}
 
-		if (adap->transmitting && timeout) {
+		if (adap->transmit_in_progress && timeout) {
 			/*
 			 * If we timeout, then log that. Normally this does
 			 * not happen and it is an indication of a faulty CEC
@@ -509,14 +509,18 @@ int cec_thread_func(void *_adap)
 			 * so much traffic on the bus that the adapter was
 			 * unable to transmit for CEC_XFER_TIMEOUT_MS (2.1s).
 			 */
-			pr_warn("cec-%s: message %*ph timed out\n", adap->name,
-				adap->transmitting->msg.len,
-				adap->transmitting->msg.msg);
+			if (adap->transmitting) {
+				pr_warn("cec-%s: message %*ph timed out\n", adap->name,
+					adap->transmitting->msg.len,
+					adap->transmitting->msg.msg);
+				/* Just give up on this. */
+				cec_data_cancel(adap->transmitting,
+						CEC_TX_STATUS_TIMEOUT);
+			} else {
+				pr_warn("cec-%s: transmit timed out\n", adap->name);
+			}
 			adap->transmit_in_progress = false;
 			adap->tx_timeouts++;
-			/* Just give up on this. */
-			cec_data_cancel(adap->transmitting,
-					CEC_TX_STATUS_TIMEOUT);
 			goto unlock;
 		}
 

From e5a52a1d15c79bb48a430fb263852263ec1d3f11 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Sat, 7 Dec 2019 23:43:23 +0100
Subject: [PATCH 014/146] media: pulse8-cec: fix lost
 cec_transmit_attempt_done() call

The periodic PING command could interfere with the result of
a CEC transmit, causing a lost cec_transmit_attempt_done()
call.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: <stable@vger.kernel.org>      # for v4.10 and up
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/pulse8-cec/pulse8-cec.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/media/usb/pulse8-cec/pulse8-cec.c b/drivers/media/usb/pulse8-cec/pulse8-cec.c
index ac88ade94cda..59609556d969 100644
--- a/drivers/media/usb/pulse8-cec/pulse8-cec.c
+++ b/drivers/media/usb/pulse8-cec/pulse8-cec.c
@@ -116,6 +116,7 @@ struct pulse8 {
 	unsigned int vers;
 	struct completion cmd_done;
 	struct work_struct work;
+	u8 work_result;
 	struct delayed_work ping_eeprom_work;
 	struct cec_msg rx_msg;
 	u8 data[DATA_SIZE];
@@ -137,8 +138,10 @@ static void pulse8_irq_work_handler(struct work_struct *work)
 {
 	struct pulse8 *pulse8 =
 		container_of(work, struct pulse8, work);
+	u8 result = pulse8->work_result;
 
-	switch (pulse8->data[0] & 0x3f) {
+	pulse8->work_result = 0;
+	switch (result & 0x3f) {
 	case MSGCODE_FRAME_DATA:
 		cec_received_msg(pulse8->adap, &pulse8->rx_msg);
 		break;
@@ -172,12 +175,12 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data,
 		pulse8->escape = false;
 	} else if (data == MSGEND) {
 		struct cec_msg *msg = &pulse8->rx_msg;
+		u8 msgcode = pulse8->buf[0];
 
 		if (debug)
 			dev_info(pulse8->dev, "received: %*ph\n",
 				 pulse8->idx, pulse8->buf);
-		pulse8->data[0] = pulse8->buf[0];
-		switch (pulse8->buf[0] & 0x3f) {
+		switch (msgcode & 0x3f) {
 		case MSGCODE_FRAME_START:
 			msg->len = 1;
 			msg->msg[0] = pulse8->buf[1];
@@ -186,14 +189,20 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data,
 			if (msg->len == CEC_MAX_MSG_SIZE)
 				break;
 			msg->msg[msg->len++] = pulse8->buf[1];
-			if (pulse8->buf[0] & MSGCODE_FRAME_EOM)
+			if (msgcode & MSGCODE_FRAME_EOM) {
+				WARN_ON(pulse8->work_result);
+				pulse8->work_result = msgcode;
 				schedule_work(&pulse8->work);
+				break;
+			}
 			break;
 		case MSGCODE_TRANSMIT_SUCCEEDED:
 		case MSGCODE_TRANSMIT_FAILED_LINE:
 		case MSGCODE_TRANSMIT_FAILED_ACK:
 		case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA:
 		case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE:
+			WARN_ON(pulse8->work_result);
+			pulse8->work_result = msgcode;
 			schedule_work(&pulse8->work);
 			break;
 		case MSGCODE_HIGH_ERROR:

From 6bd5ce6089b561f5392460bfb654dea89356ab1b Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 16 Dec 2019 19:16:48 +0900
Subject: [PATCH 015/146] tomoyo: Suppress RCU warning at
 list_for_each_entry_rcu().

John Garry has reported that allmodconfig kernel on arm64 causes flood of
"RCU-list traversed in non-reader section!!" warning. I don't know what
change caused this warning, but this warning is safe because TOMOYO uses
SRCU lock instead. Let's suppress this warning by explicitly telling that
the caller is holding SRCU lock.

Reported-and-tested-by: John Garry <john.garry@huawei.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 security/tomoyo/common.c |  9 ++++++---
 security/tomoyo/domain.c | 15 ++++++++++-----
 security/tomoyo/group.c  |  9 ++++++---
 security/tomoyo/util.c   |  6 ++++--
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index dd3d5942e669..c36bafbcd77e 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -951,7 +951,8 @@ static bool tomoyo_manager(void)
 	exe = tomoyo_get_exe();
 	if (!exe)
 		return false;
-	list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.policy_list[TOMOYO_ID_MANAGER], head.list) {
+	list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.policy_list[TOMOYO_ID_MANAGER], head.list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		if (!ptr->head.is_deleted &&
 		    (!tomoyo_pathcmp(domainname, ptr->manager) ||
 		     !strcmp(exe, ptr->manager->name))) {
@@ -1095,7 +1096,8 @@ static int tomoyo_delete_domain(char *domainname)
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
 		return -EINTR;
 	/* Is there an active domain? */
-	list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
+	list_for_each_entry_rcu(domain, &tomoyo_domain_list, list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		/* Never delete tomoyo_kernel_domain */
 		if (domain == &tomoyo_kernel_domain)
 			continue;
@@ -2778,7 +2780,8 @@ void tomoyo_check_profile(void)
 
 	tomoyo_policy_loaded = true;
 	pr_info("TOMOYO: 2.6.0\n");
-	list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
+	list_for_each_entry_rcu(domain, &tomoyo_domain_list, list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		const u8 profile = domain->profile;
 		struct tomoyo_policy_namespace *ns = domain->ns;
 
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 8526a0a74023..7869d6a9980b 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -41,7 +41,8 @@ int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
 
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
 		return -ENOMEM;
-	list_for_each_entry_rcu(entry, list, list) {
+	list_for_each_entry_rcu(entry, list, list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		if (entry->is_deleted == TOMOYO_GC_IN_PROGRESS)
 			continue;
 		if (!check_duplicate(entry, new_entry))
@@ -119,7 +120,8 @@ int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
 	}
 	if (mutex_lock_interruptible(&tomoyo_policy_lock))
 		goto out;
-	list_for_each_entry_rcu(entry, list, list) {
+	list_for_each_entry_rcu(entry, list, list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		if (entry->is_deleted == TOMOYO_GC_IN_PROGRESS)
 			continue;
 		if (!tomoyo_same_acl_head(entry, new_entry) ||
@@ -166,7 +168,8 @@ void tomoyo_check_acl(struct tomoyo_request_info *r,
 	u16 i = 0;
 
 retry:
-	list_for_each_entry_rcu(ptr, list, list) {
+	list_for_each_entry_rcu(ptr, list, list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		if (ptr->is_deleted || ptr->type != r->param_type)
 			continue;
 		if (!check_entry(r, ptr))
@@ -298,7 +301,8 @@ static inline bool tomoyo_scan_transition
 {
 	const struct tomoyo_transition_control *ptr;
 
-	list_for_each_entry_rcu(ptr, list, head.list) {
+	list_for_each_entry_rcu(ptr, list, head.list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		if (ptr->head.is_deleted || ptr->type != type)
 			continue;
 		if (ptr->domainname) {
@@ -735,7 +739,8 @@ retry:
 
 		/* Check 'aggregator' directive. */
 		candidate = &exename;
-		list_for_each_entry_rcu(ptr, list, head.list) {
+		list_for_each_entry_rcu(ptr, list, head.list,
+					srcu_read_lock_held(&tomoyo_ss)) {
 			if (ptr->head.is_deleted ||
 			    !tomoyo_path_matches_pattern(&exename,
 							 ptr->original_name))
diff --git a/security/tomoyo/group.c b/security/tomoyo/group.c
index a37c7dc66e44..1cecdd797597 100644
--- a/security/tomoyo/group.c
+++ b/security/tomoyo/group.c
@@ -133,7 +133,8 @@ tomoyo_path_matches_group(const struct tomoyo_path_info *pathname,
 {
 	struct tomoyo_path_group *member;
 
-	list_for_each_entry_rcu(member, &group->member_list, head.list) {
+	list_for_each_entry_rcu(member, &group->member_list, head.list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		if (member->head.is_deleted)
 			continue;
 		if (!tomoyo_path_matches_pattern(pathname, member->member_name))
@@ -161,7 +162,8 @@ bool tomoyo_number_matches_group(const unsigned long min,
 	struct tomoyo_number_group *member;
 	bool matched = false;
 
-	list_for_each_entry_rcu(member, &group->member_list, head.list) {
+	list_for_each_entry_rcu(member, &group->member_list, head.list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		if (member->head.is_deleted)
 			continue;
 		if (min > member->number.values[1] ||
@@ -191,7 +193,8 @@ bool tomoyo_address_matches_group(const bool is_ipv6, const __be32 *address,
 	bool matched = false;
 	const u8 size = is_ipv6 ? 16 : 4;
 
-	list_for_each_entry_rcu(member, &group->member_list, head.list) {
+	list_for_each_entry_rcu(member, &group->member_list, head.list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		if (member->head.is_deleted)
 			continue;
 		if (member->address.is_ipv6 != is_ipv6)
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index 52752e1a84ed..eba0b3395851 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -594,7 +594,8 @@ struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname)
 
 	name.name = domainname;
 	tomoyo_fill_path_info(&name);
-	list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
+	list_for_each_entry_rcu(domain, &tomoyo_domain_list, list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		if (!domain->is_deleted &&
 		    !tomoyo_pathcmp(&name, domain->domainname))
 			return domain;
@@ -1028,7 +1029,8 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
 		return false;
 	if (!domain)
 		return true;
-	list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) {
+	list_for_each_entry_rcu(ptr, &domain->acl_info_list, list,
+				srcu_read_lock_held(&tomoyo_ss)) {
 		u16 perm;
 		u8 i;
 

From e18e0f6b7c8f220774dd68965e8a9b046905acc8 Mon Sep 17 00:00:00 2001
From: Jitao Shi <jitao.shi@mediatek.com>
Date: Fri, 13 Dec 2019 17:52:15 +0800
Subject: [PATCH 016/146] drm/mediatek: reduce the hbp and hfp for phy timing

There are some extra data transfer in dsi.
ex. LPX, hs_prepare, hs_zero, hs_exit and the sof/eof of dsi packet.
This signal will enlarge the line time. So the real frame on dsi bus
will be lower than calc by video timing.

So dsi driver reduces the hbp and hfp to keep the line time.

Fixes: 7a5bc4e22ecf ("drm/mediatek: change the dsi phytiming calculate method")
Signed-off-by: Jitao Shi <jitao.shi@mediatek.com>
Tested-by: Hsin-Yi Wang <hsinyi@chromium.org>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: CK Hu <ck.hu@mediatek.com>
---
 drivers/gpu/drm/mediatek/mtk_dsi.c | 67 +++++++++++++++++-------------
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index e9931bbbe846..d77c9f484ce3 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -230,28 +230,25 @@ static void mtk_dsi_mask(struct mtk_dsi *dsi, u32 offset, u32 mask, u32 data)
 static void mtk_dsi_phy_timconfig(struct mtk_dsi *dsi)
 {
 	u32 timcon0, timcon1, timcon2, timcon3;
-	u32 ui, cycle_time;
+	u32 data_rate_mhz = DIV_ROUND_UP(dsi->data_rate, 1000000);
 	struct mtk_phy_timing *timing = &dsi->phy_timing;
 
-	ui = DIV_ROUND_UP(1000000000, dsi->data_rate);
-	cycle_time = div_u64(8000000000ULL, dsi->data_rate);
+	timing->lpx = (60 * data_rate_mhz / (8 * 1000)) + 1;
+	timing->da_hs_prepare = (80 * data_rate_mhz + 4 * 1000) / 8000;
+	timing->da_hs_zero = (170 * data_rate_mhz + 10 * 1000) / 8000 + 1 -
+			     timing->da_hs_prepare;
+	timing->da_hs_trail = timing->da_hs_prepare + 1;
 
-	timing->lpx = NS_TO_CYCLE(60, cycle_time);
-	timing->da_hs_prepare = NS_TO_CYCLE(50 + 5 * ui, cycle_time);
-	timing->da_hs_zero = NS_TO_CYCLE(110 + 6 * ui, cycle_time);
-	timing->da_hs_trail = NS_TO_CYCLE(77 + 4 * ui, cycle_time);
+	timing->ta_go = 4 * timing->lpx - 2;
+	timing->ta_sure = timing->lpx + 2;
+	timing->ta_get = 4 * timing->lpx;
+	timing->da_hs_exit = 2 * timing->lpx + 1;
 
-	timing->ta_go = 4 * timing->lpx;
-	timing->ta_sure = 3 * timing->lpx / 2;
-	timing->ta_get = 5 * timing->lpx;
-	timing->da_hs_exit = 2 * timing->lpx;
-
-	timing->clk_hs_zero = NS_TO_CYCLE(336, cycle_time);
-	timing->clk_hs_trail = NS_TO_CYCLE(100, cycle_time) + 10;
-
-	timing->clk_hs_prepare = NS_TO_CYCLE(64, cycle_time);
-	timing->clk_hs_post = NS_TO_CYCLE(80 + 52 * ui, cycle_time);
-	timing->clk_hs_exit = 2 * timing->lpx;
+	timing->clk_hs_prepare = 70 * data_rate_mhz / (8 * 1000);
+	timing->clk_hs_post = timing->clk_hs_prepare + 8;
+	timing->clk_hs_trail = timing->clk_hs_prepare;
+	timing->clk_hs_zero = timing->clk_hs_trail * 4;
+	timing->clk_hs_exit = 2 * timing->clk_hs_trail;
 
 	timcon0 = timing->lpx | timing->da_hs_prepare << 8 |
 		  timing->da_hs_zero << 16 | timing->da_hs_trail << 24;
@@ -482,27 +479,39 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi)
 			dsi_tmp_buf_bpp - 10);
 
 	data_phy_cycles = timing->lpx + timing->da_hs_prepare +
-				  timing->da_hs_zero + timing->da_hs_exit + 2;
+			  timing->da_hs_zero + timing->da_hs_exit + 3;
 
 	if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) {
-		if (vm->hfront_porch * dsi_tmp_buf_bpp >
+		if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp >
 		    data_phy_cycles * dsi->lanes + 18) {
-			horizontal_frontporch_byte = vm->hfront_porch *
-						     dsi_tmp_buf_bpp -
-						     data_phy_cycles *
-						     dsi->lanes - 18;
+			horizontal_frontporch_byte =
+				vm->hfront_porch * dsi_tmp_buf_bpp -
+				(data_phy_cycles * dsi->lanes + 18) *
+				vm->hfront_porch /
+				(vm->hfront_porch + vm->hback_porch);
+
+			horizontal_backporch_byte =
+				horizontal_backporch_byte -
+				(data_phy_cycles * dsi->lanes + 18) *
+				vm->hback_porch /
+				(vm->hfront_porch + vm->hback_porch);
 		} else {
 			DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n");
 			horizontal_frontporch_byte = vm->hfront_porch *
 						     dsi_tmp_buf_bpp;
 		}
 	} else {
-		if (vm->hfront_porch * dsi_tmp_buf_bpp >
+		if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp >
 		    data_phy_cycles * dsi->lanes + 12) {
-			horizontal_frontporch_byte = vm->hfront_porch *
-						     dsi_tmp_buf_bpp -
-						     data_phy_cycles *
-						     dsi->lanes - 12;
+			horizontal_frontporch_byte =
+				vm->hfront_porch * dsi_tmp_buf_bpp -
+				(data_phy_cycles * dsi->lanes + 12) *
+				vm->hfront_porch /
+				(vm->hfront_porch + vm->hback_porch);
+			horizontal_backporch_byte = horizontal_backporch_byte -
+				(data_phy_cycles * dsi->lanes + 12) *
+				vm->hback_porch /
+				(vm->hfront_porch + vm->hback_porch);
 		} else {
 			DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n");
 			horizontal_frontporch_byte = vm->hfront_porch *

From 92adc96f8eecd9522a907c197cc3d62e405539fe Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Wed, 18 Dec 2019 21:26:50 +0800
Subject: [PATCH 017/146] ALSA: usb-audio: set the interface format after
 resume on Dell WD19

Recently we found the headset-mic on the Dell Dock WD19 doesn't work
anymore after s3 (s2i or deep), this problem could be workarounded by
closing (pcm_close) the app and then reopening (pcm_open) the app, so
this bug is not easy to be detected by users.

When problem happens, retire_capture_urb() could still be called
periodically, but the size of captured data is always 0, it could be
a firmware bug on the dock. Anyway I found after resuming, the
snd_usb_pcm_prepare() will be called, and if we forcibly run
set_format() to set the interface and its endpoint, the capture
size will be normal again. This problem and workaound also apply to
playback.

To fix it in the kernel, add a quirk to let set_format() run
forcibly once after resume.

Signed-off-by: Hui Wang <hui.wang@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20191218132650.6303-1-hui.wang@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/card.h         |  1 +
 sound/usb/pcm.c          | 21 +++++++++++++++++++--
 sound/usb/quirks-table.h |  3 ++-
 sound/usb/quirks.c       | 11 +++++++++++
 sound/usb/usbaudio.h     |  3 ++-
 5 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/sound/usb/card.h b/sound/usb/card.h
index 2991b9986f66..395403a2d33f 100644
--- a/sound/usb/card.h
+++ b/sound/usb/card.h
@@ -145,6 +145,7 @@ struct snd_usb_substream {
 	struct snd_usb_endpoint *sync_endpoint;
 	unsigned long flags;
 	bool need_setup_ep;		/* (re)configure EP at prepare? */
+	bool need_setup_fmt;		/* (re)configure fmt after resume? */
 	unsigned int speed;		/* USB_SPEED_XXX */
 
 	u64 formats;			/* format bitmasks (all or'ed) */
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 9c8930bb00c8..96298c767c76 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -510,11 +510,11 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt)
 	if (WARN_ON(altsd->bAlternateSetting != fmt->altsetting))
 		return -EINVAL;
 
-	if (fmt == subs->cur_audiofmt)
+	if (fmt == subs->cur_audiofmt && !subs->need_setup_fmt)
 		return 0;
 
 	/* close the old interface */
-	if (subs->interface >= 0 && subs->interface != fmt->iface) {
+	if (subs->interface >= 0 && (subs->interface != fmt->iface || subs->need_setup_fmt)) {
 		if (!subs->stream->chip->keep_iface) {
 			err = usb_set_interface(subs->dev, subs->interface, 0);
 			if (err < 0) {
@@ -528,6 +528,9 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt)
 		subs->altset_idx = 0;
 	}
 
+	if (subs->need_setup_fmt)
+		subs->need_setup_fmt = false;
+
 	/* set interface */
 	if (iface->cur_altsetting != alts) {
 		err = snd_usb_select_mode_quirk(subs, fmt);
@@ -1728,6 +1731,13 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea
 		subs->data_endpoint->retire_data_urb = retire_playback_urb;
 		subs->running = 0;
 		return 0;
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+		if (subs->stream->chip->setup_fmt_after_resume_quirk) {
+			stop_endpoints(subs, true);
+			subs->need_setup_fmt = true;
+			return 0;
+		}
+		break;
 	}
 
 	return -EINVAL;
@@ -1760,6 +1770,13 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream
 		subs->data_endpoint->retire_data_urb = retire_capture_urb;
 		subs->running = 1;
 		return 0;
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+		if (subs->stream->chip->setup_fmt_after_resume_quirk) {
+			stop_endpoints(subs, true);
+			subs->need_setup_fmt = true;
+			return 0;
+		}
+		break;
 	}
 
 	return -EINVAL;
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 70c338f3ae24..d187aa6d50db 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -3466,7 +3466,8 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"),
 		.vendor_name = "Dell",
 		.product_name = "WD19 Dock",
 		.profile_name = "Dell-WD15-Dock",
-		.ifnum = QUIRK_NO_INTERFACE
+		.ifnum = QUIRK_ANY_INTERFACE,
+		.type = QUIRK_SETUP_FMT_AFTER_RESUME
 	}
 },
 /* MOTU Microbook II */
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 349e1e52996d..a81c2066499f 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -508,6 +508,16 @@ static int create_standard_mixer_quirk(struct snd_usb_audio *chip,
 	return snd_usb_create_mixer(chip, quirk->ifnum, 0);
 }
 
+
+static int setup_fmt_after_resume_quirk(struct snd_usb_audio *chip,
+				       struct usb_interface *iface,
+				       struct usb_driver *driver,
+				       const struct snd_usb_audio_quirk *quirk)
+{
+	chip->setup_fmt_after_resume_quirk = 1;
+	return 1;	/* Continue with creating streams and mixer */
+}
+
 /*
  * audio-interface quirks
  *
@@ -546,6 +556,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip,
 		[QUIRK_AUDIO_EDIROL_UAXX] = create_uaxx_quirk,
 		[QUIRK_AUDIO_ALIGN_TRANSFER] = create_align_transfer_quirk,
 		[QUIRK_AUDIO_STANDARD_MIXER] = create_standard_mixer_quirk,
+		[QUIRK_SETUP_FMT_AFTER_RESUME] = setup_fmt_after_resume_quirk,
 	};
 
 	if (quirk->type < QUIRK_TYPE_COUNT) {
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index ff3cbf653de8..6fe3ab582ec6 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -33,7 +33,7 @@ struct snd_usb_audio {
 	wait_queue_head_t shutdown_wait;
 	unsigned int txfr_quirk:1; /* Subframe boundaries on transfers */
 	unsigned int tx_length_quirk:1; /* Put length specifier in transfers */
-	
+	unsigned int setup_fmt_after_resume_quirk:1; /* setup the format to interface after resume */
 	int num_interfaces;
 	int num_suspended_intf;
 	int sample_rate_read_error;
@@ -98,6 +98,7 @@ enum quirk_type {
 	QUIRK_AUDIO_EDIROL_UAXX,
 	QUIRK_AUDIO_ALIGN_TRANSFER,
 	QUIRK_AUDIO_STANDARD_MIXER,
+	QUIRK_SETUP_FMT_AFTER_RESUME,
 
 	QUIRK_TYPE_COUNT
 };

From 57177d214ee0816c4436c23d6c933ccb32c571f1 Mon Sep 17 00:00:00 2001
From: Stefan Mavrodiev <stefan@olimex.com>
Date: Tue, 17 Dec 2019 14:46:32 +0200
Subject: [PATCH 018/146] drm/sun4i: hdmi: Remove duplicate cleanup calls

When the HDMI unbinds drm_connector_cleanup() and drm_encoder_cleanup()
are called. This also happens when the connector and the encoder are
destroyed. This double call triggers a NULL pointer exception.

The patch fixes this by removing the cleanup calls in the unbind
function.

Cc: <stable@vger.kernel.org>
Fixes: 9c5681011a0c ("drm/sun4i: Add HDMI support")
Signed-off-by: Stefan Mavrodiev <stefan@olimex.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20191217124632.20820-1-stefan@olimex.com
---
 drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index a7c4654445c7..68d4644ac2dc 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -685,8 +685,6 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master,
 	struct sun4i_hdmi *hdmi = dev_get_drvdata(dev);
 
 	cec_unregister_adapter(hdmi->cec_adap);
-	drm_connector_cleanup(&hdmi->connector);
-	drm_encoder_cleanup(&hdmi->encoder);
 	i2c_del_adapter(hdmi->i2c);
 	i2c_put_adapter(hdmi->ddc_i2c);
 	clk_disable_unprepare(hdmi->mod_clk);

From a4a3893114a41e365274d5fab5d9ff5acc235ff0 Mon Sep 17 00:00:00 2001
From: Jouni Hogander <jouni.hogander@unikie.com>
Date: Mon, 9 Dec 2019 14:37:07 +0200
Subject: [PATCH 019/146] MIPS: Prevent link failure with kcov instrumentation

__sanitizer_cov_trace_pc() is not linked in and causing link
failure if KCOV_INSTRUMENT is enabled. Fix this by disabling
instrumentation for compressed image.

Signed-off-by: Jouni Hogander <jouni.hogander@unikie.com>
Signed-off-by: Paul Burton <paulburton@kernel.org>
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: linux-mips@vger.kernel.org
---
 arch/mips/boot/compressed/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 172801ed35b8..d859f079b771 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -29,6 +29,9 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
 	-DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
 	-DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT		:= n
+
 # decompressor objects (linked with vmlinuz)
 vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
 

From f8fffebdea752a25757b906f3dffecf1a59a6194 Mon Sep 17 00:00:00 2001
From: Paul Burton <paulburton@kernel.org>
Date: Thu, 5 Dec 2019 10:23:18 -0800
Subject: [PATCH 020/146] MIPS: BPF: Disable MIPS32 eBPF JIT

Commit 716850ab104d ("MIPS: eBPF: Initial eBPF support for MIPS32
architecture.") enabled our eBPF JIT for MIPS32 kernels, whereas it has
previously only been availailable for MIPS64. It was my understanding at
the time that the BPF test suite was passing & JITing a comparable
number of tests to our cBPF JIT [1], but it turns out that was not the
case.

The eBPF JIT has a number of problems on MIPS32:

- Most notably various code paths still result in emission of MIPS64
  instructions which will cause reserved instruction exceptions & kernel
  panics when run on MIPS32 CPUs.

- The eBPF JIT doesn't account for differences between the O32 ABI used
  by MIPS32 kernels versus the N64 ABI used by MIPS64 kernels. Notably
  arguments beyond the first 4 are passed on the stack in O32, and this
  is entirely unhandled when JITing a BPF_CALL instruction. Stack space
  must be reserved for arguments even if they all fit in registers, and
  the callee is free to assume that stack space has been reserved for
  its use - with the eBPF JIT this is not the case, so calling any
  function can result in clobbering values on the stack & unpredictable
  behaviour. Function arguments in eBPF are always 64-bit values which
  is also entirely unhandled - the JIT still uses a single (32-bit)
  register per argument. As a result all function arguments are always
  passed incorrectly when JITing a BPF_CALL instruction, leading to
  kernel crashes or strange behavior.

- The JIT attempts to bail our on use of ALU64 instructions or 64-bit
  memory access instructions. The code doing this at the start of
  build_one_insn() incorrectly checks whether BPF_OP() equals BPF_DW,
  when it should really be checking BPF_SIZE() & only doing so when
  BPF_CLASS() is one of BPF_{LD,LDX,ST,STX}. This results in false
  positives that cause more bailouts than intended, and that in turns
  hides some of the problems described above.

- The kernel's cBPF->eBPF translation makes heavy use of 64-bit eBPF
  instructions that the MIPS32 eBPF JIT bails out on, leading to most
  cBPF programs not being JITed at all.

Until these problems are resolved, revert the enabling of the eBPF JIT
on MIPS32 done by commit 716850ab104d ("MIPS: eBPF: Initial eBPF support
for MIPS32 architecture.").

Note that this does not undo the changes made to the eBPF JIT by that
commit, since they are a useful starting point to providing MIPS32
support - they're just not nearly complete.

[1] https://lore.kernel.org/linux-mips/MWHPR2201MB13583388481F01A422CE7D66D4410@MWHPR2201MB1358.namprd22.prod.outlook.com/

Signed-off-by: Paul Burton <paulburton@kernel.org>
Fixes: 716850ab104d ("MIPS: eBPF: Initial eBPF support for MIPS32 architecture.")
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Hassan Naveed <hnaveed@wavecomp.com>
Cc: Tony Ambardar <itugrok@yahoo.com>
Cc: bpf@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: <stable@vger.kernel.org> # v5.2+
Cc: linux-mips@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index c86be02b6d89..90ee2ed463a8 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -47,7 +47,7 @@ config MIPS
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
 	select HAVE_ASM_MODVERSIONS
-	select HAVE_EBPF_JIT if (!CPU_MICROMIPS)
+	select HAVE_EBPF_JIT if (64BIT && !CPU_MICROMIPS)
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_COPY_THREAD_TLS
 	select HAVE_C_RECORDMCOUNT

From f596cf0d8062cb5d0a4513a8b3afca318c13be10 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@dlink.ru>
Date: Fri, 6 Dec 2019 11:07:41 +0300
Subject: [PATCH 021/146] MIPS: BPF: eBPF JIT: check for MIPS ISA compliance in
 Kconfig

It is completely wrong to check for compile-time MIPS ISA revision in
the body of bpf_int_jit_compile() as it may lead to get MIPS JIT fully
omitted by the CC while the rest system will think that the JIT is
actually present and works [1].
We can check if the selected CPU really supports MIPS eBPF JIT at
configure time and avoid such situations when kernel can be built
without both JIT and interpreter, but with CONFIG_BPF_SYSCALL=y.

[1] https://lore.kernel.org/linux-mips/09d713a59665d745e21d021deeaebe0a@dlink.ru/

Fixes: 716850ab104d ("MIPS: eBPF: Initial eBPF support for MIPS32 architecture.")
Cc: <stable@vger.kernel.org> # v5.2+
Signed-off-by: Alexander Lobakin <alobakin@dlink.ru>
Signed-off-by: Paul Burton <paulburton@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: Hassan Naveed <hnaveed@wavecomp.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: linux-mips@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org
---
 arch/mips/Kconfig        | 2 +-
 arch/mips/net/ebpf_jit.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 90ee2ed463a8..827bbda105f3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -47,7 +47,7 @@ config MIPS
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
 	select HAVE_ASM_MODVERSIONS
-	select HAVE_EBPF_JIT if (64BIT && !CPU_MICROMIPS)
+	select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_COPY_THREAD_TLS
 	select HAVE_C_RECORDMCOUNT
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 46b76751f3a5..a2405d5f7d1e 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -1803,7 +1803,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	unsigned int image_size;
 	u8 *image_ptr;
 
-	if (!prog->jit_requested || MIPS_ISA_REV < 2)
+	if (!prog->jit_requested)
 		return prog;
 
 	tmp = bpf_jit_blind_constants(prog);

From 66c5d718e5a6f80153b5e8d6ad8ba8e9c3320839 Mon Sep 17 00:00:00 2001
From: Kailang Yang <kailang@realtek.com>
Date: Mon, 9 Dec 2019 15:56:15 +0800
Subject: [PATCH 022/146] ALSA: hda/realtek - Add headset Mic no shutup for
 ALC283

Chrome machine had humming noise from external speaker plugin at
codec D3 state.

Signed-off-by: Kailang Yang <kailang@realtek.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/2692449396954c6c968f5b75e2660358@realtek.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index dbfafee97931..5bc1a6d24333 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -501,6 +501,7 @@ static void alc_shutup_pins(struct hda_codec *codec)
 	struct alc_spec *spec = codec->spec;
 
 	switch (codec->core.vendor_id) {
+	case 0x10ec0283:
 	case 0x10ec0286:
 	case 0x10ec0288:
 	case 0x10ec0298:

From c9b3b8207bc487de02cbca968927ba2c2cb46aaf Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 10 Dec 2019 21:24:28 +0100
Subject: [PATCH 023/146] netfilter: nf_flow_table: fix big-endian integer
 overflow

In some configurations, gcc reports an integer overflow:

net/netfilter/nf_flow_table_offload.c: In function 'nf_flow_rule_match':
net/netfilter/nf_flow_table_offload.c:80:21: error: unsigned conversion from 'int' to '__be16' {aka 'short unsigned int'} changes value from '327680' to '0' [-Werror=overflow]
   mask->tcp.flags = TCP_FLAG_RST | TCP_FLAG_FIN;
                     ^~~~~~~~~~~~

From what I can tell, we want the upper 16 bits of these constants,
so they need to be shifted in cpu-endian mode.

Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_offload.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index de7a0d1e15c8..0d72e5ccb47b 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -88,7 +88,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
 	switch (tuple->l4proto) {
 	case IPPROTO_TCP:
 		key->tcp.flags = 0;
-		mask->tcp.flags = TCP_FLAG_RST | TCP_FLAG_FIN;
+		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
 		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
 		break;
 	case IPPROTO_UDP:

From d05d5db815d56a0ce203ed297153d9794dfdcb68 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 13 Dec 2019 01:19:58 +0100
Subject: [PATCH 024/146] selftests: netfilter: extend flowtable test script
 with dnat rule

NAT test currently covers snat (masquerade) only.

Also add a dnat rule and then check that a connecting to the
to-be-dnated address will work.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 .../selftests/netfilter/nft_flowtable.sh      | 39 ++++++++++++++++---
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 16571ac1dab4..d3e0809ab368 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -226,17 +226,19 @@ check_transfer()
 	return 0
 }
 
-test_tcp_forwarding()
+test_tcp_forwarding_ip()
 {
 	local nsa=$1
 	local nsb=$2
+	local dstip=$3
+	local dstport=$4
 	local lret=0
 
 	ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
 	lpid=$!
 
 	sleep 1
-	ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" &
+	ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
 	cpid=$!
 
 	sleep 3
@@ -258,6 +260,28 @@ test_tcp_forwarding()
 	return $lret
 }
 
+test_tcp_forwarding()
+{
+	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+
+	return $?
+}
+
+test_tcp_forwarding_nat()
+{
+	local lret
+
+	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+	lret=$?
+
+	if [ $lret -eq 0 ] ; then
+		test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+		lret=$?
+	fi
+
+	return $lret
+}
+
 make_file "$ns1in" "ns1"
 make_file "$ns2in" "ns2"
 
@@ -283,14 +307,19 @@ ip -net ns2 route add 192.168.10.1 via 10.0.2.1
 # Same, but with NAT enabled.
 ip netns exec nsr1 nft -f - <<EOF
 table ip nat {
+   chain prerouting {
+      type nat hook prerouting priority 0; policy accept;
+      meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+   }
+
    chain postrouting {
       type nat hook postrouting priority 0; policy accept;
-      meta oifname "veth1" masquerade
+      meta oifname "veth1" counter masquerade
    }
 }
 EOF
 
-test_tcp_forwarding ns1 ns2
+test_tcp_forwarding_nat ns1 ns2
 
 if [ $? -eq 0 ] ;then
 	echo "PASS: flow offloaded for ns1/ns2 with NAT"
@@ -313,7 +342,7 @@ fi
 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 
-test_tcp_forwarding ns1 ns2
+test_tcp_forwarding_nat ns1 ns2
 if [ $? -eq 0 ] ;then
 	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
 else

From e608f631f0ba5f1fc5ee2e260a3a35d13107cbfe Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 15 Dec 2019 03:49:25 +0100
Subject: [PATCH 025/146] netfilter: ebtables: compat: reject all padding in
 matches/watchers

syzbot reported following splat:

BUG: KASAN: vmalloc-out-of-bounds in size_entry_mwt net/bridge/netfilter/ebtables.c:2063 [inline]
BUG: KASAN: vmalloc-out-of-bounds in compat_copy_entries+0x128b/0x1380 net/bridge/netfilter/ebtables.c:2155
Read of size 4 at addr ffffc900004461f4 by task syz-executor267/7937

CPU: 1 PID: 7937 Comm: syz-executor267 Not tainted 5.5.0-rc1-syzkaller #0
 size_entry_mwt net/bridge/netfilter/ebtables.c:2063 [inline]
 compat_copy_entries+0x128b/0x1380 net/bridge/netfilter/ebtables.c:2155
 compat_do_replace+0x344/0x720 net/bridge/netfilter/ebtables.c:2249
 compat_do_ebt_set_ctl+0x22f/0x27e net/bridge/netfilter/ebtables.c:2333
 [..]

Because padding isn't considered during computation of ->buf_user_offset,
"total" is decremented by fewer bytes than it should.

Therefore, the first part of

if (*total < sizeof(*entry) || entry->next_offset < sizeof(*entry))

will pass, -- it should not have.  This causes oob access:
entry->next_offset is past the vmalloced size.

Reject padding and check that computed user offset (sum of ebt_entry
structure plus all individual matches/watchers/targets) is same
value that userspace gave us as the offset of the next entry.

Reported-by: syzbot+f68108fed972453a0ad4@syzkaller.appspotmail.com
Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 35 ++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 4096d8a74a2b..e1256e03a9a8 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1867,7 +1867,7 @@ static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz)
 }
 
 static int ebt_buf_add(struct ebt_entries_buf_state *state,
-		       void *data, unsigned int sz)
+		       const void *data, unsigned int sz)
 {
 	if (state->buf_kern_start == NULL)
 		goto count_only;
@@ -1901,7 +1901,7 @@ enum compat_mwt {
 	EBT_COMPAT_TARGET,
 };
 
-static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
+static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt,
 				enum compat_mwt compat_mwt,
 				struct ebt_entries_buf_state *state,
 				const unsigned char *base)
@@ -1979,22 +1979,23 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 /* return size of all matches, watchers or target, including necessary
  * alignment and padding.
  */
-static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
+static int ebt_size_mwt(const struct compat_ebt_entry_mwt *match32,
 			unsigned int size_left, enum compat_mwt type,
 			struct ebt_entries_buf_state *state, const void *base)
 {
+	const char *buf = (const char *)match32;
 	int growth = 0;
-	char *buf;
 
 	if (size_left == 0)
 		return 0;
 
-	buf = (char *) match32;
-
-	while (size_left >= sizeof(*match32)) {
+	do {
 		struct ebt_entry_match *match_kern;
 		int ret;
 
+		if (size_left < sizeof(*match32))
+			return -EINVAL;
+
 		match_kern = (struct ebt_entry_match *) state->buf_kern_start;
 		if (match_kern) {
 			char *tmp;
@@ -2031,22 +2032,18 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
 		if (match_kern)
 			match_kern->match_size = ret;
 
-		/* rule should have no remaining data after target */
-		if (type == EBT_COMPAT_TARGET && size_left)
-			return -EINVAL;
-
 		match32 = (struct compat_ebt_entry_mwt *) buf;
-	}
+	} while (size_left);
 
 	return growth;
 }
 
 /* called for all ebt_entry structures. */
-static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
+static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *base,
 			  unsigned int *total,
 			  struct ebt_entries_buf_state *state)
 {
-	unsigned int i, j, startoff, new_offset = 0;
+	unsigned int i, j, startoff, next_expected_off, new_offset = 0;
 	/* stores match/watchers/targets & offset of next struct ebt_entry: */
 	unsigned int offsets[4];
 	unsigned int *offsets_update = NULL;
@@ -2132,11 +2129,13 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
 			return ret;
 	}
 
-	startoff = state->buf_user_offset - startoff;
-
-	if (WARN_ON(*total < startoff))
+	next_expected_off = state->buf_user_offset - startoff;
+	if (next_expected_off != entry->next_offset)
 		return -EINVAL;
-	*total -= startoff;
+
+	if (*total < entry->next_offset)
+		return -EINVAL;
+	*total -= entry->next_offset;
 	return 0;
 }
 

From 8cb4ec44de42b99b92399b4d1daf3dc430ed0186 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 18 Dec 2019 00:59:29 +0100
Subject: [PATCH 026/146] netfilter: nft_tproxy: Fix port selector on Big
 Endian
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Big Endian architectures, u16 port value was extracted from the wrong
parts of u32 sreg_port, just like commit 10596608c4d62 ("netfilter:
nf_tables: fix mismatch in big-endian system") describes.

Fixes: 4ed8eb6570a49 ("netfilter: nf_tables: Add native tproxy support")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Acked-by: Florian Westphal <fw@strlen.de>
Acked-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_tproxy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index f92a82c73880..95980154ef02 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -50,7 +50,7 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr,
 	taddr = nf_tproxy_laddr4(skb, taddr, iph->daddr);
 
 	if (priv->sreg_port)
-		tport = regs->data[priv->sreg_port];
+		tport = nft_reg_load16(&regs->data[priv->sreg_port]);
 	if (!tport)
 		tport = hp->dest;
 
@@ -117,7 +117,7 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
 	taddr = *nf_tproxy_laddr6(skb, &taddr, &iph->daddr);
 
 	if (priv->sreg_port)
-		tport = regs->data[priv->sreg_port];
+		tport = nft_reg_load16(&regs->data[priv->sreg_port]);
 	if (!tport)
 		tport = hp->dest;
 

From 0141254b0a74b37aa7eb13d42a56adba84d51c73 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 20 Dec 2019 10:31:34 +0100
Subject: [PATCH 027/146] ALSA: usb-audio: fix set_format altsetting sanity
 check

Make sure to check the return value of usb_altnum_to_altsetting() to
avoid dereferencing a NULL pointer when the requested alternate settings
is missing.

The format altsetting number may come from a quirk table and there does
not seem to be any other validation of it (the corresponding index is
checked however).

Fixes: b099b9693d23 ("ALSA: usb-audio: Avoid superfluous usb_set_interface() calls")
Cc: stable <stable@vger.kernel.org>     # 4.18
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://lore.kernel.org/r/20191220093134.1248-1-johan@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/pcm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 96298c767c76..a11c8150af58 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -506,9 +506,9 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt)
 	if (WARN_ON(!iface))
 		return -EINVAL;
 	alts = usb_altnum_to_altsetting(iface, fmt->altsetting);
-	altsd = get_iface_desc(alts);
-	if (WARN_ON(altsd->bAlternateSetting != fmt->altsetting))
+	if (WARN_ON(!alts))
 		return -EINVAL;
+	altsd = get_iface_desc(alts);
 
 	if (fmt == subs->cur_audiofmt && !subs->need_setup_fmt)
 		return 0;

From ac2917b01992c098b8d4e6837115e3ca347fdd90 Mon Sep 17 00:00:00 2001
From: "Ben Dooks (Codethink)" <ben.dooks@codethink.co.uk>
Date: Tue, 17 Dec 2019 11:53:09 +0000
Subject: [PATCH 028/146] drm/arm/mali: make malidp_mw_connector_helper_funcs
 static

The malidp_mw_connector_helper_funcs is not referenced by name
outside of the file it is in, so make it static to avoid the
following warning:

drivers/gpu/drm/arm/malidp_mw.c:59:41: warning: symbol 'malidp_mw_connector_helper_funcs' was not declared. Should it be static?

Signed-off-by: Ben Dooks (Codethink) <ben.dooks@codethink.co.uk>
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191217115309.2133503-1-ben.dooks@codethink.co.uk
---
 drivers/gpu/drm/arm/malidp_mw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c
index 875a3a9eabfa..7d0e7b031e44 100644
--- a/drivers/gpu/drm/arm/malidp_mw.c
+++ b/drivers/gpu/drm/arm/malidp_mw.c
@@ -56,7 +56,7 @@ malidp_mw_connector_mode_valid(struct drm_connector *connector,
 	return MODE_OK;
 }
 
-const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = {
+static const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = {
 	.get_modes = malidp_mw_connector_get_modes,
 	.mode_valid = malidp_mw_connector_mode_valid,
 };

From f33121cbe91973a08e68e4bde8c3f7e6e4e351c1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 18 Dec 2019 16:38:49 +0000
Subject: [PATCH 029/146] rxrpc: Unlock new call in rxrpc_new_incoming_call()
 rather than the caller

Move the unlock and the ping transmission for a new incoming call into
rxrpc_new_incoming_call() rather than doing it in the caller.  This makes
it clearer to see what's going on.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
cc: Ingo Molnar <mingo@redhat.com>
cc: Will Deacon <will@kernel.org>
cc: Davidlohr Bueso <dave@stgolabs.net>
---
 net/rxrpc/call_accept.c | 36 ++++++++++++++++++++++++++++--------
 net/rxrpc/input.c       | 18 ------------------
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 135bf5cd8dd5..3685b1732f65 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -239,6 +239,22 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
 	kfree(b);
 }
 
+/*
+ * Ping the other end to fill our RTT cache and to retrieve the rwind
+ * and MTU parameters.
+ */
+static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	ktime_t now = skb->tstamp;
+
+	if (call->peer->rtt_usage < 3 ||
+	    ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
+				  true, true,
+				  rxrpc_propose_ack_ping_for_params);
+}
+
 /*
  * Allocate a new incoming call from the prealloc pool, along with a connection
  * and a peer as necessary.
@@ -346,9 +362,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 				  sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
 		skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
 		skb->priority = RX_INVALID_OPERATION;
-		_leave(" = NULL [close]");
-		call = NULL;
-		goto out;
+		goto no_call;
 	}
 
 	/* The peer, connection and call may all have sprung into existence due
@@ -361,9 +375,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 	call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb);
 	if (!call) {
 		skb->mark = RXRPC_SKB_MARK_REJECT_BUSY;
-		_leave(" = NULL [busy]");
-		call = NULL;
-		goto out;
+		goto no_call;
 	}
 
 	trace_rxrpc_receive(call, rxrpc_receive_incoming,
@@ -432,10 +444,18 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 	 */
 	rxrpc_put_call(call, rxrpc_call_put);
 
-	_leave(" = %p{%d}", call, call->debug_id);
-out:
 	spin_unlock(&rx->incoming_lock);
+
+	rxrpc_send_ping(call, skb);
+	mutex_unlock(&call->user_mutex);
+
+	_leave(" = %p{%d}", call, call->debug_id);
 	return call;
+
+no_call:
+	spin_unlock(&rx->incoming_lock);
+	_leave(" = NULL [%u]", skb->mark);
+	return NULL;
 }
 
 /*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 157be1ff8697..86bd133b4fa0 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -192,22 +192,6 @@ send_extra_data:
 	goto out_no_clear_ca;
 }
 
-/*
- * Ping the other end to fill our RTT cache and to retrieve the rwind
- * and MTU parameters.
- */
-static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
-{
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	ktime_t now = skb->tstamp;
-
-	if (call->peer->rtt_usage < 3 ||
-	    ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
-		rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
-				  true, true,
-				  rxrpc_propose_ack_ping_for_params);
-}
-
 /*
  * Apply a hard ACK by advancing the Tx window.
  */
@@ -1396,8 +1380,6 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 		call = rxrpc_new_incoming_call(local, rx, skb);
 		if (!call)
 			goto reject_packet;
-		rxrpc_send_ping(call, skb);
-		mutex_unlock(&call->user_mutex);
 	}
 
 	/* Process a call packet; this either discards or passes on the ref

From 13b7955a0252e15265386b229b814152f109b234 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 20 Dec 2019 16:20:56 +0000
Subject: [PATCH 030/146] rxrpc: Don't take call->user_mutex in
 rxrpc_new_incoming_call()

Standard kernel mutexes cannot be used in any way from interrupt or softirq
context, so the user_mutex which manages access to a call cannot be a mutex
since on a new call the mutex must start off locked and be unlocked within
the softirq handler to prevent userspace interfering with a call we're
setting up.

Commit a0855d24fc22d49cdc25664fb224caee16998683 ("locking/mutex: Complain
upon mutex API misuse in IRQ contexts") causes big warnings to be splashed
in dmesg for each a new call that comes in from the server.  Whilst it
*seems* like it should be okay, since the accept path uses trylock, there
are issues with PI boosting and marking the wrong task as the owner.

Fix this by not taking the mutex in the softirq path at all.  It's not
obvious that there should be any need for it as the state is set before the
first notification is generated for the new call.

There's also no particular reason why the link-assessing ping should be
triggered inside the mutex.  It's not actually transmitted there anyway,
but rather it has to be deferred to a workqueue.

Further, I don't think that there's any particular reason that the socket
notification needs to be done from within rx->incoming_lock, so the amount
of time that lock is held can be shortened too and the ping prepared before
the new call notification is sent.

Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Peter Zijlstra (Intel) <peterz@infradead.org>
cc: Ingo Molnar <mingo@redhat.com>
cc: Will Deacon <will@kernel.org>
cc: Davidlohr Bueso <dave@stgolabs.net>
---
 net/rxrpc/call_accept.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 3685b1732f65..44fa22b020ef 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -381,18 +381,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 	trace_rxrpc_receive(call, rxrpc_receive_incoming,
 			    sp->hdr.serial, sp->hdr.seq);
 
-	/* Lock the call to prevent rxrpc_kernel_send/recv_data() and
-	 * sendmsg()/recvmsg() inconveniently stealing the mutex once the
-	 * notification is generated.
-	 *
-	 * The BUG should never happen because the kernel should be well
-	 * behaved enough not to access the call before the first notification
-	 * event and userspace is prevented from doing so until the state is
-	 * appropriate.
-	 */
-	if (!mutex_trylock(&call->user_mutex))
-		BUG();
-
 	/* Make the call live. */
 	rxrpc_incoming_call(rx, call, skb);
 	conn = call->conn;
@@ -433,6 +421,9 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 		BUG();
 	}
 	spin_unlock(&conn->state_lock);
+	spin_unlock(&rx->incoming_lock);
+
+	rxrpc_send_ping(call, skb);
 
 	if (call->state == RXRPC_CALL_SERVER_ACCEPTING)
 		rxrpc_notify_socket(call);
@@ -444,11 +435,6 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 	 */
 	rxrpc_put_call(call, rxrpc_call_put);
 
-	spin_unlock(&rx->incoming_lock);
-
-	rxrpc_send_ping(call, skb);
-	mutex_unlock(&call->user_mutex);
-
 	_leave(" = %p{%d}", call, call->debug_id);
 	return call;
 

From 063c60d39180cec7c9317f5acfc3071f8fecd705 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 20 Dec 2019 16:17:16 +0000
Subject: [PATCH 031/146] rxrpc: Fix missing security check on incoming calls

Fix rxrpc_new_incoming_call() to check that we have a suitable service key
available for the combination of service ID and security class of a new
incoming call - and to reject calls for which we don't.

This causes an assertion like the following to appear:

	rxrpc: Assertion failed - 6(0x6) == 12(0xc) is false
	kernel BUG at net/rxrpc/call_object.c:456!

Where call->state is RXRPC_CALL_SERVER_SECURING (6) rather than
RXRPC_CALL_COMPLETE (12).

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/ar-internal.h  | 10 ++++--
 net/rxrpc/call_accept.c  | 14 ++++++--
 net/rxrpc/conn_event.c   | 16 +--------
 net/rxrpc/conn_service.c |  4 +++
 net/rxrpc/rxkad.c        |  5 +--
 net/rxrpc/security.c     | 70 +++++++++++++++++++---------------------
 6 files changed, 59 insertions(+), 60 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7c7d10f2e0c1..5e99df80e80a 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -209,6 +209,7 @@ struct rxrpc_skb_priv {
 struct rxrpc_security {
 	const char		*name;		/* name of this service */
 	u8			security_index;	/* security type provided */
+	u32			no_key_abort;	/* Abort code indicating no key */
 
 	/* Initialise a security service */
 	int (*init)(void);
@@ -977,8 +978,9 @@ static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn,
 struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
 						     struct sk_buff *);
 struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t);
-void rxrpc_new_incoming_connection(struct rxrpc_sock *,
-				   struct rxrpc_connection *, struct sk_buff *);
+void rxrpc_new_incoming_connection(struct rxrpc_sock *, struct rxrpc_connection *,
+				   const struct rxrpc_security *, struct key *,
+				   struct sk_buff *);
 void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
 
 /*
@@ -1103,7 +1105,9 @@ extern const struct rxrpc_security rxkad;
 int __init rxrpc_init_security(void);
 void rxrpc_exit_security(void);
 int rxrpc_init_client_conn_security(struct rxrpc_connection *);
-int rxrpc_init_server_conn_security(struct rxrpc_connection *);
+bool rxrpc_look_up_server_security(struct rxrpc_local *, struct rxrpc_sock *,
+				   const struct rxrpc_security **, struct key **,
+				   struct sk_buff *);
 
 /*
  * sendmsg.c
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 44fa22b020ef..70e44abf106c 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -263,6 +263,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
 						    struct rxrpc_local *local,
 						    struct rxrpc_peer *peer,
 						    struct rxrpc_connection *conn,
+						    const struct rxrpc_security *sec,
+						    struct key *key,
 						    struct sk_buff *skb)
 {
 	struct rxrpc_backlog *b = rx->backlog;
@@ -310,7 +312,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
 		conn->params.local = rxrpc_get_local(local);
 		conn->params.peer = peer;
 		rxrpc_see_connection(conn);
-		rxrpc_new_incoming_connection(rx, conn, skb);
+		rxrpc_new_incoming_connection(rx, conn, sec, key, skb);
 	} else {
 		rxrpc_get_connection(conn);
 	}
@@ -349,9 +351,11 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 					   struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	const struct rxrpc_security *sec = NULL;
 	struct rxrpc_connection *conn;
 	struct rxrpc_peer *peer = NULL;
-	struct rxrpc_call *call;
+	struct rxrpc_call *call = NULL;
+	struct key *key = NULL;
 
 	_enter("");
 
@@ -372,7 +376,11 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 	 */
 	conn = rxrpc_find_connection_rcu(local, skb, &peer);
 
-	call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb);
+	if (!conn && !rxrpc_look_up_server_security(local, rx, &sec, &key, skb))
+		goto no_call;
+
+	call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, key, skb);
+	key_put(key);
 	if (!call) {
 		skb->mark = RXRPC_SKB_MARK_REJECT_BUSY;
 		goto no_call;
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index a1ceef4f5cd0..808a4723f868 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -376,21 +376,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn)
 	_enter("{%d}", conn->debug_id);
 
 	ASSERT(conn->security_ix != 0);
-
-	if (!conn->params.key) {
-		_debug("set up security");
-		ret = rxrpc_init_server_conn_security(conn);
-		switch (ret) {
-		case 0:
-			break;
-		case -ENOENT:
-			abort_code = RX_CALL_DEAD;
-			goto abort;
-		default:
-			abort_code = RXKADNOAUTH;
-			goto abort;
-		}
-	}
+	ASSERT(conn->server_key);
 
 	if (conn->security->issue_challenge(conn) < 0) {
 		abort_code = RX_CALL_DEAD;
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 123d6ceab15c..21da48e3d2e5 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -148,6 +148,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
  */
 void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
 				   struct rxrpc_connection *conn,
+				   const struct rxrpc_security *sec,
+				   struct key *key,
 				   struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -160,6 +162,8 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
 	conn->service_id	= sp->hdr.serviceId;
 	conn->security_ix	= sp->hdr.securityIndex;
 	conn->out_clientflag	= 0;
+	conn->security		= sec;
+	conn->server_key	= key_get(key);
 	if (conn->security_ix)
 		conn->state	= RXRPC_CONN_SERVICE_UNSECURED;
 	else
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 8d8aa3c230b5..098f1f9ec53b 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -648,9 +648,9 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
 	u32 serial;
 	int ret;
 
-	_enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key));
+	_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
 
-	ret = key_validate(conn->params.key);
+	ret = key_validate(conn->server_key);
 	if (ret < 0)
 		return ret;
 
@@ -1293,6 +1293,7 @@ static void rxkad_exit(void)
 const struct rxrpc_security rxkad = {
 	.name				= "rxkad",
 	.security_index			= RXRPC_SECURITY_RXKAD,
+	.no_key_abort			= RXKADUNKNOWNKEY,
 	.init				= rxkad_init,
 	.exit				= rxkad_exit,
 	.init_connection_security	= rxkad_init_connection_security,
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index a4c47d2b7054..9b1fb9ed0717 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -101,62 +101,58 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
 }
 
 /*
- * initialise the security on a server connection
+ * Find the security key for a server connection.
  */
-int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
+bool rxrpc_look_up_server_security(struct rxrpc_local *local, struct rxrpc_sock *rx,
+				   const struct rxrpc_security **_sec,
+				   struct key **_key,
+				   struct sk_buff *skb)
 {
 	const struct rxrpc_security *sec;
-	struct rxrpc_local *local = conn->params.local;
-	struct rxrpc_sock *rx;
-	struct key *key;
-	key_ref_t kref;
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	key_ref_t kref = NULL;
 	char kdesc[5 + 1 + 3 + 1];
 
 	_enter("");
 
-	sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix);
+	sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex);
 
-	sec = rxrpc_security_lookup(conn->security_ix);
+	sec = rxrpc_security_lookup(sp->hdr.securityIndex);
 	if (!sec) {
-		_leave(" = -ENOKEY [lookup]");
-		return -ENOKEY;
+		trace_rxrpc_abort(0, "SVS",
+				  sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+				  RX_INVALID_OPERATION, EKEYREJECTED);
+		skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+		skb->priority = RX_INVALID_OPERATION;
+		return false;
 	}
 
-	/* find the service */
-	read_lock(&local->services_lock);
-	rx = rcu_dereference_protected(local->service,
-				       lockdep_is_held(&local->services_lock));
-	if (rx && (rx->srx.srx_service == conn->service_id ||
-		   rx->second_service == conn->service_id))
-		goto found_service;
+	if (sp->hdr.securityIndex == RXRPC_SECURITY_NONE)
+		goto out;
 
-	/* the service appears to have died */
-	read_unlock(&local->services_lock);
-	_leave(" = -ENOENT");
-	return -ENOENT;
-
-found_service:
 	if (!rx->securities) {
-		read_unlock(&local->services_lock);
-		_leave(" = -ENOKEY");
-		return -ENOKEY;
+		trace_rxrpc_abort(0, "SVR",
+				  sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+				  RX_INVALID_OPERATION, EKEYREJECTED);
+		skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+		skb->priority = RX_INVALID_OPERATION;
+		return false;
 	}
 
 	/* look through the service's keyring */
 	kref = keyring_search(make_key_ref(rx->securities, 1UL),
 			      &key_type_rxrpc_s, kdesc, true);
 	if (IS_ERR(kref)) {
-		read_unlock(&local->services_lock);
-		_leave(" = %ld [search]", PTR_ERR(kref));
-		return PTR_ERR(kref);
+		trace_rxrpc_abort(0, "SVK",
+				  sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+				  sec->no_key_abort, EKEYREJECTED);
+		skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+		skb->priority = sec->no_key_abort;
+		return false;
 	}
 
-	key = key_ref_to_ptr(kref);
-	read_unlock(&local->services_lock);
-
-	conn->server_key = key;
-	conn->security = sec;
-
-	_leave(" = 0");
-	return 0;
+out:
+	*_sec = sec;
+	*_key = key_ref_to_ptr(kref);
+	return true;
 }

From 8cc0991c09bfd11fd878b0321a7a06724520d879 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 20 Dec 2019 19:17:02 -0500
Subject: [PATCH 032/146] ALSA: hda/hdmi - fix atpx_present when CLASS is not
 VGA

You can't use PCI_BASE_CLASS with pci_get_class().  This
happens to work by luck on devices with PCI_CLASS_DISPLAY_VGA, but
misses PCI_CLASS_DISPLAY_OTHER.  Add a check for those as well.

Fixes: 586bc4aab878 ("ALSA: hda/hdmi - fix vgaswitcheroo detection for AMD")
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://lore.kernel.org/r/20191221001702.1338587-1-alexander.deucher@amd.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index b856b89378ac..f69c8de64bd6 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1410,7 +1410,17 @@ static bool atpx_present(void)
 	acpi_handle dhandle, atpx_handle;
 	acpi_status status;
 
-	while ((pdev = pci_get_class(PCI_BASE_CLASS_DISPLAY << 16, pdev)) != NULL) {
+	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+		dhandle = ACPI_HANDLE(&pdev->dev);
+		if (dhandle) {
+			status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);
+			if (!ACPI_FAILURE(status)) {
+				pci_dev_put(pdev);
+				return true;
+			}
+		}
+	}
+	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
 		dhandle = ACPI_HANDLE(&pdev->dev);
 		if (dhandle) {
 			status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);

From 43cf75d96409a20ef06b756877a2e72b10a026fc Mon Sep 17 00:00:00 2001
From: chenqiwu <chenqiwu@xiaomi.com>
Date: Thu, 19 Dec 2019 14:29:53 +0800
Subject: [PATCH 033/146] exit: panic before exit_mm() on global init exit

Currently, when global init and all threads in its thread-group have exited
we panic via:
do_exit()
-> exit_notify()
   -> forget_original_parent()
      -> find_child_reaper()
This makes it hard to extract a useable coredump for global init from a
kernel crashdump because by the time we panic exit_mm() will have already
released global init's mm.
This patch moves the panic futher up before exit_mm() is called. As was the
case previously, we only panic when global init and all its threads in the
thread-group have exited.

Signed-off-by: chenqiwu <chenqiwu@xiaomi.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
[christian.brauner@ubuntu.com: fix typo, rewrite commit message]
Link: https://lore.kernel.org/r/1576736993-10121-1-git-send-email-qiwuchen55@gmail.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 kernel/exit.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index a46a50d67002..fc364272759d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -517,10 +517,6 @@ static struct task_struct *find_child_reaper(struct task_struct *father,
 	}
 
 	write_unlock_irq(&tasklist_lock);
-	if (unlikely(pid_ns == &init_pid_ns)) {
-		panic("Attempted to kill init! exitcode=0x%08x\n",
-			father->signal->group_exit_code ?: father->exit_code);
-	}
 
 	list_for_each_entry_safe(p, n, dead, ptrace_entry) {
 		list_del_init(&p->ptrace_entry);
@@ -786,6 +782,14 @@ void __noreturn do_exit(long code)
 	acct_update_integrals(tsk);
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
+		/*
+		 * If the last thread of global init has exited, panic
+		 * immediately to get a useable coredump.
+		 */
+		if (unlikely(is_global_init(tsk)))
+			panic("Attempted to kill init! exitcode=0x%08x\n",
+				tsk->signal->group_exit_code ?: (int)code);
+
 #ifdef CONFIG_POSIX_TIMERS
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);

From f54c7898ed1c3c9331376c0337a5049c38f66497 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 22 Dec 2019 23:37:40 +0100
Subject: [PATCH 034/146] bpf: Fix precision tracking for unbounded scalars

Anatoly has been fuzzing with kBdysch harness and reported a hang in one
of the outcomes. Upon closer analysis, it turns out that precise scalar
value tracking is missing a few precision markings for unknown scalars:

  0: R1=ctx(id=0,off=0,imm=0) R10=fp0
  0: (b7) r0 = 0
  1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
  1: (35) if r0 >= 0xf72e goto pc+0
  --> only follow fallthrough
  2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
  2: (35) if r0 >= 0x80fe0000 goto pc+0
  --> only follow fallthrough
  3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
  3: (14) w0 -= -536870912
  4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0
  4: (0f) r1 += r0
  5: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0
  5: (55) if r1 != 0x104c1500 goto pc+0
  --> push other branch for later analysis
  R0_w=invP536870912 R1_w=inv273421568 R10=fp0
  6: R0_w=invP536870912 R1_w=inv273421568 R10=fp0
  6: (b7) r0 = 0
  7: R0=invP0 R1=inv273421568 R10=fp0
  7: (76) if w1 s>= 0xffffff00 goto pc+3
  --> only follow goto
  11: R0=invP0 R1=inv273421568 R10=fp0
  11: (95) exit
  6: R0_w=invP536870912 R1_w=inv(id=0) R10=fp0
  6: (b7) r0 = 0
  propagating r0
  7: safe
  processed 11 insns [...]

In the analysis of the second path coming after the successful exit above,
the path is being pruned at line 7. Pruning analysis found that both r0 are
precise P0 and both R1 are non-precise scalars and given prior path with
R1 as non-precise scalar succeeded, this one is therefore safe as well.

However, problem is that given condition at insn 7 in the first run, we only
followed goto and didn't push the other branch for later analysis, we've
never walked the few insns in there and therefore dead-code sanitation
rewrites it as goto pc-1, causing the hang depending on the skb address
hitting these conditions. The issue is that R1 should have been marked as
precise as well such that pruning enforces range check and conluded that new
R1 is not in range of old R1. In insn 4, we mark R1 (skb) as unknown scalar
via __mark_reg_unbounded() but not mark_reg_unbounded() and therefore
regs->precise remains as false.

Back in b5dc0163d8fd ("bpf: precise scalar_value tracking"), this was not
the case since marking out of __mark_reg_unbounded() had this covered as well.
Once in both are set as precise in 4 as they should have been, we conclude
that given R1 was in prior fall-through path 0x104c1500 and now is completely
unknown, the check at insn 7 concludes that we need to continue walking.
Analysis after the fix:

  0: R1=ctx(id=0,off=0,imm=0) R10=fp0
  0: (b7) r0 = 0
  1: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
  1: (35) if r0 >= 0xf72e goto pc+0
  2: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
  2: (35) if r0 >= 0x80fe0000 goto pc+0
  3: R0_w=invP0 R1=ctx(id=0,off=0,imm=0) R10=fp0
  3: (14) w0 -= -536870912
  4: R0_w=invP536870912 R1=ctx(id=0,off=0,imm=0) R10=fp0
  4: (0f) r1 += r0
  5: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0
  5: (55) if r1 != 0x104c1500 goto pc+0
  R0_w=invP536870912 R1_w=invP273421568 R10=fp0
  6: R0_w=invP536870912 R1_w=invP273421568 R10=fp0
  6: (b7) r0 = 0
  7: R0=invP0 R1=invP273421568 R10=fp0
  7: (76) if w1 s>= 0xffffff00 goto pc+3
  11: R0=invP0 R1=invP273421568 R10=fp0
  11: (95) exit
  6: R0_w=invP536870912 R1_w=invP(id=0) R10=fp0
  6: (b7) r0 = 0
  7: R0_w=invP0 R1_w=invP(id=0) R10=fp0
  7: (76) if w1 s>= 0xffffff00 goto pc+3
  R0_w=invP0 R1_w=invP(id=0) R10=fp0
  8: R0_w=invP0 R1_w=invP(id=0) R10=fp0
  8: (a5) if r0 < 0x2007002a goto pc+0
  9: R0_w=invP0 R1_w=invP(id=0) R10=fp0
  9: (57) r0 &= -16316416
  10: R0_w=invP0 R1_w=invP(id=0) R10=fp0
  10: (a6) if w0 < 0x1201 goto pc+0
  11: R0_w=invP0 R1_w=invP(id=0) R10=fp0
  11: (95) exit
  11: R0=invP0 R1=invP(id=0) R10=fp0
  11: (95) exit
  processed 16 insns [...]

Fixes: 6754172c208d ("bpf: fix precision tracking in presence of bpf2bpf calls")
Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20191222223740.25297-1-daniel@iogearbox.net
---
 kernel/bpf/verifier.c | 43 ++++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4983940cbdca..6f63ae7a370c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -907,7 +907,8 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
-static void __mark_reg_not_init(struct bpf_reg_state *reg);
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
+				struct bpf_reg_state *reg);
 
 /* Mark the unknown part of a register (variable offset or scalar value) as
  * known to have the value @imm.
@@ -945,7 +946,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
 		/* Something bad happened, let's kill all regs */
 		for (regno = 0; regno < MAX_BPF_REG; regno++)
-			__mark_reg_not_init(regs + regno);
+			__mark_reg_not_init(env, regs + regno);
 		return;
 	}
 	__mark_reg_known_zero(regs + regno);
@@ -1054,7 +1055,8 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
 }
 
 /* Mark a register as having a completely unknown (scalar) value. */
-static void __mark_reg_unknown(struct bpf_reg_state *reg)
+static void __mark_reg_unknown(const struct bpf_verifier_env *env,
+			       struct bpf_reg_state *reg)
 {
 	/*
 	 * Clear type, id, off, and union(map_ptr, range) and
@@ -1064,6 +1066,8 @@ static void __mark_reg_unknown(struct bpf_reg_state *reg)
 	reg->type = SCALAR_VALUE;
 	reg->var_off = tnum_unknown;
 	reg->frameno = 0;
+	reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
+		       true : false;
 	__mark_reg_unbounded(reg);
 }
 
@@ -1074,19 +1078,16 @@ static void mark_reg_unknown(struct bpf_verifier_env *env,
 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
 		/* Something bad happened, let's kill all regs except FP */
 		for (regno = 0; regno < BPF_REG_FP; regno++)
-			__mark_reg_not_init(regs + regno);
+			__mark_reg_not_init(env, regs + regno);
 		return;
 	}
-	regs += regno;
-	__mark_reg_unknown(regs);
-	/* constant backtracking is enabled for root without bpf2bpf calls */
-	regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
-			true : false;
+	__mark_reg_unknown(env, regs + regno);
 }
 
-static void __mark_reg_not_init(struct bpf_reg_state *reg)
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
+				struct bpf_reg_state *reg)
 {
-	__mark_reg_unknown(reg);
+	__mark_reg_unknown(env, reg);
 	reg->type = NOT_INIT;
 }
 
@@ -1097,10 +1098,10 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
 		/* Something bad happened, let's kill all regs except FP */
 		for (regno = 0; regno < BPF_REG_FP; regno++)
-			__mark_reg_not_init(regs + regno);
+			__mark_reg_not_init(env, regs + regno);
 		return;
 	}
-	__mark_reg_not_init(regs + regno);
+	__mark_reg_not_init(env, regs + regno);
 }
 
 #define DEF_NOT_SUBREG	(0)
@@ -3234,7 +3235,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 		}
 		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
 		    state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
-			__mark_reg_unknown(&state->stack[spi].spilled_ptr);
+			__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
 			for (j = 0; j < BPF_REG_SIZE; j++)
 				state->stack[spi].slot_type[j] = STACK_MISC;
 			goto mark;
@@ -3892,7 +3893,7 @@ static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
 		if (!reg)
 			continue;
 		if (reg_is_pkt_pointer_any(reg))
-			__mark_reg_unknown(reg);
+			__mark_reg_unknown(env, reg);
 	}
 }
 
@@ -3920,7 +3921,7 @@ static void release_reg_references(struct bpf_verifier_env *env,
 		if (!reg)
 			continue;
 		if (reg->ref_obj_id == ref_obj_id)
-			__mark_reg_unknown(reg);
+			__mark_reg_unknown(env, reg);
 	}
 }
 
@@ -4582,7 +4583,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		/* Taint dst register if offset had invalid bounds derived from
 		 * e.g. dead branches.
 		 */
-		__mark_reg_unknown(dst_reg);
+		__mark_reg_unknown(env, dst_reg);
 		return 0;
 	}
 
@@ -4834,13 +4835,13 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		/* Taint dst register if offset had invalid bounds derived from
 		 * e.g. dead branches.
 		 */
-		__mark_reg_unknown(dst_reg);
+		__mark_reg_unknown(env, dst_reg);
 		return 0;
 	}
 
 	if (!src_known &&
 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
-		__mark_reg_unknown(dst_reg);
+		__mark_reg_unknown(env, dst_reg);
 		return 0;
 	}
 
@@ -6982,7 +6983,7 @@ static void clean_func_state(struct bpf_verifier_env *env,
 			/* since the register is unused, clear its state
 			 * to make further comparison simpler
 			 */
-			__mark_reg_not_init(&st->regs[i]);
+			__mark_reg_not_init(env, &st->regs[i]);
 	}
 
 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
@@ -6990,7 +6991,7 @@ static void clean_func_state(struct bpf_verifier_env *env,
 		/* liveness must not touch this stack slot anymore */
 		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
 		if (!(live & REG_LIVE_READ)) {
-			__mark_reg_not_init(&st->stack[i].spilled_ptr);
+			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
 			for (j = 0; j < BPF_REG_SIZE; j++)
 				st->stack[i].slot_type[j] = STACK_INVALID;
 		}

From 2f42e05b942fe2fbfb9bbc6e34e1dd8c3ce4f3a4 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 18 Dec 2019 19:09:06 +0000
Subject: [PATCH 035/146] dmaengine: k3dma: Avoid null pointer traversal

In some cases we seem to submit two transactions in a row, which
causes us to lose track of the first. If we then cancel the
request, we may still get an interrupt, which traverses a null
ds_run value.

So try to avoid starting a new transaction if the ds_run value
is set.

While this patch avoids the null pointer crash, I've had some
reports of the k3dma driver still getting confused, which
suggests the ds_run/ds_done value handling still isn't quite
right. However, I've not run into an issue recently with it
so I think this patch is worth pushing upstream to avoid the
crash.

Signed-off-by: John Stultz <john.stultz@linaro.org>
[add ss tag]
Link: https://lore.kernel.org/r/20191218190906.6641-1-john.stultz@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/k3dma.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index adecea51814f..c5c1aa0dcaed 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -229,9 +229,11 @@ static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
 			c = p->vchan;
 			if (c && (tc1 & BIT(i))) {
 				spin_lock_irqsave(&c->vc.lock, flags);
-				vchan_cookie_complete(&p->ds_run->vd);
-				p->ds_done = p->ds_run;
-				p->ds_run = NULL;
+				if (p->ds_run != NULL) {
+					vchan_cookie_complete(&p->ds_run->vd);
+					p->ds_done = p->ds_run;
+					p->ds_run = NULL;
+				}
 				spin_unlock_irqrestore(&c->vc.lock, flags);
 			}
 			if (c && (tc2 & BIT(i))) {
@@ -271,6 +273,10 @@ static int k3_dma_start_txd(struct k3_dma_chan *c)
 	if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d))
 		return -EAGAIN;
 
+	/* Avoid losing track of  ds_run if a transaction is in flight */
+	if (c->phy->ds_run)
+		return -EAGAIN;
+
 	if (vd) {
 		struct k3_dma_desc_sw *ds =
 			container_of(vd, struct k3_dma_desc_sw, vd);

From 24461d9792c2c706092805ff1b067628933441bd Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 20 Dec 2019 15:11:00 +0200
Subject: [PATCH 036/146] dmaengine: virt-dma: Fix access after free in
 vchan_complete()

vchan_vdesc_fini() is freeing up 'vd' so the access to vd->tx_result is
via already freed up memory.

Move the vchan_vdesc_fini() after invoking the callback to avoid this.

Fixes: 09d5b702b0f97 ("dmaengine: virt-dma: store result on dma descriptor")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Link: https://lore.kernel.org/r/20191220131100.21804-1-peter.ujfalusi@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/virt-dma.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index ec4adf4260a0..256fc662c500 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -104,9 +104,8 @@ static void vchan_complete(unsigned long arg)
 		dmaengine_desc_get_callback(&vd->tx, &cb);
 
 		list_del(&vd->node);
-		vchan_vdesc_fini(vd);
-
 		dmaengine_desc_callback_invoke(&cb, &vd->tx_result);
+		vchan_vdesc_fini(vd);
 	}
 }
 

From 91a063c956084fb21cf2523bce6892514e3f1799 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 21 Dec 2019 14:16:54 +1100
Subject: [PATCH 037/146] powerpc/mm: Mark get_slice_psize() &
 slice_addr_is_low() as notrace

These slice routines are called from the SLB miss handler, which can
lead to warnings from the IRQ code, because we have not reconciled the
IRQ state properly:

  WARNING: CPU: 72 PID: 30150 at arch/powerpc/kernel/irq.c:258 arch_local_irq_restore.part.0+0xcc/0x100
  Modules linked in:
  CPU: 72 PID: 30150 Comm: ftracetest Not tainted 5.5.0-rc2-gcc9x-g7e0165b2f1a9 #1
  NIP:  c00000000001d83c LR: c00000000029ab90 CTR: c00000000026cf90
  REGS: c0000007eee3b960 TRAP: 0700   Not tainted  (5.5.0-rc2-gcc9x-g7e0165b2f1a9)
  MSR:  8000000000021033 <SF,ME,IR,DR,RI,LE>  CR: 22242844  XER: 20000000
  CFAR: c00000000001d780 IRQMASK: 0
  ...
  NIP arch_local_irq_restore.part.0+0xcc/0x100
  LR  trace_graph_entry+0x270/0x340
  Call Trace:
    trace_graph_entry+0x254/0x340 (unreliable)
    function_graph_enter+0xe4/0x1a0
    prepare_ftrace_return+0xa0/0x130
    ftrace_graph_caller+0x44/0x94	# (get_slice_psize())
    slb_allocate_user+0x7c/0x100
    do_slb_fault+0xf8/0x300
    instruction_access_slb_common+0x140/0x180

Fixes: 48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20191221121337.4894-1-mpe@ellerman.id.au
---
 arch/powerpc/mm/slice.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 42bbcd47cc85..dffe1a45b6ed 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -50,7 +50,7 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) {
 
 #endif
 
-static inline bool slice_addr_is_low(unsigned long addr)
+static inline notrace bool slice_addr_is_low(unsigned long addr)
 {
 	u64 tmp = (u64)addr;
 
@@ -659,7 +659,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 				       mm_ctx_user_psize(&current->mm->context), 1);
 }
 
-unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
 	unsigned char *psizes;
 	int index, mask_index;

From fa633a0f89192379828103957874682d389eae83 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 23 Dec 2019 15:13:26 +0900
Subject: [PATCH 038/146] libbpf: Fix build on read-only filesystems

I got the following error when I tried to build perf on a read-only
filesystem with O=dir option.

  $ cd /some/where/ro/linux/tools/perf
  $ make O=$HOME/build/perf
  ...
    CC       /home/namhyung/build/perf/lib.o
  /bin/sh: bpf_helper_defs.h: Read-only file system
  make[3]: *** [Makefile:184: bpf_helper_defs.h] Error 1
  make[2]: *** [Makefile.perf:778: /home/namhyung/build/perf/libbpf.a] Error 2
  make[2]: *** Waiting for unfinished jobs....
    LD       /home/namhyung/build/perf/libperf-in.o
    AR       /home/namhyung/build/perf/libperf.a
    PERF_VERSION = 5.4.0
  make[1]: *** [Makefile.perf:225: sub-make] Error 2
  make: *** [Makefile:70: all] Error 2

It was becaused bpf_helper_defs.h was generated in current directory.
Move it to OUTPUT directory.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20191223061326.843366-1-namhyung@kernel.org
---
 tools/lib/bpf/Makefile                 | 15 ++++++++-------
 tools/testing/selftests/bpf/.gitignore |  1 +
 tools/testing/selftests/bpf/Makefile   |  6 +++---
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index defae23a0169..97830e46d1a0 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -138,6 +138,7 @@ STATIC_OBJDIR	:= $(OUTPUT)staticobjs/
 BPF_IN_SHARED	:= $(SHARED_OBJDIR)libbpf-in.o
 BPF_IN_STATIC	:= $(STATIC_OBJDIR)libbpf-in.o
 VERSION_SCRIPT	:= libbpf.map
+BPF_HELPER_DEFS	:= $(OUTPUT)bpf_helper_defs.h
 
 LIB_TARGET	:= $(addprefix $(OUTPUT),$(LIB_TARGET))
 LIB_FILE	:= $(addprefix $(OUTPUT),$(LIB_FILE))
@@ -159,7 +160,7 @@ all: fixdep
 
 all_cmd: $(CMD_TARGETS) check
 
-$(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h
+$(BPF_IN_SHARED): force elfdep bpfdep $(BPF_HELPER_DEFS)
 	@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
 	(diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
 	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
@@ -177,12 +178,12 @@ $(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h
 	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
 	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
 
-$(BPF_IN_STATIC): force elfdep bpfdep bpf_helper_defs.h
+$(BPF_IN_STATIC): force elfdep bpfdep $(BPF_HELPER_DEFS)
 	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
 
-bpf_helper_defs.h: $(srctree)/tools/include/uapi/linux/bpf.h
+$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
 	$(Q)$(srctree)/scripts/bpf_helpers_doc.py --header 		\
-		--file $(srctree)/tools/include/uapi/linux/bpf.h > bpf_helper_defs.h
+		--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
 
 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
 
@@ -243,7 +244,7 @@ install_lib: all_cmd
 		$(call do_install_mkdir,$(libdir_SQ)); \
 		cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
 
-install_headers: bpf_helper_defs.h
+install_headers: $(BPF_HELPER_DEFS)
 	$(call QUIET_INSTALL, headers) \
 		$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
 		$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
@@ -251,7 +252,7 @@ install_headers: bpf_helper_defs.h
 		$(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
 		$(call do_install,xsk.h,$(prefix)/include/bpf,644); \
 		$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
-		$(call do_install,bpf_helper_defs.h,$(prefix)/include/bpf,644); \
+		$(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
 		$(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
 		$(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
 		$(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
@@ -271,7 +272,7 @@ config-clean:
 clean:
 	$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \
 		*.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \
-		*.pc LIBBPF-CFLAGS bpf_helper_defs.h \
+		*.pc LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \
 		$(SHARED_OBJDIR) $(STATIC_OBJDIR)
 	$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
 
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 419652458da4..1ff0a9f49c01 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -40,3 +40,4 @@ xdping
 test_cpp
 /no_alu32
 /bpf_gcc
+bpf_helper_defs.h
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e0fe01d9ec33..e2fd6f8d579c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -120,9 +120,9 @@ force:
 $(BPFOBJ): force
 	$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
 
-BPF_HELPERS := $(BPFDIR)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h)
-$(BPFDIR)/bpf_helper_defs.h:
-	$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ bpf_helper_defs.h
+BPF_HELPERS := $(OUTPUT)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h)
+$(OUTPUT)/bpf_helper_defs.h:
+	$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ $(OUTPUT)/bpf_helper_defs.h
 
 # Get Clang's default includes on this system, as opposed to those seen by
 # '-target bpf'. This fixes "missing" files on some architectures/distros,

From c366b3dbbab14b28d044b94eb9ce77c23482ea35 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 23 Dec 2019 23:18:16 +0100
Subject: [PATCH 039/146] ALSA: hda - Apply sync-write workaround to old Intel
 platforms, too

Klaus Ethgen reported occasional high CPU usages in his system that
seem caused by HD-audio driver.  The perf output revealed that it's
in the unsolicited event handling in the workqueue, and the problem
seems triggered by some communication stall between the controller and
the codec at the runtime or system resume.

Actually a similar phenomenon was seen in the past for other Intel
platforms, and we already applied the workaround to enforce sync-write
for CORB/RIRB verbs for Skylake and newer chipsets (commit
2756d9143aa5 "ALSA: hda - Fix intermittent CORB/RIRB stall on Intel
chips").  Fortunately, the same workaround is applicable to the old
chipset, and the experiment showed the positive effect.

Based on the experiment result, this patch enables the sync-write
workaround for all Intel chipsets.  The only reason I hesitated to
apply this workaround was about the possibly slightly higher CPU usage.
But if the lack of sync causes a much severer problem even for quite
old chip, we should think this would be necessary for all Intel chips.

Reported-by: Klaus Ethgen <Klaus@ethgen.ch>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20191223171833.GA17053@chua
Link: https://lore.kernel.org/r/20191223221816.32572-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index f69c8de64bd6..5b92f290cbb0 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -282,12 +282,13 @@ enum {
 
 /* quirks for old Intel chipsets */
 #define AZX_DCAPS_INTEL_ICH \
-	(AZX_DCAPS_OLD_SSYNC | AZX_DCAPS_NO_ALIGN_BUFSIZE)
+	(AZX_DCAPS_OLD_SSYNC | AZX_DCAPS_NO_ALIGN_BUFSIZE |\
+	 AZX_DCAPS_SYNC_WRITE)
 
 /* quirks for Intel PCH */
 #define AZX_DCAPS_INTEL_PCH_BASE \
 	(AZX_DCAPS_NO_ALIGN_BUFSIZE | AZX_DCAPS_COUNT_LPIB_DELAY |\
-	 AZX_DCAPS_SNOOP_TYPE(SCH))
+	 AZX_DCAPS_SNOOP_TYPE(SCH) | AZX_DCAPS_SYNC_WRITE)
 
 /* PCH up to IVB; no runtime PM; bind with i915 gfx */
 #define AZX_DCAPS_INTEL_PCH_NOPM \
@@ -302,13 +303,13 @@ enum {
 #define AZX_DCAPS_INTEL_HASWELL \
 	(/*AZX_DCAPS_ALIGN_BUFSIZE |*/ AZX_DCAPS_COUNT_LPIB_DELAY |\
 	 AZX_DCAPS_PM_RUNTIME | AZX_DCAPS_I915_COMPONENT |\
-	 AZX_DCAPS_SNOOP_TYPE(SCH))
+	 AZX_DCAPS_SNOOP_TYPE(SCH) | AZX_DCAPS_SYNC_WRITE)
 
 /* Broadwell HDMI can't use position buffer reliably, force to use LPIB */
 #define AZX_DCAPS_INTEL_BROADWELL \
 	(/*AZX_DCAPS_ALIGN_BUFSIZE |*/ AZX_DCAPS_POSFIX_LPIB |\
 	 AZX_DCAPS_PM_RUNTIME | AZX_DCAPS_I915_COMPONENT |\
-	 AZX_DCAPS_SNOOP_TYPE(SCH))
+	 AZX_DCAPS_SNOOP_TYPE(SCH) | AZX_DCAPS_SYNC_WRITE)
 
 #define AZX_DCAPS_INTEL_BAYTRAIL \
 	(AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_I915_COMPONENT)

From feed8a4fc9d46c3126fb9fcae0e9248270c6321a Mon Sep 17 00:00:00 2001
From: Antonio Messina <amessina@google.com>
Date: Thu, 19 Dec 2019 15:08:03 +0100
Subject: [PATCH 040/146] udp: fix integer overflow while computing available
 space in sk_rcvbuf

When the size of the receive buffer for a socket is close to 2^31 when
computing if we have enough space in the buffer to copy a packet from
the queue to the buffer we might hit an integer overflow.

When an user set net.core.rmem_default to a value close to 2^31 UDP
packets are dropped because of this overflow. This can be visible, for
instance, with failure to resolve hostnames.

This can be fixed by casting sk_rcvbuf (which is an int) to unsigned
int, similarly to how it is done in TCP.

Signed-off-by: Antonio Messina <amessina@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4da5758cc718..93a355b6b092 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1475,7 +1475,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 	 * queue contains some other skb
 	 */
 	rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
-	if (rmem > (size + sk->sk_rcvbuf))
+	if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
 		goto uncharge_drop;
 
 	spin_lock(&list->lock);

From 61d5d4062876e21331c3d0ba4b02dbd50c06a658 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Fri, 20 Dec 2019 15:03:44 -0300
Subject: [PATCH 041/146] sctp: fix err handling of stream initialization

The fix on 951c6db954a1 fixed the issued reported there but introduced
another. When the allocation fails within sctp_stream_init() it is
okay/necessary to free the genradix. But it is also called when adding
new streams, from sctp_send_add_streams() and
sctp_process_strreset_addstrm_in() and in those situations it cannot
just free the genradix because by then it is a fully operational
association.

The fix here then is to only free the genradix in sctp_stream_init()
and on those other call sites  move on with what it already had and let
the subsequent error handling to handle it.

Tested with the reproducers from this report and the previous one,
with lksctp-tools and sctp-tests.

Reported-by: syzbot+9a1bc632e78a1a98488b@syzkaller.appspotmail.com
Fixes: 951c6db954a1 ("sctp: fix memleak on err handling of stream initialization")
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/stream.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 6a30392068a0..c1a100d2fed3 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -84,10 +84,8 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
 		return 0;
 
 	ret = genradix_prealloc(&stream->out, outcnt, gfp);
-	if (ret) {
-		genradix_free(&stream->out);
+	if (ret)
 		return ret;
-	}
 
 	stream->outcnt = outcnt;
 	return 0;
@@ -102,10 +100,8 @@ static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
 		return 0;
 
 	ret = genradix_prealloc(&stream->in, incnt, gfp);
-	if (ret) {
-		genradix_free(&stream->in);
+	if (ret)
 		return ret;
-	}
 
 	stream->incnt = incnt;
 	return 0;
@@ -123,7 +119,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 	 * a new one with new outcnt to save memory if needed.
 	 */
 	if (outcnt == stream->outcnt)
-		goto in;
+		goto handle_in;
 
 	/* Filter out chunks queued on streams that won't exist anymore */
 	sched->unsched_all(stream);
@@ -132,24 +128,28 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 
 	ret = sctp_stream_alloc_out(stream, outcnt, gfp);
 	if (ret)
-		goto out;
+		goto out_err;
 
 	for (i = 0; i < stream->outcnt; i++)
 		SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
-in:
+handle_in:
 	sctp_stream_interleave_init(stream);
 	if (!incnt)
 		goto out;
 
 	ret = sctp_stream_alloc_in(stream, incnt, gfp);
-	if (ret) {
-		sched->free(stream);
-		genradix_free(&stream->out);
-		stream->outcnt = 0;
-		goto out;
-	}
+	if (ret)
+		goto in_err;
 
+	goto out;
+
+in_err:
+	sched->free(stream);
+	genradix_free(&stream->in);
+out_err:
+	genradix_free(&stream->out);
+	stream->outcnt = 0;
 out:
 	return ret;
 }

From 7c3125f0a6ebc17846c5908ad7d6056d66c1c426 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 20 Dec 2019 11:24:21 -0800
Subject: [PATCH 042/146] net: dsa: bcm_sf2: Fix IP fragment location and
 behavior

The IP fragment is specified through user-defined field as the first
bit of the first user-defined word. We were previously trying to extract
it from the user-defined mask which could not possibly work. The ip_frag
is also supposed to be a boolean, if we do not cast it as such, we risk
overwriting the next fields in CFP_DATA(6) which would render the rule
inoperative.

Fixes: 7318166cacad ("net: dsa: bcm_sf2: Add support for ethtool::rxnfc")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2_cfp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
index f3f0c3f07391..1962c8330daa 100644
--- a/drivers/net/dsa/bcm_sf2_cfp.c
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -358,7 +358,7 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port,
 		return -EINVAL;
 	}
 
-	ip_frag = be32_to_cpu(fs->m_ext.data[0]);
+	ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1);
 
 	/* Locate the first rule available */
 	if (fs->location == RX_CLS_LOC_ANY)
@@ -569,7 +569,7 @@ static int bcm_sf2_cfp_rule_cmp(struct bcm_sf2_priv *priv, int port,
 
 		if (rule->fs.flow_type != fs->flow_type ||
 		    rule->fs.ring_cookie != fs->ring_cookie ||
-		    rule->fs.m_ext.data[0] != fs->m_ext.data[0])
+		    rule->fs.h_ext.data[0] != fs->h_ext.data[0])
 			continue;
 
 		switch (fs->flow_type & ~FLOW_EXT) {
@@ -621,7 +621,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
 		return -EINVAL;
 	}
 
-	ip_frag = be32_to_cpu(fs->m_ext.data[0]);
+	ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1);
 
 	layout = &udf_tcpip6_layout;
 	slice_num = bcm_sf2_get_slice_number(layout, 0);

From bd085ef678b2cc8c38c105673dfe8ff8f5ec0c57 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sun, 22 Dec 2019 10:51:09 +0800
Subject: [PATCH 043/146] net: add bool confirm_neigh parameter for
 dst_ops.update_pmtu

The MTU update code is supposed to be invoked in response to real
networking events that update the PMTU. In IPv6 PMTU update function
__ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor
confirmed time.

But for tunnel code, it will call pmtu before xmit, like:
  - tnl_update_pmtu()
    - skb_dst_update_pmtu()
      - ip6_rt_update_pmtu()
        - __ip6_rt_update_pmtu()
          - dst_confirm_neigh()

If the tunnel remote dst mac address changed and we still do the neigh
confirm, we will not be able to update neigh cache and ping6 remote
will failed.

So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we
should not be invoking dst_confirm_neigh() as we have no evidence
of successful two-way communication at this point.

On the other hand it is also important to keep the neigh reachability fresh
for TCP flows, so we cannot remove this dst_confirm_neigh() call.

To fix the issue, we have to add a new bool parameter for dst_ops.update_pmtu
to choose whether we should do neigh update or not. I will add the parameter
in this patch and set all the callers to true to comply with the previous
way, and fix the tunnel code one by one on later patches.

v5: No change.
v4: No change.
v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
    dst_ops.update_pmtu to control whether we should do neighbor confirm.
    Also split the big patch to small ones for each area.
v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.

Suggested-by: David Miller <davem@davemloft.net>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gtp.c                |  2 +-
 include/net/dst.h                |  2 +-
 include/net/dst_ops.h            |  3 ++-
 net/bridge/br_nf_core.c          |  3 ++-
 net/decnet/dn_route.c            |  6 ++++--
 net/ipv4/inet_connection_sock.c  |  2 +-
 net/ipv4/route.c                 |  9 ++++++---
 net/ipv4/xfrm4_policy.c          |  5 +++--
 net/ipv6/inet6_connection_sock.c |  2 +-
 net/ipv6/ip6_gre.c               |  2 +-
 net/ipv6/route.c                 | 22 +++++++++++++++-------
 net/ipv6/xfrm6_policy.c          |  5 +++--
 net/netfilter/ipvs/ip_vs_xmit.c  |  2 +-
 net/sctp/transport.c             |  2 +-
 14 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index e5b7d6d2286e..913062017be9 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -540,7 +540,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
 		mtu = dst_mtu(&rt->dst);
 	}
 
-	rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu);
+	rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
 
 	if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
 	    mtu < ntohs(iph->tot_len)) {
diff --git a/include/net/dst.h b/include/net/dst.h
index 8224dad2ae94..593630e0e076 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -516,7 +516,7 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
 	struct dst_entry *dst = skb_dst(skb);
 
 	if (dst && dst->ops->update_pmtu)
-		dst->ops->update_pmtu(dst, NULL, skb, mtu);
+		dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
 }
 
 static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 5ec645f27ee3..443863c7b8da 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -27,7 +27,8 @@ struct dst_ops {
 	struct dst_entry *	(*negative_advice)(struct dst_entry *);
 	void			(*link_failure)(struct sk_buff *);
 	void			(*update_pmtu)(struct dst_entry *dst, struct sock *sk,
-					       struct sk_buff *skb, u32 mtu);
+					       struct sk_buff *skb, u32 mtu,
+					       bool confirm_neigh);
 	void			(*redirect)(struct dst_entry *dst, struct sock *sk,
 					    struct sk_buff *skb);
 	int			(*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 2cdfc5d6c25d..8c69f0c95a8e 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -22,7 +22,8 @@
 #endif
 
 static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
-			     struct sk_buff *skb, u32 mtu)
+			     struct sk_buff *skb, u32 mtu,
+			     bool confirm_neigh)
 {
 }
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index aea918135ec3..08c3dc45f1a4 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -110,7 +110,8 @@ static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
 static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
-			       struct sk_buff *skb , u32 mtu);
+			       struct sk_buff *skb , u32 mtu,
+			       bool confirm_neigh);
 static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
 			    struct sk_buff *skb);
 static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
@@ -251,7 +252,8 @@ static int dn_dst_gc(struct dst_ops *ops)
  * advertise to the other end).
  */
 static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
-			       struct sk_buff *skb, u32 mtu)
+			       struct sk_buff *skb, u32 mtu,
+			       bool confirm_neigh)
 {
 	struct dn_route *rt = (struct dn_route *) dst;
 	struct neighbour *n = rt->n;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e4c6e8b40490..18c0d5bffe12 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1086,7 +1086,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
 		if (!dst)
 			goto out;
 	}
-	dst->ops->update_pmtu(dst, sk, NULL, mtu);
+	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
 
 	dst = __sk_dst_check(sk, 0);
 	if (!dst)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f88c93c38f11..87e979f2b74a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -139,7 +139,8 @@ static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
-					   struct sk_buff *skb, u32 mtu);
+					   struct sk_buff *skb, u32 mtu,
+					   bool confirm_neigh);
 static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
 					struct sk_buff *skb);
 static void		ipv4_dst_destroy(struct dst_entry *dst);
@@ -1043,7 +1044,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 }
 
 static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
-			      struct sk_buff *skb, u32 mtu)
+			      struct sk_buff *skb, u32 mtu,
+			      bool confirm_neigh)
 {
 	struct rtable *rt = (struct rtable *) dst;
 	struct flowi4 fl4;
@@ -2687,7 +2689,8 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
 }
 
 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
-					  struct sk_buff *skb, u32 mtu)
+					  struct sk_buff *skb, u32 mtu,
+					  bool confirm_neigh)
 {
 }
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 35b84b52b702..9ebd54752e03 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -100,12 +100,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 }
 
 static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
-			      struct sk_buff *skb, u32 mtu)
+			      struct sk_buff *skb, u32 mtu,
+			      bool confirm_neigh)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->update_pmtu(path, sk, skb, mtu);
+	path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
 }
 
 static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index fe9cb8d1adca..e315526fa244 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -146,7 +146,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
 
 	if (IS_ERR(dst))
 		return NULL;
-	dst->ops->update_pmtu(dst, sk, NULL, mtu);
+	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
 
 	dst = inet6_csk_route_socket(sk, &fl6);
 	return IS_ERR(dst) ? NULL : dst;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 9d0965252ddf..3ba69174ad6c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 
 	/* TooBig packet may have updated dst->dev's mtu */
 	if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
-		dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+		dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
 			   NEXTHDR_GRE);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b59940416cb5..affb51c11a25 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -95,7 +95,8 @@ static int		ip6_pkt_prohibit(struct sk_buff *skb);
 static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 static void		ip6_link_failure(struct sk_buff *skb);
 static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
-					   struct sk_buff *skb, u32 mtu);
+					   struct sk_buff *skb, u32 mtu,
+					   bool confirm_neigh);
 static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 					struct sk_buff *skb);
 static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
@@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 }
 
 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
-					 struct sk_buff *skb, u32 mtu)
+					 struct sk_buff *skb, u32 mtu,
+					 bool confirm_neigh)
 {
 }
 
@@ -2692,7 +2694,8 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 }
 
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
-				 const struct ipv6hdr *iph, u32 mtu)
+				 const struct ipv6hdr *iph, u32 mtu,
+				 bool confirm_neigh)
 {
 	const struct in6_addr *daddr, *saddr;
 	struct rt6_info *rt6 = (struct rt6_info *)dst;
@@ -2710,7 +2713,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 		daddr = NULL;
 		saddr = NULL;
 	}
-	dst_confirm_neigh(dst, daddr);
+
+	if (confirm_neigh)
+		dst_confirm_neigh(dst, daddr);
+
 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
 	if (mtu >= dst_mtu(dst))
 		return;
@@ -2764,9 +2770,11 @@ out_unlock:
 }
 
 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
-			       struct sk_buff *skb, u32 mtu)
+			       struct sk_buff *skb, u32 mtu,
+			       bool confirm_neigh)
 {
-	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
+	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
+			     confirm_neigh);
 }
 
 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
@@ -2785,7 +2793,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (!dst->error)
-		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
+		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
 	dst_release(dst);
 }
 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 699e0730ce8e..af7a4b8b1e9c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -98,12 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 }
 
 static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
-			      struct sk_buff *skb, u32 mtu)
+			      struct sk_buff *skb, u32 mtu,
+			      bool confirm_neigh)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 	struct dst_entry *path = xdst->route;
 
-	path->ops->update_pmtu(path, sk, skb, mtu);
+	path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
 }
 
 static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index b1e300f8881b..b00866d777fe 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -208,7 +208,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
 	struct rtable *ort = skb_rtable(skb);
 
 	if (!skb->dev && sk && sk_fullsock(sk))
-		ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+		ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true);
 }
 
 static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 7235a6032671..3bbe1a58ec87 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -263,7 +263,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 
 		pf->af->from_sk(&addr, sk);
 		pf->to_sk_daddr(&t->ipaddr, sk);
-		dst->ops->update_pmtu(dst, sk, NULL, pmtu);
+		dst->ops->update_pmtu(dst, sk, NULL, pmtu, true);
 		pf->to_sk_daddr(&addr, sk);
 
 		dst = sctp_transport_dst_check(t);

From 675d76ad0ad5bf41c9a129772ef0aba8f57ea9a7 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sun, 22 Dec 2019 10:51:10 +0800
Subject: [PATCH 044/146] ip6_gre: do not confirm neighbor when do pmtu update

When we do ipv6 gre pmtu update, we will also do neigh confirm currently.
This will cause the neigh cache be refreshed and set to REACHABLE before
xmit.

But if the remote mac address changed, e.g. device is deleted and recreated,
we will not able to notice this and still use the old mac address as the neigh
cache is REACHABLE.

Fix this by disable neigh confirm when do pmtu update

v5: No change.
v4: No change.
v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
    dst_ops.update_pmtu to control whether we should do neighbor confirm.
    Also split the big patch to small ones for each area.
v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.

Reported-by: Jianlin Shi <jishi@redhat.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3ba69174ad6c..ee968d980746 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 
 	/* TooBig packet may have updated dst->dev's mtu */
 	if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
-		dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true);
+		dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
 			   NEXTHDR_GRE);

From 6e9105c73f8d2163d12d5dfd762fd75483ed30f5 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sun, 22 Dec 2019 10:51:11 +0800
Subject: [PATCH 045/146] gtp: do not confirm neighbor when do pmtu update

When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end,
we should not call dst_confirm_neigh() as there is no two-way communication.

Although GTP only support ipv4 right now, and __ip_rt_update_pmtu() does not
call dst_confirm_neigh(), we still set it to false to keep consistency with
IPv6 code.

v5: No change.
v4: No change.
v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
    dst_ops.update_pmtu to control whether we should do neighbor confirm.
    Also split the big patch to small ones for each area.
v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.

Reviewed-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gtp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 913062017be9..fca471e27f39 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -540,7 +540,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
 		mtu = dst_mtu(&rt->dst);
 	}
 
-	rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true);
+	rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
 
 	if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
 	    mtu < ntohs(iph->tot_len)) {

From 07dc35c6e3cc3c001915d05f5bf21f80a39a0970 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sun, 22 Dec 2019 10:51:12 +0800
Subject: [PATCH 046/146] net/dst: add new function
 skb_dst_update_pmtu_no_confirm

Add a new function skb_dst_update_pmtu_no_confirm() for callers who need
update pmtu but should not do neighbor confirm.

v5: No change.
v4: No change.
v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
    dst_ops.update_pmtu to control whether we should do neighbor confirm.
    Also split the big patch to small ones for each area.
v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.

Reviewed-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/net/dst.h b/include/net/dst.h
index 593630e0e076..dc7cc1f1051c 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -519,6 +519,15 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
 		dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
 }
 
+/* update dst pmtu but not do neighbor confirm */
+static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	if (dst && dst->ops->update_pmtu)
+		dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
+}
+
 static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
 					 struct dst_entry *encap_dst,
 					 int headroom)

From 7a1592bcb15d71400a98632727791d1e68ea0ee8 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sun, 22 Dec 2019 10:51:13 +0800
Subject: [PATCH 047/146] tunnel: do not confirm neighbor when do pmtu update

When do tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end,
we should not call dst_confirm_neigh() as there is no two-way communication.

v5: No Change.
v4: Update commit description
v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
    dst_ops.update_pmtu to control whether we should do neighbor confirm.
    Also split the big patch to small ones for each area.
v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.

Fixes: 0dec879f636f ("net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP")
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Tested-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c  | 2 +-
 net/ipv6/ip6_tunnel.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 38c02bb62e2c..0fe2a5d3e258 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -505,7 +505,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
 	if (skb_valid_dst(skb))
-		skb_dst_update_pmtu(skb, mtu);
+		skb_dst_update_pmtu_no_confirm(skb, mtu);
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		if (!skb_is_gso(skb) &&
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 754a484d35df..2f376dbc37d5 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -640,7 +640,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (rel_info > dst_mtu(skb_dst(skb2)))
 			goto out;
 
-		skb_dst_update_pmtu(skb2, rel_info);
+		skb_dst_update_pmtu_no_confirm(skb2, rel_info);
 	}
 
 	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -1132,7 +1132,7 @@ route_lookup:
 	mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
 		       IPV6_MIN_MTU : IPV4_MIN_MTU);
 
-	skb_dst_update_pmtu(skb, mtu);
+	skb_dst_update_pmtu_no_confirm(skb, mtu);
 	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
 		*pmtu = mtu;
 		err = -EMSGSIZE;

From 8247a79efa2f28b44329f363272550c1738377de Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sun, 22 Dec 2019 10:51:14 +0800
Subject: [PATCH 048/146] vti: do not confirm neighbor when do pmtu update

When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end,
we should not call dst_confirm_neigh() as there is no two-way communication.

Although vti and vti6 are immune to this problem because they are IFF_NOARP
interfaces, as Guillaume pointed. There is still no sense to confirm neighbour
here.

v5: Update commit description.
v4: No change.
v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
    dst_ops.update_pmtu to control whether we should do neighbor confirm.
    Also split the big patch to small ones for each area.
v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.

Reviewed-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_vti.c  | 2 +-
 net/ipv6/ip6_vti.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 9b153c7fcbb4..e90b600c7a25 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -214,7 +214,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 
 	mtu = dst_mtu(dst);
 	if (skb->len > mtu) {
-		skb_dst_update_pmtu(skb, mtu);
+		skb_dst_update_pmtu_no_confirm(skb, mtu);
 		if (skb->protocol == htons(ETH_P_IP)) {
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 				  htonl(mtu));
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 024db17386d2..6f08b760c2a7 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -479,7 +479,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 
 	mtu = dst_mtu(dst);
 	if (skb->len > mtu) {
-		skb_dst_update_pmtu(skb, mtu);
+		skb_dst_update_pmtu_no_confirm(skb, mtu);
 
 		if (skb->protocol == htons(ETH_P_IPV6)) {
 			if (mtu < IPV6_MIN_MTU)

From 4d42df46d6372ece4cb4279870b46c2ea7304a47 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sun, 22 Dec 2019 10:51:15 +0800
Subject: [PATCH 049/146] sit: do not confirm neighbor when do pmtu update

When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end,
we should not call dst_confirm_neigh() as there is no two-way communication.

v5: No change.
v4: No change.
v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
    dst_ops.update_pmtu to control whether we should do neighbor confirm.
    Also split the big patch to small ones for each area.
v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.

Reviewed-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b2ccbc473127..98954830c40b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -944,7 +944,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		}
 
 		if (tunnel->parms.iph.daddr)
-			skb_dst_update_pmtu(skb, mtu);
+			skb_dst_update_pmtu_no_confirm(skb, mtu);
 
 		if (skb->len > mtu && !skb_is_gso(skb)) {
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);

From f081042d128a0c7acbd67611def62e1b52e2d294 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sun, 22 Dec 2019 10:51:16 +0800
Subject: [PATCH 050/146] net/dst: do not confirm neighbor for vxlan and geneve
 pmtu update

When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end,
we should not call dst_confirm_neigh() as there is no two-way communication.

So disable the neigh confirm for vxlan and geneve pmtu update.

v5: No change.
v4: No change.
v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
    dst_ops.update_pmtu to control whether we should do neighbor confirm.
    Also split the big patch to small ones for each area.
v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.

Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path")
Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path")
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Tested-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index dc7cc1f1051c..3448cf865ede 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -535,7 +535,7 @@ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
 	u32 encap_mtu = dst_mtu(encap_dst);
 
 	if (skb->len > encap_mtu - headroom)
-		skb_dst_update_pmtu(skb, encap_mtu - headroom);
+		skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
 }
 
 #endif /* _NET_DST_H */

From 8b5026bc16938920e4780b9094c3bf20e1e0939d Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Mon, 23 Dec 2019 15:03:21 +0100
Subject: [PATCH 051/146] s390/qeth: fix qdio teardown after early init error

qeth_l?_set_online() goes through a number of initialization steps, and
on any error uses qeth_l?_stop_card() to tear down the residual state.

The first initialization step is qeth_core_hardsetup_card(). When this
fails after having established a QDIO context on the device
(ie. somewhere after qeth_mpc_initialize()), qeth_l?_stop_card() doesn't
shut down this QDIO context again (since the card state hasn't
progressed from DOWN at this stage).

Even worse, we then call qdio_free() as final teardown step to free the
QDIO data structures - while some of them are still hooked into wider
QDIO infrastructure such as the IRQ list. This is inevitably followed by
use-after-frees and other nastyness.

Fix this by unconditionally calling qeth_qdio_clear_card() to shut down
the QDIO context, and also to halt/clear any pending activity on the
various IO channels.
Remove the naive attempt at handling the teardown in
qeth_mpc_initialize(), it clearly doesn't suffice and we're handling it
properly now in the wider teardown code.

Fixes: 4a71df50047f ("qeth: new qeth device driver")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 20 ++++++++------------
 drivers/s390/net/qeth_l2_main.c   |  2 +-
 drivers/s390/net/qeth_l3_main.c   |  2 +-
 3 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index bc4158888af9..324cf22f9111 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2482,50 +2482,46 @@ static int qeth_mpc_initialize(struct qeth_card *card)
 	rc = qeth_cm_enable(card);
 	if (rc) {
 		QETH_CARD_TEXT_(card, 2, "2err%d", rc);
-		goto out_qdio;
+		return rc;
 	}
 	rc = qeth_cm_setup(card);
 	if (rc) {
 		QETH_CARD_TEXT_(card, 2, "3err%d", rc);
-		goto out_qdio;
+		return rc;
 	}
 	rc = qeth_ulp_enable(card);
 	if (rc) {
 		QETH_CARD_TEXT_(card, 2, "4err%d", rc);
-		goto out_qdio;
+		return rc;
 	}
 	rc = qeth_ulp_setup(card);
 	if (rc) {
 		QETH_CARD_TEXT_(card, 2, "5err%d", rc);
-		goto out_qdio;
+		return rc;
 	}
 	rc = qeth_alloc_qdio_queues(card);
 	if (rc) {
 		QETH_CARD_TEXT_(card, 2, "5err%d", rc);
-		goto out_qdio;
+		return rc;
 	}
 	rc = qeth_qdio_establish(card);
 	if (rc) {
 		QETH_CARD_TEXT_(card, 2, "6err%d", rc);
 		qeth_free_qdio_queues(card);
-		goto out_qdio;
+		return rc;
 	}
 	rc = qeth_qdio_activate(card);
 	if (rc) {
 		QETH_CARD_TEXT_(card, 2, "7err%d", rc);
-		goto out_qdio;
+		return rc;
 	}
 	rc = qeth_dm_act(card);
 	if (rc) {
 		QETH_CARD_TEXT_(card, 2, "8err%d", rc);
-		goto out_qdio;
+		return rc;
 	}
 
 	return 0;
-out_qdio:
-	qeth_qdio_clear_card(card, !IS_IQD(card));
-	qdio_free(CARD_DDEV(card));
-	return rc;
 }
 
 void qeth_print_status_message(struct qeth_card *card)
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 8c95e6019bac..15e2fd65d434 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -287,12 +287,12 @@ static void qeth_l2_stop_card(struct qeth_card *card)
 		card->state = CARD_STATE_HARDSETUP;
 	}
 	if (card->state == CARD_STATE_HARDSETUP) {
-		qeth_qdio_clear_card(card, 0);
 		qeth_drain_output_queues(card);
 		qeth_clear_working_pool_list(card);
 		card->state = CARD_STATE_DOWN;
 	}
 
+	qeth_qdio_clear_card(card, 0);
 	flush_workqueue(card->event_wq);
 	card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED;
 	card->info.promisc_mode = 0;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 04e301de376f..5508ab89b518 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1307,12 +1307,12 @@ static void qeth_l3_stop_card(struct qeth_card *card)
 		card->state = CARD_STATE_HARDSETUP;
 	}
 	if (card->state == CARD_STATE_HARDSETUP) {
-		qeth_qdio_clear_card(card, 0);
 		qeth_drain_output_queues(card);
 		qeth_clear_working_pool_list(card);
 		card->state = CARD_STATE_DOWN;
 	}
 
+	qeth_qdio_clear_card(card, 0);
 	flush_workqueue(card->event_wq);
 	card->info.promisc_mode = 0;
 }

From 5b6c7b55cfe26224b0f41b1c226d3534c542787f Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Mon, 23 Dec 2019 15:03:22 +0100
Subject: [PATCH 052/146] s390/qeth: lock the card while changing its hsuid

qeth_l3_dev_hsuid_store() initially checks the card state, but doesn't
take the conf_mutex to ensure that the card stays in this state while
being reconfigured.

Rework the code to take this lock, and drop a redundant state check in a
helper function.

Fixes: b333293058aa ("qeth: add support for af_iucv HiperSockets transport")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c |  5 ----
 drivers/s390/net/qeth_l3_sys.c    | 40 +++++++++++++++++++++----------
 2 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 324cf22f9111..c64ef55f0dff 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3425,11 +3425,6 @@ int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq)
 			goto out;
 		}
 
-		if (card->state != CARD_STATE_DOWN) {
-			rc = -1;
-			goto out;
-		}
-
 		qeth_free_qdio_queues(card);
 		card->options.cq = cq;
 		rc = 0;
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index f9067ed6c7d3..e8c848f72c6d 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -242,21 +242,33 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct qeth_card *card = dev_get_drvdata(dev);
+	int rc = 0;
 	char *tmp;
-	int rc;
 
 	if (!IS_IQD(card))
 		return -EPERM;
-	if (card->state != CARD_STATE_DOWN)
-		return -EPERM;
-	if (card->options.sniffer)
-		return -EPERM;
-	if (card->options.cq == QETH_CQ_NOTAVAILABLE)
-		return -EPERM;
+
+	mutex_lock(&card->conf_mutex);
+	if (card->state != CARD_STATE_DOWN) {
+		rc = -EPERM;
+		goto out;
+	}
+
+	if (card->options.sniffer) {
+		rc = -EPERM;
+		goto out;
+	}
+
+	if (card->options.cq == QETH_CQ_NOTAVAILABLE) {
+		rc = -EPERM;
+		goto out;
+	}
 
 	tmp = strsep((char **)&buf, "\n");
-	if (strlen(tmp) > 8)
-		return -EINVAL;
+	if (strlen(tmp) > 8) {
+		rc = -EINVAL;
+		goto out;
+	}
 
 	if (card->options.hsuid[0])
 		/* delete old ip address */
@@ -267,11 +279,13 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
 		card->options.hsuid[0] = '\0';
 		memcpy(card->dev->perm_addr, card->options.hsuid, 9);
 		qeth_configure_cq(card, QETH_CQ_DISABLED);
-		return count;
+		goto out;
 	}
 
-	if (qeth_configure_cq(card, QETH_CQ_ENABLED))
-		return -EPERM;
+	if (qeth_configure_cq(card, QETH_CQ_ENABLED)) {
+		rc = -EPERM;
+		goto out;
+	}
 
 	snprintf(card->options.hsuid, sizeof(card->options.hsuid),
 		 "%-8s", tmp);
@@ -280,6 +294,8 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
 
 	rc = qeth_l3_modify_hsuid(card, true);
 
+out:
+	mutex_unlock(&card->conf_mutex);
 	return rc ? rc : count;
 }
 

From 68c57bfd52836e31bff33e5e1fc64029749d2c35 Mon Sep 17 00:00:00 2001
From: Alexandra Winter <wintera@linux.ibm.com>
Date: Mon, 23 Dec 2019 15:03:23 +0100
Subject: [PATCH 053/146] s390/qeth: fix false reporting of VNIC CHAR config
 failure

Symptom: Error message "Configuring the VNIC characteristics failed"
in dmesg whenever an OSA interface on z15 is set online.

The VNIC characteristics get re-programmed when setting a L2 device
online. This follows the selected 'wanted' characteristics - with the
exception that the INVISIBLE characteristic unconditionally gets
switched off.

For devices that don't support INVISIBLE (ie. OSA), the resulting
IO failure raises a noisy error message
("Configuring the VNIC characteristics failed").
For IQD, INVISIBLE is off by default anyways.

So don't unnecessarily special-case the INVISIBLE characteristic, and
thereby suppress the misleading error message on OSA devices.

Fixes: caa1f0b10d18 ("s390/qeth: add VNICC enable/disable support")
Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
Reviewed-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 15e2fd65d434..fc5d8ed3a737 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -2041,7 +2041,6 @@ static void qeth_l2_vnicc_init(struct qeth_card *card)
 	error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
 					       timeout);
 	chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT;
-	chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE;
 	chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE;
 	for_each_set_bit(i, &chars_tmp, chars_len) {
 		vnicc = BIT(i);

From e8a66d800471e2df7f0b484e2e46898b21d1fa82 Mon Sep 17 00:00:00 2001
From: Alexandra Winter <wintera@linux.ibm.com>
Date: Mon, 23 Dec 2019 15:03:24 +0100
Subject: [PATCH 054/146] s390/qeth: Fix vnicc_is_in_use if rx_bcast not set

Symptom: After vnicc/rx_bcast has been manually set to 0,
	bridge_* sysfs parameters can still be set or written.
Only occurs on HiperSockets, as OSA doesn't support changing rx_bcast.

Vnic characteristics and bridgeport settings are mutually exclusive.
rx_bcast defaults to 1, so manually setting it to 0 should disable
bridge_* parameters.

Instead it makes sense here to check the supported mask. If the card
does not support vnicc at all, bridge commands are always allowed.

Fixes: caa1f0b10d18 ("s390/qeth: add VNICC enable/disable support")
Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index fc5d8ed3a737..8024a2112a87 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1952,8 +1952,7 @@ int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout)
 /* check if VNICC is currently enabled */
 bool qeth_l2_vnicc_is_in_use(struct qeth_card *card)
 {
-	/* if everything is turned off, VNICC is not active */
-	if (!card->options.vnicc.cur_chars)
+	if (!card->options.vnicc.sup_chars)
 		return false;
 	/* default values are only OK if rx_bcast was not enabled by user
 	 * or the card is offline.

From d1b9ae1864fc3c000e0eb4af8482d78c63e0915a Mon Sep 17 00:00:00 2001
From: Alexandra Winter <wintera@linux.ibm.com>
Date: Mon, 23 Dec 2019 15:03:25 +0100
Subject: [PATCH 055/146] s390/qeth: vnicc Fix init to default

During vnicc_init wanted_char should be compared to cur_char and not
to QETH_VNICC_DEFAULT. Without this patch there is no way to enforce
the default values as desired values.

Note, that it is expected, that a card comes online with default values.
This patch was tested with private card firmware.

Fixes: caa1f0b10d18 ("s390/qeth: add VNICC enable/disable support")
Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 8024a2112a87..47d37e75dda6 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -2039,7 +2039,9 @@ static void qeth_l2_vnicc_init(struct qeth_card *card)
 	/* enforce assumed default values and recover settings, if changed  */
 	error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
 					       timeout);
-	chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT;
+	/* Change chars, if necessary  */
+	chars_tmp = card->options.vnicc.wanted_chars ^
+		    card->options.vnicc.cur_chars;
 	chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE;
 	for_each_set_bit(i, &chars_tmp, chars_len) {
 		vnicc = BIT(i);

From 0b698c838e84149b690c7e979f78cccb6f8aa4b9 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Mon, 23 Dec 2019 15:03:26 +0100
Subject: [PATCH 056/146] s390/qeth: fix initialization on old HW

I stumbled over an old OSA model that claims to support DIAG_ASSIST,
but then rejects the cmd to query its DIAG capabilities.

In the old code this was ok, as the returned raw error code was > 0.
Now that we translate the raw codes to errnos, the "rc < 0" causes us
to fail the initialization of the device.

The fix is trivial: don't bail out when the DIAG query fails. Such an
error is not critical, we can still use the device (with a slightly
reduced set of features).

Fixes: 742d4d40831d ("s390/qeth: convert remaining legacy cmd callbacks")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index c64ef55f0dff..29facb913671 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5026,10 +5026,8 @@ retriable:
 	}
 	if (qeth_adp_supported(card, IPA_SETADP_SET_DIAG_ASSIST)) {
 		rc = qeth_query_setdiagass(card);
-		if (rc < 0) {
+		if (rc)
 			QETH_CARD_TEXT_(card, 2, "8err%d", rc);
-			goto out;
-		}
 	}
 
 	if (!qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP) ||

From 57b948e2c9ce58e4fcb687bc285b5b5a2cc66640 Mon Sep 17 00:00:00 2001
From: Netanel Belgazal <netanel@amazon.com>
Date: Sun, 22 Dec 2019 09:47:59 +0000
Subject: [PATCH 057/146] MAINTAINERS: Add additional maintainers to ENA
 Ethernet driver

Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index ffa3371bc750..992474b930bf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -771,6 +771,8 @@ F:	drivers/thermal/thermal_mmio.c
 
 AMAZON ETHERNET DRIVERS
 M:	Netanel Belgazal <netanel@amazon.com>
+M:	Arthur Kiyanovski <akiyano@amazon.com>
+R:	Guy Tzalik <gtzalik@amazon.com>
 R:	Saeed Bishara <saeedb@amazon.com>
 R:	Zorik Machulsky <zorik@amazon.com>
 L:	netdev@vger.kernel.org

From 84bb59d773853bc2dda2ac1ef8474c40eb33a3c6 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 22 Dec 2019 11:25:27 +0000
Subject: [PATCH 058/146] hsr: avoid debugfs warning message when module is
 remove

When hsr module is being removed, debugfs_remove() is called to remove
both debugfs directory and file.

When module is being removed, module state is changed to
MODULE_STATE_GOING then exit() is called.
At this moment, module couldn't be held so try_module_get()
will be failed.

debugfs's open() callback tries to hold the module if .owner is existing.
If it fails, warning message is printed.

CPU0				CPU1
delete_module()
    try_stop_module()
    hsr_exit()			open() <-- WARNING
        debugfs_remove()

In order to avoid the warning message, this patch makes hsr module does
not set .owner. Unsetting .owner is safe because these are protected by
inode_lock().

Test commands:
    #SHELL1
    ip link add dummy0 type dummy
    ip link add dummy1 type dummy
    while :
    do
        ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1
	modprobe -rv hsr
    done

    #SHELL2
    while :
    do
        cat /sys/kernel/debug/hsr0/node_table
    done

Splat looks like:
[  101.223783][ T1271] ------------[ cut here ]------------
[  101.230309][ T1271] debugfs file owner did not clean up at exit: node_table
[  101.230380][ T1271] WARNING: CPU: 3 PID: 1271 at fs/debugfs/file.c:309 full_proxy_open+0x10f/0x650
[  101.233153][ T1271] Modules linked in: hsr(-) dummy veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_d]
[  101.237112][ T1271] CPU: 3 PID: 1271 Comm: cat Tainted: G        W         5.5.0-rc1+ #204
[  101.238270][ T1271] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  101.240379][ T1271] RIP: 0010:full_proxy_open+0x10f/0x650
[  101.241166][ T1271] Code: 48 c1 ea 03 80 3c 02 00 0f 85 c1 04 00 00 49 8b 3c 24 e8 04 86 7e ff 84 c0 75 2d 4c 8
[  101.251985][ T1271] RSP: 0018:ffff8880ca22fa38 EFLAGS: 00010286
[  101.273355][ T1271] RAX: dffffc0000000008 RBX: ffff8880cc6e6200 RCX: 0000000000000000
[  101.274466][ T1271] RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffff8880c4dd5c14
[  101.275581][ T1271] RBP: 0000000000000000 R08: fffffbfff2922f5d R09: 0000000000000000
[  101.276733][ T1271] R10: 0000000000000001 R11: 0000000000000000 R12: ffffffffc0551bc0
[  101.277853][ T1271] R13: ffff8880c4059a48 R14: ffff8880be50a5e0 R15: ffffffff941adaa0
[  101.278956][ T1271] FS:  00007f8871cda540(0000) GS:ffff8880da800000(0000) knlGS:0000000000000000
[  101.280216][ T1271] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  101.282832][ T1271] CR2: 00007f88717cfd10 CR3: 00000000b9440005 CR4: 00000000000606e0
[  101.283974][ T1271] Call Trace:
[  101.285328][ T1271]  do_dentry_open+0x63c/0xf50
[  101.286077][ T1271]  ? open_proxy_open+0x270/0x270
[  101.288271][ T1271]  ? __x64_sys_fchdir+0x180/0x180
[  101.288987][ T1271]  ? inode_permission+0x65/0x390
[  101.289682][ T1271]  path_openat+0x701/0x2810
[  101.290294][ T1271]  ? path_lookupat+0x880/0x880
[  101.290957][ T1271]  ? check_chain_key+0x236/0x5d0
[  101.291676][ T1271]  ? __lock_acquire+0xdfe/0x3de0
[  101.292358][ T1271]  ? sched_clock+0x5/0x10
[  101.292962][ T1271]  ? sched_clock_cpu+0x18/0x170
[  101.293644][ T1271]  ? find_held_lock+0x39/0x1d0
[  101.305616][ T1271]  do_filp_open+0x17a/0x270
[  101.306061][ T1271]  ? may_open_dev+0xc0/0xc0
[ ... ]

Fixes: fc4ecaeebd26 ("net: hsr: add debugfs support for display node list")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_debugfs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 94447974a3c0..6135706f03d5 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -64,7 +64,6 @@ hsr_node_table_open(struct inode *inode, struct file *filp)
 }
 
 static const struct file_operations hsr_fops = {
-	.owner	= THIS_MODULE,
 	.open	= hsr_node_table_open,
 	.read	= seq_read,
 	.llseek = seq_lseek,

From 1d19e2d53e8ed9e4c98fc95e0067492cda7288b0 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 22 Dec 2019 11:26:15 +0000
Subject: [PATCH 059/146] hsr: fix error handling routine in hsr_dev_finalize()

hsr_dev_finalize() is called to create new hsr interface.
There are some wrong error handling codes.

1. wrong checking return value of debugfs_create_{dir/file}.
These function doesn't return NULL. If error occurs in there,
it returns error pointer.
So, it should check error pointer instead of NULL.

2. It doesn't unregister interface if it fails to setup hsr interface.
If it fails to initialize hsr interface after register_netdevice(),
it should call unregister_netdevice().

3. Ignore failure of creation of debugfs
If creating of debugfs dir and file is failed, creating hsr interface
will be failed. But debugfs doesn't affect actual logic of hsr module.
So, ignoring this is more correct and this behavior is more general.

Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_debugfs.c | 15 +++++++--------
 net/hsr/hsr_device.c  | 19 ++++++++++---------
 net/hsr/hsr_main.h    | 11 ++++-------
 3 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 6135706f03d5..6618a9d8e58e 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -77,15 +77,14 @@ static const struct file_operations hsr_fops = {
  * When debugfs is configured this routine sets up the node_table file per
  * hsr device for dumping the node_table entries
  */
-int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
+void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
 {
-	int rc = -1;
 	struct dentry *de = NULL;
 
 	de = debugfs_create_dir(hsr_dev->name, NULL);
-	if (!de) {
+	if (IS_ERR(de)) {
 		pr_err("Cannot create hsr debugfs root\n");
-		return rc;
+		return;
 	}
 
 	priv->node_tbl_root = de;
@@ -93,13 +92,13 @@ int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
 	de = debugfs_create_file("node_table", S_IFREG | 0444,
 				 priv->node_tbl_root, priv,
 				 &hsr_fops);
-	if (!de) {
+	if (IS_ERR(de)) {
 		pr_err("Cannot create hsr node_table directory\n");
-		return rc;
+		debugfs_remove(priv->node_tbl_root);
+		priv->node_tbl_root = NULL;
+		return;
 	}
 	priv->node_tbl_file = de;
-
-	return 0;
 }
 
 /* hsr_debugfs_term - Tear down debugfs intrastructure
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index b01e1bae4ddc..e73549075a03 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -477,30 +477,31 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
 
 	res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
 	if (res)
-		goto err_add_port;
+		goto err_add_master;
 
 	res = register_netdevice(hsr_dev);
 	if (res)
-		goto fail;
+		goto err_unregister;
 
 	res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A);
 	if (res)
-		goto fail;
+		goto err_add_slaves;
+
 	res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B);
 	if (res)
-		goto fail;
+		goto err_add_slaves;
 
+	hsr_debugfs_init(hsr, hsr_dev);
 	mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
-	res = hsr_debugfs_init(hsr, hsr_dev);
-	if (res)
-		goto fail;
 
 	return 0;
 
-fail:
+err_add_slaves:
+	unregister_netdevice(hsr_dev);
+err_unregister:
 	list_for_each_entry_safe(port, tmp, &hsr->ports, port_list)
 		hsr_del_port(port);
-err_add_port:
+err_add_master:
 	hsr_del_self_node(&hsr->self_node_db);
 
 	return res;
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 96fac696a1e1..acab9c353a49 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -184,15 +184,12 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb)
 }
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
-int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
+void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
 void hsr_debugfs_term(struct hsr_priv *priv);
 #else
-static inline int hsr_debugfs_init(struct hsr_priv *priv,
-				   struct net_device *hsr_dev)
-{
-	return 0;
-}
-
+static inline void hsr_debugfs_init(struct hsr_priv *priv,
+				    struct net_device *hsr_dev)
+{}
 static inline void hsr_debugfs_term(struct hsr_priv *priv)
 {}
 #endif

From c6c4ccd7f96993e106dfea7ef18127f972f2db5e Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 22 Dec 2019 11:26:27 +0000
Subject: [PATCH 060/146] hsr: add hsr root debugfs directory

In current hsr code, when hsr interface is created, it creates debugfs
directory /sys/kernel/debug/<interface name>.
If there is same directory or file name in there, it fails.
In order to reduce possibility of failure of creation of debugfs,
this patch adds root directory.

Test commands:
    ip link add dummy0 type dummy
    ip link add dummy1 type dummy
    ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1

Before this patch:
    /sys/kernel/debug/hsr0/node_table

After this patch:
    /sys/kernel/debug/hsr/hsr0/node_table

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_debugfs.c | 23 ++++++++++++++++++++---
 net/hsr/hsr_main.c    |  1 +
 net/hsr/hsr_main.h    |  6 ++++++
 net/hsr/hsr_netlink.c |  1 +
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 6618a9d8e58e..a7462a718e7b 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -20,6 +20,8 @@
 #include "hsr_main.h"
 #include "hsr_framereg.h"
 
+static struct dentry *hsr_debugfs_root_dir;
+
 static void print_mac_address(struct seq_file *sfp, unsigned char *mac)
 {
 	seq_printf(sfp, "%02x:%02x:%02x:%02x:%02x:%02x:",
@@ -81,9 +83,9 @@ void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
 {
 	struct dentry *de = NULL;
 
-	de = debugfs_create_dir(hsr_dev->name, NULL);
+	de = debugfs_create_dir(hsr_dev->name, hsr_debugfs_root_dir);
 	if (IS_ERR(de)) {
-		pr_err("Cannot create hsr debugfs root\n");
+		pr_err("Cannot create hsr debugfs directory\n");
 		return;
 	}
 
@@ -93,7 +95,7 @@ void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
 				 priv->node_tbl_root, priv,
 				 &hsr_fops);
 	if (IS_ERR(de)) {
-		pr_err("Cannot create hsr node_table directory\n");
+		pr_err("Cannot create hsr node_table file\n");
 		debugfs_remove(priv->node_tbl_root);
 		priv->node_tbl_root = NULL;
 		return;
@@ -115,3 +117,18 @@ hsr_debugfs_term(struct hsr_priv *priv)
 	debugfs_remove(priv->node_tbl_root);
 	priv->node_tbl_root = NULL;
 }
+
+void hsr_debugfs_create_root(void)
+{
+	hsr_debugfs_root_dir = debugfs_create_dir("hsr", NULL);
+	if (IS_ERR(hsr_debugfs_root_dir)) {
+		pr_err("Cannot create hsr debugfs root directory\n");
+		hsr_debugfs_root_dir = NULL;
+	}
+}
+
+void hsr_debugfs_remove_root(void)
+{
+	/* debugfs_remove() internally checks NULL and ERROR */
+	debugfs_remove(hsr_debugfs_root_dir);
+}
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index b9988a662ee1..490896379073 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -123,6 +123,7 @@ static void __exit hsr_exit(void)
 {
 	unregister_netdevice_notifier(&hsr_nb);
 	hsr_netlink_exit();
+	hsr_debugfs_remove_root();
 }
 
 module_init(hsr_init);
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index acab9c353a49..55d2057bf749 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -186,12 +186,18 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb)
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
 void hsr_debugfs_term(struct hsr_priv *priv);
+void hsr_debugfs_create_root(void);
+void hsr_debugfs_remove_root(void);
 #else
 static inline void hsr_debugfs_init(struct hsr_priv *priv,
 				    struct net_device *hsr_dev)
 {}
 static inline void hsr_debugfs_term(struct hsr_priv *priv)
 {}
+static inline void hsr_debugfs_create_root(void)
+{}
+static inline void hsr_debugfs_remove_root(void)
+{}
 #endif
 
 #endif /*  __HSR_PRIVATE_H */
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 8f8337f893ba..8dc0547f01d0 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -476,6 +476,7 @@ int __init hsr_netlink_init(void)
 	if (rc)
 		goto fail_genl_register_family;
 
+	hsr_debugfs_create_root();
 	return 0;
 
 fail_genl_register_family:

From 4c2d5e33dcd3a6333a7895be3b542ff3d373177c Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 22 Dec 2019 11:26:39 +0000
Subject: [PATCH 061/146] hsr: rename debugfs file when interface name is
 changed

hsr interface has own debugfs file, which name is same with interface name.
So, interface name is changed, debugfs file name should be changed too.

Fixes: fc4ecaeebd26 ("net: hsr: add debugfs support for display node list")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_debugfs.c | 13 +++++++++++++
 net/hsr/hsr_main.c    |  3 +++
 net/hsr/hsr_main.h    |  4 ++++
 3 files changed, 20 insertions(+)

diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index a7462a718e7b..d5f709b940ff 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -65,6 +65,19 @@ hsr_node_table_open(struct inode *inode, struct file *filp)
 	return single_open(filp, hsr_node_table_show, inode->i_private);
 }
 
+void hsr_debugfs_rename(struct net_device *dev)
+{
+	struct hsr_priv *priv = netdev_priv(dev);
+	struct dentry *d;
+
+	d = debugfs_rename(hsr_debugfs_root_dir, priv->node_tbl_root,
+			   hsr_debugfs_root_dir, dev->name);
+	if (IS_ERR(d))
+		netdev_warn(dev, "failed to rename\n");
+	else
+		priv->node_tbl_root = d;
+}
+
 static const struct file_operations hsr_fops = {
 	.open	= hsr_node_table_open,
 	.read	= seq_read,
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 490896379073..ea23eb7408e4 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -45,6 +45,9 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
 	case NETDEV_CHANGE:	/* Link (carrier) state changes */
 		hsr_check_carrier_and_operstate(hsr);
 		break;
+	case NETDEV_CHANGENAME:
+		hsr_debugfs_rename(dev);
+		break;
 	case NETDEV_CHANGEADDR:
 		if (port->type == HSR_PT_MASTER) {
 			/* This should not happen since there's no
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 55d2057bf749..8d885bc6a54d 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -184,11 +184,15 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb)
 }
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
+void hsr_debugfs_rename(struct net_device *dev);
 void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
 void hsr_debugfs_term(struct hsr_priv *priv);
 void hsr_debugfs_create_root(void);
 void hsr_debugfs_remove_root(void);
 #else
+static inline void void hsr_debugfs_rename(struct net_device *dev)
+{
+}
 static inline void hsr_debugfs_init(struct hsr_priv *priv,
 				    struct net_device *hsr_dev)
 {}

From 92a35678ec075100ce666a2fb6969151affb0e5d Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 22 Dec 2019 11:26:54 +0000
Subject: [PATCH 062/146] hsr: fix a race condition in node list insertion and
 deletion

hsr nodes are protected by RCU and there is no write side lock.
But node insertions and deletions could be being operated concurrently.
So write side locking is needed.

Test commands:
    ip netns add nst
    ip link add veth0 type veth peer name veth1
    ip link add veth2 type veth peer name veth3
    ip link set veth1 netns nst
    ip link set veth3 netns nst
    ip link set veth0 up
    ip link set veth2 up
    ip link add hsr0 type hsr slave1 veth0 slave2 veth2
    ip a a 192.168.100.1/24 dev hsr0
    ip link set hsr0 up
    ip netns exec nst ip link set veth1 up
    ip netns exec nst ip link set veth3 up
    ip netns exec nst ip link add hsr1 type hsr slave1 veth1 slave2 veth3
    ip netns exec nst ip a a 192.168.100.2/24 dev hsr1
    ip netns exec nst ip link set hsr1 up

    for i in {0..9}
    do
        for j in {0..9}
	do
	    for k in {0..9}
	    do
	        for l in {0..9}
		do
	        arping 192.168.100.2 -I hsr0 -s 00:01:3$i:4$j:5$k:6$l -c1 &
		done
	    done
	done
    done

Splat looks like:
[  236.066091][ T3286] list_add corruption. next->prev should be prev (ffff8880a5940300), but was ffff8880a5940d0.
[  236.069617][ T3286] ------------[ cut here ]------------
[  236.070545][ T3286] kernel BUG at lib/list_debug.c:25!
[  236.071391][ T3286] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[  236.072343][ T3286] CPU: 0 PID: 3286 Comm: arping Tainted: G        W         5.5.0-rc1+ #209
[  236.073463][ T3286] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  236.074695][ T3286] RIP: 0010:__list_add_valid+0x74/0xd0
[  236.075499][ T3286] Code: 48 39 da 75 27 48 39 f5 74 36 48 39 dd 74 31 48 83 c4 08 b8 01 00 00 00 5b 5d c3 48 b
[  236.078277][ T3286] RSP: 0018:ffff8880aaa97648 EFLAGS: 00010286
[  236.086991][ T3286] RAX: 0000000000000075 RBX: ffff8880d4624c20 RCX: 0000000000000000
[  236.088000][ T3286] RDX: 0000000000000075 RSI: 0000000000000008 RDI: ffffed1015552ebf
[  236.098897][ T3286] RBP: ffff88809b53d200 R08: ffffed101b3c04f9 R09: ffffed101b3c04f9
[  236.099960][ T3286] R10: 00000000308769a1 R11: ffffed101b3c04f8 R12: ffff8880d4624c28
[  236.100974][ T3286] R13: ffff8880d4624c20 R14: 0000000040310100 R15: ffff8880ce17ee02
[  236.138967][ T3286] FS:  00007f23479fa680(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000
[  236.144852][ T3286] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  236.145720][ T3286] CR2: 00007f4a14bab210 CR3: 00000000a61c6001 CR4: 00000000000606f0
[  236.146776][ T3286] Call Trace:
[  236.147222][ T3286]  hsr_add_node+0x314/0x490 [hsr]
[  236.153633][ T3286]  hsr_forward_skb+0x2b6/0x1bc0 [hsr]
[  236.154362][ T3286]  ? rcu_read_lock_sched_held+0x90/0xc0
[  236.155091][ T3286]  ? rcu_read_lock_bh_held+0xa0/0xa0
[  236.156607][ T3286]  hsr_dev_xmit+0x70/0xd0 [hsr]
[  236.157254][ T3286]  dev_hard_start_xmit+0x160/0x740
[  236.157941][ T3286]  __dev_queue_xmit+0x1961/0x2e10
[  236.158565][ T3286]  ? netdev_core_pick_tx+0x2e0/0x2e0
[ ... ]

Reported-by: syzbot+3924327f9ad5f4d2b343@syzkaller.appspotmail.com
Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c   |  7 ++--
 net/hsr/hsr_framereg.c | 73 ++++++++++++++++++++++++++----------------
 net/hsr/hsr_framereg.h |  6 ++--
 net/hsr/hsr_main.c     |  2 +-
 net/hsr/hsr_main.h     |  5 +--
 5 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e73549075a03..62c03f0d0079 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -368,7 +368,7 @@ static void hsr_dev_destroy(struct net_device *hsr_dev)
 	del_timer_sync(&hsr->prune_timer);
 	del_timer_sync(&hsr->announce_timer);
 
-	hsr_del_self_node(&hsr->self_node_db);
+	hsr_del_self_node(hsr);
 	hsr_del_nodes(&hsr->node_db);
 }
 
@@ -440,11 +440,12 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
 	INIT_LIST_HEAD(&hsr->ports);
 	INIT_LIST_HEAD(&hsr->node_db);
 	INIT_LIST_HEAD(&hsr->self_node_db);
+	spin_lock_init(&hsr->list_lock);
 
 	ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr);
 
 	/* Make sure we recognize frames from ourselves in hsr_rcv() */
-	res = hsr_create_self_node(&hsr->self_node_db, hsr_dev->dev_addr,
+	res = hsr_create_self_node(hsr, hsr_dev->dev_addr,
 				   slave[1]->dev_addr);
 	if (res < 0)
 		return res;
@@ -502,7 +503,7 @@ err_unregister:
 	list_for_each_entry_safe(port, tmp, &hsr->ports, port_list)
 		hsr_del_port(port);
 err_add_master:
-	hsr_del_self_node(&hsr->self_node_db);
+	hsr_del_self_node(hsr);
 
 	return res;
 }
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 292be446007b..27dc65d7de67 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -75,10 +75,11 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
 /* Helper for device init; the self_node_db is used in hsr_rcv() to recognize
  * frames from self that's been looped over the HSR ring.
  */
-int hsr_create_self_node(struct list_head *self_node_db,
+int hsr_create_self_node(struct hsr_priv *hsr,
 			 unsigned char addr_a[ETH_ALEN],
 			 unsigned char addr_b[ETH_ALEN])
 {
+	struct list_head *self_node_db = &hsr->self_node_db;
 	struct hsr_node *node, *oldnode;
 
 	node = kmalloc(sizeof(*node), GFP_KERNEL);
@@ -88,33 +89,33 @@ int hsr_create_self_node(struct list_head *self_node_db,
 	ether_addr_copy(node->macaddress_A, addr_a);
 	ether_addr_copy(node->macaddress_B, addr_b);
 
-	rcu_read_lock();
+	spin_lock_bh(&hsr->list_lock);
 	oldnode = list_first_or_null_rcu(self_node_db,
 					 struct hsr_node, mac_list);
 	if (oldnode) {
 		list_replace_rcu(&oldnode->mac_list, &node->mac_list);
-		rcu_read_unlock();
-		synchronize_rcu();
-		kfree(oldnode);
+		spin_unlock_bh(&hsr->list_lock);
+		kfree_rcu(oldnode, rcu_head);
 	} else {
-		rcu_read_unlock();
 		list_add_tail_rcu(&node->mac_list, self_node_db);
+		spin_unlock_bh(&hsr->list_lock);
 	}
 
 	return 0;
 }
 
-void hsr_del_self_node(struct list_head *self_node_db)
+void hsr_del_self_node(struct hsr_priv *hsr)
 {
+	struct list_head *self_node_db = &hsr->self_node_db;
 	struct hsr_node *node;
 
-	rcu_read_lock();
+	spin_lock_bh(&hsr->list_lock);
 	node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
-	rcu_read_unlock();
 	if (node) {
 		list_del_rcu(&node->mac_list);
-		kfree(node);
+		kfree_rcu(node, rcu_head);
 	}
+	spin_unlock_bh(&hsr->list_lock);
 }
 
 void hsr_del_nodes(struct list_head *node_db)
@@ -130,30 +131,43 @@ void hsr_del_nodes(struct list_head *node_db)
  * seq_out is used to initialize filtering of outgoing duplicate frames
  * originating from the newly added node.
  */
-struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
-			      u16 seq_out)
+static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
+				     struct list_head *node_db,
+				     unsigned char addr[],
+				     u16 seq_out)
 {
-	struct hsr_node *node;
+	struct hsr_node *new_node, *node;
 	unsigned long now;
 	int i;
 
-	node = kzalloc(sizeof(*node), GFP_ATOMIC);
-	if (!node)
+	new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
+	if (!new_node)
 		return NULL;
 
-	ether_addr_copy(node->macaddress_A, addr);
+	ether_addr_copy(new_node->macaddress_A, addr);
 
 	/* We are only interested in time diffs here, so use current jiffies
 	 * as initialization. (0 could trigger an spurious ring error warning).
 	 */
 	now = jiffies;
 	for (i = 0; i < HSR_PT_PORTS; i++)
-		node->time_in[i] = now;
+		new_node->time_in[i] = now;
 	for (i = 0; i < HSR_PT_PORTS; i++)
-		node->seq_out[i] = seq_out;
-
-	list_add_tail_rcu(&node->mac_list, node_db);
+		new_node->seq_out[i] = seq_out;
 
+	spin_lock_bh(&hsr->list_lock);
+	list_for_each_entry_rcu(node, node_db, mac_list) {
+		if (ether_addr_equal(node->macaddress_A, addr))
+			goto out;
+		if (ether_addr_equal(node->macaddress_B, addr))
+			goto out;
+	}
+	list_add_tail_rcu(&new_node->mac_list, node_db);
+	spin_unlock_bh(&hsr->list_lock);
+	return new_node;
+out:
+	spin_unlock_bh(&hsr->list_lock);
+	kfree(new_node);
 	return node;
 }
 
@@ -163,6 +177,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
 			      bool is_sup)
 {
 	struct list_head *node_db = &port->hsr->node_db;
+	struct hsr_priv *hsr = port->hsr;
 	struct hsr_node *node;
 	struct ethhdr *ethhdr;
 	u16 seq_out;
@@ -196,7 +211,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
 		seq_out = HSR_SEQNR_START;
 	}
 
-	return hsr_add_node(node_db, ethhdr->h_source, seq_out);
+	return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out);
 }
 
 /* Use the Supervision frame's info about an eventual macaddress_B for merging
@@ -206,10 +221,11 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
 void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
 			  struct hsr_port *port_rcv)
 {
-	struct ethhdr *ethhdr;
-	struct hsr_node *node_real;
+	struct hsr_priv *hsr = port_rcv->hsr;
 	struct hsr_sup_payload *hsr_sp;
+	struct hsr_node *node_real;
 	struct list_head *node_db;
+	struct ethhdr *ethhdr;
 	int i;
 
 	ethhdr = (struct ethhdr *)skb_mac_header(skb);
@@ -231,7 +247,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
 	node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
 	if (!node_real)
 		/* No frame received from AddrA of this node yet */
-		node_real = hsr_add_node(node_db, hsr_sp->macaddress_A,
+		node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
 					 HSR_SEQNR_START - 1);
 	if (!node_real)
 		goto done; /* No mem */
@@ -252,7 +268,9 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
 	}
 	node_real->addr_B_port = port_rcv->type;
 
+	spin_lock_bh(&hsr->list_lock);
 	list_del_rcu(&node_curr->mac_list);
+	spin_unlock_bh(&hsr->list_lock);
 	kfree_rcu(node_curr, rcu_head);
 
 done:
@@ -368,12 +386,13 @@ void hsr_prune_nodes(struct timer_list *t)
 {
 	struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
 	struct hsr_node *node;
+	struct hsr_node *tmp;
 	struct hsr_port *port;
 	unsigned long timestamp;
 	unsigned long time_a, time_b;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
+	spin_lock_bh(&hsr->list_lock);
+	list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) {
 		/* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A]
 		 * nor time_in[HSR_PT_SLAVE_B], will ever be updated for
 		 * the master port. Thus the master node will be repeatedly
@@ -421,7 +440,7 @@ void hsr_prune_nodes(struct timer_list *t)
 			kfree_rcu(node, rcu_head);
 		}
 	}
-	rcu_read_unlock();
+	spin_unlock_bh(&hsr->list_lock);
 
 	/* Restart timer */
 	mod_timer(&hsr->prune_timer,
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 89a3ce38151d..0f0fa12b4329 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -12,10 +12,8 @@
 
 struct hsr_node;
 
-void hsr_del_self_node(struct list_head *self_node_db);
+void hsr_del_self_node(struct hsr_priv *hsr);
 void hsr_del_nodes(struct list_head *node_db);
-struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
-			      u16 seq_out);
 struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
 			      bool is_sup);
 void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
@@ -33,7 +31,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
 
 void hsr_prune_nodes(struct timer_list *t);
 
-int hsr_create_self_node(struct list_head *self_node_db,
+int hsr_create_self_node(struct hsr_priv *hsr,
 			 unsigned char addr_a[ETH_ALEN],
 			 unsigned char addr_b[ETH_ALEN]);
 
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index ea23eb7408e4..d2ee7125a7f1 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -67,7 +67,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
 
 		/* Make sure we recognize frames from ourselves in hsr_rcv() */
 		port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
-		res = hsr_create_self_node(&hsr->self_node_db,
+		res = hsr_create_self_node(hsr,
 					   master->dev->dev_addr,
 					   port ?
 						port->dev->dev_addr :
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 8d885bc6a54d..d40de84a637f 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -160,8 +160,9 @@ struct hsr_priv {
 	int announce_count;
 	u16 sequence_nr;
 	u16 sup_sequence_nr;	/* For HSRv1 separate seq_nr for supervision */
-	u8 prot_version;		/* Indicate if HSRv0 or HSRv1. */
-	spinlock_t seqnr_lock;			/* locking for sequence_nr */
+	u8 prot_version;	/* Indicate if HSRv0 or HSRv1. */
+	spinlock_t seqnr_lock;	/* locking for sequence_nr */
+	spinlock_t list_lock;	/* locking for node list */
 	unsigned char		sup_multicast_addr[ETH_ALEN];
 #ifdef	CONFIG_DEBUG_FS
 	struct dentry *node_tbl_root;

From 3ed0a1d563903bdb4b4c36c58c4d9c1bcb23a6e6 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 22 Dec 2019 11:27:08 +0000
Subject: [PATCH 063/146] hsr: reset network header when supervision frame is
 created

The supervision frame is L2 frame.
When supervision frame is created, hsr module doesn't set network header.
If tap routine is enabled, dev_queue_xmit_nit() is called and it checks
network_header. If network_header pointer wasn't set(or invalid),
it resets network_header and warns.
In order to avoid unnecessary warning message, resetting network_header
is needed.

Test commands:
    ip netns add nst
    ip link add veth0 type veth peer name veth1
    ip link add veth2 type veth peer name veth3
    ip link set veth1 netns nst
    ip link set veth3 netns nst
    ip link set veth0 up
    ip link set veth2 up
    ip link add hsr0 type hsr slave1 veth0 slave2 veth2
    ip a a 192.168.100.1/24 dev hsr0
    ip link set hsr0 up
    ip netns exec nst ip link set veth1 up
    ip netns exec nst ip link set veth3 up
    ip netns exec nst ip link add hsr1 type hsr slave1 veth1 slave2 veth3
    ip netns exec nst ip a a 192.168.100.2/24 dev hsr1
    ip netns exec nst ip link set hsr1 up
    tcpdump -nei veth0

Splat looks like:
[  175.852292][    C3] protocol 88fb is buggy, dev veth0

Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 62c03f0d0079..c7bd6c49fadf 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -272,6 +272,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
 			    skb->dev->dev_addr, skb->len) <= 0)
 		goto out;
 	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
 
 	if (hsr_ver > 0) {
 		hsr_tag = skb_put(skb, sizeof(struct hsr_tag));

From a5bcd72e054aabb93ddc51ed8cde36a5bfc50271 Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
Date: Thu, 26 Dec 2019 10:41:56 +0200
Subject: [PATCH 064/146] net/mlxfw: Fix out-of-memory error in mfa2 flash
 burning

The burning process requires to perform internal allocations of large
chunks of memory. This memory doesn't need to be contiguous and can be
safely allocated by vzalloc() instead of kzalloc(). This patch changes
such allocation to avoid possible out-of-memory failure.

Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process")
Signed-off-by: Vladyslav Tarasiuk <vladyslavt@mellanox.com>
Reviewed-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Tested-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
index 544344ac4894..79057af4fe99 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/netlink.h>
+#include <linux/vmalloc.h>
 #include <linux/xz.h>
 #include "mlxfw_mfa2.h"
 #include "mlxfw_mfa2_file.h"
@@ -548,7 +549,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file,
 	comp_size = be32_to_cpu(comp->size);
 	comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len;
 
-	comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL);
+	comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size);
 	if (!comp_data)
 		return ERR_PTR(-ENOMEM);
 	comp_data->comp.data_size = comp_size;
@@ -570,7 +571,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file,
 	comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len;
 	return &comp_data->comp;
 err_out:
-	kfree(comp_data);
+	vfree(comp_data);
 	return ERR_PTR(err);
 }
 
@@ -579,7 +580,7 @@ void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *comp)
 	const struct mlxfw_mfa2_comp_data *comp_data;
 
 	comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp);
-	kfree(comp_data);
+	vfree(comp_data);
 }
 
 void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file)

From c27569fcd6e1b11bd24361346504f2995a256e4e Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@oss.nxp.com>
Date: Mon, 23 Dec 2019 09:39:22 +0200
Subject: [PATCH 065/146] dpaa_eth: fix DMA mapping leak

On the error path some fragments remain DMA mapped. Adding a fix
that unmaps all the fragments. Rework cleanup path to be simpler.

Fixes: 8151ee88bad5 ("dpaa_eth: use page backed rx buffers")
Signed-off-by: Madalin Bucur <madalin.bucur@oss.nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa/dpaa_eth.c    | 39 ++++++++++---------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 6a9d12dad5d9..a301f0095223 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -1719,7 +1719,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
 	int page_offset;
 	unsigned int sz;
 	int *count_ptr;
-	int i;
+	int i, j;
 
 	vaddr = phys_to_virt(addr);
 	WARN_ON(!IS_ALIGNED((unsigned long)vaddr, SMP_CACHE_BYTES));
@@ -1736,14 +1736,14 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
 		WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr,
 				    SMP_CACHE_BYTES));
 
+		dma_unmap_page(priv->rx_dma_dev, sg_addr,
+			       DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
+
 		/* We may use multiple Rx pools */
 		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
 		if (!dpaa_bp)
 			goto free_buffers;
 
-		count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
-		dma_unmap_page(priv->rx_dma_dev, sg_addr,
-			       DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
 		if (!skb) {
 			sz = dpaa_bp->size +
 				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -1786,7 +1786,9 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
 			skb_add_rx_frag(skb, i - 1, head_page, frag_off,
 					frag_len, dpaa_bp->size);
 		}
+
 		/* Update the pool count for the current {cpu x bpool} */
+		count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
 		(*count_ptr)--;
 
 		if (qm_sg_entry_is_final(&sgt[i]))
@@ -1800,26 +1802,25 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
 	return skb;
 
 free_buffers:
-	/* compensate sw bpool counter changes */
-	for (i--; i >= 0; i--) {
-		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
-		if (dpaa_bp) {
-			count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
-			(*count_ptr)++;
-		}
-	}
 	/* free all the SG entries */
-	for (i = 0; i < DPAA_SGT_MAX_ENTRIES ; i++) {
-		sg_addr = qm_sg_addr(&sgt[i]);
+	for (j = 0; j < DPAA_SGT_MAX_ENTRIES ; j++) {
+		sg_addr = qm_sg_addr(&sgt[j]);
 		sg_vaddr = phys_to_virt(sg_addr);
+		/* all pages 0..i were unmaped */
+		if (j > i)
+			dma_unmap_page(priv->rx_dma_dev, qm_sg_addr(&sgt[j]),
+				       DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
 		free_pages((unsigned long)sg_vaddr, 0);
-		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
-		if (dpaa_bp) {
-			count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
-			(*count_ptr)--;
+		/* counters 0..i-1 were decremented */
+		if (j >= i) {
+			dpaa_bp = dpaa_bpid2pool(sgt[j].bpid);
+			if (dpaa_bp) {
+				count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+				(*count_ptr)--;
+			}
 		}
 
-		if (qm_sg_entry_is_final(&sgt[i]))
+		if (qm_sg_entry_is_final(&sgt[j]))
 			break;
 	}
 	/* free the SGT fragment */

From 1c93fb45761e79b3c00080e71523886cefaf351c Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@oss.nxp.com>
Date: Mon, 23 Dec 2019 10:06:10 +0200
Subject: [PATCH 066/146] net: phy: aquantia: add suspend / resume ops for
 AQR105

The suspend/resume code for AQR107 works on AQR105 too.
This patch fixes issues with the partner not seeing the link down
when the interface using AQR105 is brought down.

Fixes: bee8259dd31f ("net: phy: add driver for aquantia phy")
Signed-off-by: Madalin Bucur <madalin.bucur@oss.nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/aquantia_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c
index 3b29d381116f..975789d9349d 100644
--- a/drivers/net/phy/aquantia_main.c
+++ b/drivers/net/phy/aquantia_main.c
@@ -627,6 +627,8 @@ static struct phy_driver aqr_driver[] = {
 	.config_intr	= aqr_config_intr,
 	.ack_interrupt	= aqr_ack_interrupt,
 	.read_status	= aqr_read_status,
+	.suspend	= aqr107_suspend,
+	.resume		= aqr107_resume,
 },
 {
 	PHY_ID_MATCH_MODEL(PHY_ID_AQR106),

From 7df2281a174bd0fdbb2211a26914e5440740fcde Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 23 Dec 2019 11:03:21 +0100
Subject: [PATCH 067/146] of: mdio: Add missing inline to
 of_mdiobus_child_is_phy() dummy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_OF_MDIO=n:

    drivers/net/phy/mdio_bus.c:23:
    include/linux/of_mdio.h:58:13: warning: ‘of_mdiobus_child_is_phy’ defined but not used [-Wunused-function]
     static bool of_mdiobus_child_is_phy(struct device_node *child)
		 ^~~~~~~~~~~~~~~~~~~~~~~

Fix this by adding the missing "inline" keyword.

Fixes: 0aa4d016c043d16a ("of: mdio: export of_mdiobus_child_is_phy")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_mdio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 79bc82e30c02..491a2b7e77c1 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -55,7 +55,7 @@ static inline int of_mdio_parse_addr(struct device *dev,
 }
 
 #else /* CONFIG_OF_MDIO */
-static bool of_mdiobus_child_is_phy(struct device_node *child)
+static inline bool of_mdiobus_child_is_phy(struct device_node *child)
 {
 	return false;
 }

From 0444716a5dd563526e53ae686115987d5d4c249e Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc@marvell.com>
Date: Mon, 23 Dec 2019 10:23:08 -0800
Subject: [PATCH 068/146] bnx2x: Use appropriate define for vlan credit

Although it has same value as MAX_MAC_CREDIT_E2,
use MAX_VLAN_CREDIT_E2 appropriately.

Signed-off-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
index 7a6e82db4231..ed237854939a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
@@ -1537,7 +1537,7 @@ void bnx2x_get_rss_ind_table(struct bnx2x_rss_config_obj *rss_obj,
 	 func_num + GET_NUM_VFS_PER_PF(bp) * VF_MAC_CREDIT_CNT)
 
 #define PF_VLAN_CREDIT_E2(bp, func_num)					 \
-	((MAX_MAC_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT) / \
+	((MAX_VLAN_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT) /\
 	 func_num + GET_NUM_VFS_PER_PF(bp) * VF_VLAN_CREDIT_CNT)
 
 #endif /* BNX2X_SP_VERBS */

From 5cdc40c7820ff66c2271e0884bd8ee8f7cfd769b Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc@marvell.com>
Date: Mon, 23 Dec 2019 10:23:09 -0800
Subject: [PATCH 069/146] bnx2x: Fix accounting of vlan resources among the PFs

While testing max vlan configuration on the PF, firmware gets
assert as driver was configuring number of vlans more than what
is supported per port/engine, it was figured out that there is an
implicit vlan (hidden default vlan consuming hardware cam entry resource)
which is configured default for all the clients (PF/VFs) on client_init
ramrod by the adapter implicitly, so when allocating resources among the
PFs this implicit vlan should be considered or total vlan entries should
be reduced by one to accommodate that default/implicit vlan entry.

Signed-off-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
index ed237854939a..bacc8552bce1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
@@ -1536,8 +1536,11 @@ void bnx2x_get_rss_ind_table(struct bnx2x_rss_config_obj *rss_obj,
 	((MAX_MAC_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_MAC_CREDIT_CNT) / \
 	 func_num + GET_NUM_VFS_PER_PF(bp) * VF_MAC_CREDIT_CNT)
 
+#define BNX2X_VFS_VLAN_CREDIT(bp)	\
+	(GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT)
+
 #define PF_VLAN_CREDIT_E2(bp, func_num)					 \
-	((MAX_VLAN_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT) /\
+	((MAX_VLAN_CREDIT_E2 - 1 - BNX2X_VFS_VLAN_CREDIT(bp)) /	\
 	 func_num + GET_NUM_VFS_PER_PF(bp) * VF_VLAN_CREDIT_CNT)
 
 #endif /* BNX2X_SP_VERBS */

From bb3d0b8bf5be61ab1d6f472c43cbf34de17e796b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 23 Dec 2019 11:13:24 -0800
Subject: [PATCH 070/146] net_sched: sch_fq: properly set sk->sk_pacing_status

If fq_classify() recycles a struct fq_flow because
a socket structure has been reallocated, we do not
set sk->sk_pacing_status immediately, but later if the
flow becomes detached.

This means that any flow requiring pacing (BBR, or SO_MAX_PACING_RATE)
might fallback to TCP internal pacing, which requires a per-socket
high resolution timer, and therefore more cpu cycles.

Fixes: 218af599fa63 ("tcp: internal implementation for pacing")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index b1c7e726ce5d..ff4c5e9d0d77 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -301,6 +301,9 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 				     f->socket_hash != sk->sk_hash)) {
 				f->credit = q->initial_quantum;
 				f->socket_hash = sk->sk_hash;
+				if (q->rate_enable)
+					smp_store_release(&sk->sk_pacing_status,
+							  SK_PACING_FQ);
 				if (fq_flow_is_throttled(f))
 					fq_flow_unset_throttled(q, f);
 				f->time_next_packet = 0ULL;
@@ -322,8 +325,12 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 
 	fq_flow_set_detached(f);
 	f->sk = sk;
-	if (skb->sk == sk)
+	if (skb->sk == sk) {
 		f->socket_hash = sk->sk_hash;
+		if (q->rate_enable)
+			smp_store_release(&sk->sk_pacing_status,
+					  SK_PACING_FQ);
+	}
 	f->credit = q->initial_quantum;
 
 	rb_link_node(&f->fq_node, parent, p);
@@ -428,17 +435,9 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	f->qlen++;
 	qdisc_qstats_backlog_inc(sch, skb);
 	if (fq_flow_is_detached(f)) {
-		struct sock *sk = skb->sk;
-
 		fq_flow_add_tail(&q->new_flows, f);
 		if (time_after(jiffies, f->age + q->flow_refill_delay))
 			f->credit = max_t(u32, f->credit, q->quantum);
-		if (sk && q->rate_enable) {
-			if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
-				     SK_PACING_FQ))
-				smp_store_release(&sk->sk_pacing_status,
-						  SK_PACING_FQ);
-		}
 		q->inactive_flows--;
 	}
 

From b0b5ce1010ffc50015eaec72b0028aaae3f526bb Mon Sep 17 00:00:00 2001
From: "Alexander.Barabash@dell.com" <Alexander.Barabash@dell.com>
Date: Wed, 25 Dec 2019 17:55:30 +0000
Subject: [PATCH 071/146] ioat: ioat_alloc_ring() failure handling.

If dma_alloc_coherent() returns NULL in ioat_alloc_ring(), ring
allocation must not proceed.

Until now, if the first call to dma_alloc_coherent() in
ioat_alloc_ring() returned NULL, the processing could proceed, failing
with NULL-pointer dereferencing further down the line.

Signed-off-by: Alexander Barabash <alexander.barabash@dell.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/75e9c0e84c3345d693c606c64f8b9ab5@x13pwhopdag1307.AMER.DELL.COM
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ioat/dma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 1a422a8b43cf..18c011e57592 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -377,10 +377,11 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 
 		descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
 						 SZ_2M, &descs->hw, flags);
-		if (!descs->virt && (i > 0)) {
+		if (!descs->virt) {
 			int idx;
 
 			for (idx = 0; idx < i; idx++) {
+				descs = &ioat_chan->descs[idx];
 				dma_free_coherent(to_dev(ioat_chan), SZ_2M,
 						  descs->virt, descs->hw);
 				descs->virt = NULL;

From e79c22695abd3b75a6aecf4ea4b9607e8d82c49c Mon Sep 17 00:00:00 2001
From: Kailang Yang <kailang@realtek.com>
Date: Thu, 19 Dec 2019 14:12:15 +0800
Subject: [PATCH 072/146] ALSA: hda/realtek - Add Bass Speaker and fixed dac
 for bass speaker

Dell has new platform which has dual speaker connecting.
They want dual speaker which use same dac for output.

Signed-off-by: Kailang Yang <kailang@realtek.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/229c7efa2b474a16b7d8a916cd096b68@realtek.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5bc1a6d24333..2ee703c2da78 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5908,6 +5908,8 @@ enum {
 	ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC,
 	ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE,
 	ALC294_FIXUP_ASUS_INTSPK_GPIO,
+	ALC289_FIXUP_DELL_SPK2,
+	ALC289_FIXUP_DUAL_SPK,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -7009,6 +7011,21 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC
 	},
+	[ALC289_FIXUP_DELL_SPK2] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x17, 0x90170130 }, /* bass spk */
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE
+	},
+	[ALC289_FIXUP_DUAL_SPK] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc285_fixup_speaker2_to_dac1,
+		.chained = true,
+		.chain_id = ALC289_FIXUP_DELL_SPK2
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7081,6 +7098,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
+	SND_PCI_QUIRK(0x1028, 0x097e, "Dell Precision", ALC289_FIXUP_DUAL_SPK),
+	SND_PCI_QUIRK(0x1028, 0x097d, "Dell Precision", ALC289_FIXUP_DUAL_SPK),
 	SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),

From 70cf3dc7313207816255b9acb0dffb19dae78144 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <sladkani@proofpoint.com>
Date: Wed, 25 Dec 2019 10:51:01 +0200
Subject: [PATCH 073/146] net/sched: act_mirred: Pull mac prior redir to non
 mac_header_xmit device

There's no skb_pull performed when a mirred action is set at egress of a
mac device, with a target device/action that expects skb->data to point
at the network header.

As a result, either the target device is errornously given an skb with
data pointing to the mac (egress case), or the net stack receives the
skb with data pointing to the mac (ingress case).

E.g:
 # tc qdisc add dev eth9 root handle 1: prio
 # tc filter add dev eth9 parent 1: prio 9 protocol ip handle 9 basic \
   action mirred egress redirect dev tun0

 (tun0 is a tun device. result: tun0 errornously gets the eth header
  instead of the iph)

Revise the push/pull logic of tcf_mirred_act() to not rely on the
skb_at_tc_ingress() vs tcf_mirred_act_wants_ingress() comparison, as it
does not cover all "pull" cases.

Instead, calculate whether the required action on the target device
requires the data to point at the network header, and compare this to
whether skb->data points to network header - and make the push/pull
adjustments as necessary.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Shmulik Ladkani <sladkani@proofpoint.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_mirred.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1e3eb3a97532..1ad300e6dbc0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -219,8 +219,10 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 	bool use_reinsert;
 	bool want_ingress;
 	bool is_redirect;
+	bool expects_nh;
 	int m_eaction;
 	int mac_len;
+	bool at_nh;
 
 	rec_level = __this_cpu_inc_return(mirred_rec_level);
 	if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
@@ -261,19 +263,19 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 			goto out;
 	}
 
-	/* If action's target direction differs than filter's direction,
-	 * and devices expect a mac header on xmit, then mac push/pull is
-	 * needed.
-	 */
 	want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
-	if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
-		if (!skb_at_tc_ingress(skb)) {
-			/* caught at egress, act ingress: pull mac */
-			mac_len = skb_network_header(skb) - skb_mac_header(skb);
+
+	expects_nh = want_ingress || !m_mac_header_xmit;
+	at_nh = skb->data == skb_network_header(skb);
+	if (at_nh != expects_nh) {
+		mac_len = skb_at_tc_ingress(skb) ? skb->mac_len :
+			  skb_network_header(skb) - skb_mac_header(skb);
+		if (expects_nh) {
+			/* target device/action expect data at nh */
 			skb_pull_rcsum(skb2, mac_len);
 		} else {
-			/* caught at ingress, act egress: push mac */
-			skb_push_rcsum(skb2, skb->mac_len);
+			/* target device/action expect data at mac */
+			skb_push_rcsum(skb2, mac_len);
 		}
 	}
 

From bd6f48546b9cb7a785344fc78058c420923d7ed8 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Thu, 26 Dec 2019 20:01:01 +0100
Subject: [PATCH 074/146] net: stmmac: dwmac-meson8b: Fix the RGMII TX delay on
 Meson8b/8m2 SoCs

GXBB and newer SoCs use the fixed FCLK_DIV2 (1GHz) clock as input for
the m250_sel clock. Meson8b and Meson8m2 use MPLL2 instead, whose rate
can be adjusted at runtime.

So far we have been running MPLL2 with ~250MHz (and the internal
m250_div with value 1), which worked enough that we could transfer data
with an TX delay of 4ns. Unfortunately there is high packet loss with
an RGMII PHY when transferring data (receiving data works fine though).
Odroid-C1's u-boot is running with a TX delay of only 2ns as well as
the internal m250_div set to 2 - no lost (TX) packets can be observed
with that setting in u-boot.

Manual testing has shown that the TX packet loss goes away when using
the following settings in Linux (the vendor kernel uses the same
settings):
- MPLL2 clock set to ~500MHz
- m250_div set to 2
- TX delay set to 2ns on the MAC side

Update the m250_div divider settings to only accept dividers greater or
equal 2 to fix the TX delay generated by the MAC.

iperf3 results before the change:
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec   182 MBytes   153 Mbits/sec  514      sender
[  5]   0.00-10.00  sec   182 MBytes   152 Mbits/sec           receiver

iperf3 results after the change (including an updated TX delay of 2ns):
[ ID] Interval           Transfer     Bitrate         Retr  Cwnd
[  5]   0.00-10.00  sec   927 MBytes   778 Mbits/sec    0      sender
[  5]   0.00-10.01  sec   927 MBytes   777 Mbits/sec           receiver

Fixes: 4f6a71b84e1afd ("net: stmmac: dwmac-meson8b: fix internal RGMII clock configuration")
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-meson8b.c    | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index bd6c01004913..0e2fa14f1423 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -112,6 +112,14 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
 	struct device *dev = dwmac->dev;
 	const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
 	struct meson8b_dwmac_clk_configs *clk_configs;
+	static const struct clk_div_table div_table[] = {
+		{ .div = 2, .val = 2, },
+		{ .div = 3, .val = 3, },
+		{ .div = 4, .val = 4, },
+		{ .div = 5, .val = 5, },
+		{ .div = 6, .val = 6, },
+		{ .div = 7, .val = 7, },
+	};
 
 	clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
 	if (!clk_configs)
@@ -146,9 +154,9 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
 	clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
 	clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
 	clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
-	clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
-				CLK_DIVIDER_ALLOW_ZERO |
-				CLK_DIVIDER_ROUND_CLOSEST;
+	clk_configs->m250_div.table = div_table;
+	clk_configs->m250_div.flags = CLK_DIVIDER_ALLOW_ZERO |
+				      CLK_DIVIDER_ROUND_CLOSEST;
 	clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
 					 &clk_divider_ops,
 					 &clk_configs->m250_div.hw);

From 85a8ce62c2eabe28b9d76ca4eecf37922402df93 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sat, 28 Dec 2019 07:05:48 +0800
Subject: [PATCH 075/146] block: add bio_truncate to fix guard_bio_eod

Some filesystem, such as vfat, may send bio which crosses device boundary,
and the worse thing is that the IO request starting within device boundaries
can contain more than one segment past EOD.

Commit dce30ca9e3b6 ("fs: fix guard_bio_eod to check for real EOD errors")
tries to fix this issue by returning -EIO for this situation. However,
this way lets fs user code lose chance to handle -EIO, then sync_inodes_sb()
may hang for ever.

Also the current truncating on last segment is dangerous by updating the
last bvec, given bvec table becomes not immutable any more, and fs bio
users may not retrieve the truncated pages via bio_for_each_segment_all() in
its .end_io callback.

Fixes this issue by supporting multi-segment truncating. And the
approach is simpler:

- just update bio size since block layer can make correct bvec with
the updated bio size. Then bvec table becomes really immutable.

- zero all truncated segments for read bio

Cc: Carlos Maiolino <cmaiolino@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Fixed-by: dce30ca9e3b6 ("fs: fix guard_bio_eod to check for real EOD errors")
Reported-by: syzbot+2b9e54155c8c25d8d165@syzkaller.appspotmail.com
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 39 +++++++++++++++++++++++++++++++++++++++
 fs/buffer.c         | 25 +------------------------
 include/linux/bio.h |  1 +
 3 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index a5d75f6bf4c7..006bcc52a77e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -538,6 +538,45 @@ void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
 }
 EXPORT_SYMBOL(zero_fill_bio_iter);
 
+void bio_truncate(struct bio *bio, unsigned new_size)
+{
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	unsigned int done = 0;
+	bool truncated = false;
+
+	if (new_size >= bio->bi_iter.bi_size)
+		return;
+
+	if (bio_data_dir(bio) != READ)
+		goto exit;
+
+	bio_for_each_segment(bv, bio, iter) {
+		if (done + bv.bv_len > new_size) {
+			unsigned offset;
+
+			if (!truncated)
+				offset = new_size - done;
+			else
+				offset = 0;
+			zero_user(bv.bv_page, offset, bv.bv_len - offset);
+			truncated = true;
+		}
+		done += bv.bv_len;
+	}
+
+ exit:
+	/*
+	 * Don't touch bvec table here and make it really immutable, since
+	 * fs bio user has to retrieve all pages via bio_for_each_segment_all
+	 * in its .end_bio() callback.
+	 *
+	 * It is enough to truncate bio by updating .bi_size since we can make
+	 * correct bvec with the updated .bi_size for drivers.
+	 */
+	bio->bi_iter.bi_size = new_size;
+}
+
 /**
  * bio_put - release a reference to a bio
  * @bio:   bio to release reference to
diff --git a/fs/buffer.c b/fs/buffer.c
index d8c7242426bb..e94a6619464c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3034,8 +3034,6 @@ static void end_bio_bh_io_sync(struct bio *bio)
 void guard_bio_eod(int op, struct bio *bio)
 {
 	sector_t maxsector;
-	struct bio_vec *bvec = bio_last_bvec_all(bio);
-	unsigned truncated_bytes;
 	struct hd_struct *part;
 
 	rcu_read_lock();
@@ -3061,28 +3059,7 @@ void guard_bio_eod(int op, struct bio *bio)
 	if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
 		return;
 
-	/* Uhhuh. We've got a bio that straddles the device size! */
-	truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);
-
-	/*
-	 * The bio contains more than one segment which spans EOD, just return
-	 * and let IO layer turn it into an EIO
-	 */
-	if (truncated_bytes > bvec->bv_len)
-		return;
-
-	/* Truncate the bio.. */
-	bio->bi_iter.bi_size -= truncated_bytes;
-	bvec->bv_len -= truncated_bytes;
-
-	/* ..and clear the end of the buffer for reads */
-	if (op == REQ_OP_READ) {
-		struct bio_vec bv;
-
-		mp_bvec_last_segment(bvec, &bv);
-		zero_user(bv.bv_page, bv.bv_offset + bv.bv_len,
-				truncated_bytes);
-	}
+	bio_truncate(bio, maxsector << 9);
 }
 
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 3cdb84cdc488..853d92ceee64 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -470,6 +470,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
 				     gfp_t);
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
+void bio_truncate(struct bio *bio, unsigned new_size);
 
 static inline void zero_fill_bio(struct bio *bio)
 {

From 5d30ed3c2c74eb123668d0746e23105c8fc8aed3 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 20 Dec 2019 18:57:16 -0500
Subject: [PATCH 076/146] Revert "drm/amdgpu: simplify ATPX detection"

This reverts commit f5fda6d89afe6e9cedaa1c3303903c905262f6e8.

You can't use BASE_CLASS in pci_get_class.

Bug: https://gitlab.freedesktop.org/drm/amd/issues/995
Acked-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index a97fb759e2f4..3e35a8f2c5e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -613,7 +613,17 @@ static bool amdgpu_atpx_detect(void)
 	bool d3_supported = false;
 	struct pci_dev *parent_pdev;
 
-	while ((pdev = pci_get_class(PCI_BASE_CLASS_DISPLAY << 16, pdev)) != NULL) {
+	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+		vga_count++;
+
+		has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true);
+
+		parent_pdev = pci_upstream_bridge(pdev);
+		d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+		amdgpu_atpx_get_quirks(pdev);
+	}
+
+	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
 		vga_count++;
 
 		has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true);

From 073d5eef9e043c2b7e3ef12bc6c879b1d248e831 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Dec 2019 09:35:01 -0500
Subject: [PATCH 077/146] drm/amdgpu/smu: add metrics table lock

This table is used for lots of things, add it's own lock.

Bug: https://gitlab.freedesktop.org/drm/amd/issues/900
Reviewed-by: Kevin Wang <kevin1.wang@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c     | 1 +
 drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 5ff7ccedfbed..a23729d3174b 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -866,6 +866,7 @@ static int smu_sw_init(void *handle)
 	smu->smu_baco.platform_support = false;
 
 	mutex_init(&smu->sensor_lock);
+	mutex_init(&smu->metrics_lock);
 
 	smu->watermarks_bitmap = 0;
 	smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index ac9758305ab3..41fce75b263f 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -349,6 +349,7 @@ struct smu_context
 	const struct pptable_funcs	*ppt_funcs;
 	struct mutex			mutex;
 	struct mutex			sensor_lock;
+	struct mutex			metrics_lock;
 	uint64_t pool_size;
 
 	struct smu_table_context	smu_table;

From 1da87c9f67c98d552679974dbfc1f0f65b6a0a53 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Dec 2019 09:49:52 -0500
Subject: [PATCH 078/146] drm/amdgpu/smu: add metrics table lock for arcturus
 (v2)

To protect access to the metrics table.

v2: unlock on error

Bug: https://gitlab.freedesktop.org/drm/amd/issues/900
Reviewed-by: Kevin Wang <kevin1.wang@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index cc71a1078a7a..472e9fed411a 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -862,18 +862,21 @@ static int arcturus_get_metrics_table(struct smu_context *smu,
 	struct smu_table_context *smu_table= &smu->smu_table;
 	int ret = 0;
 
+	mutex_lock(&smu->metrics_lock);
 	if (!smu_table->metrics_time ||
 	     time_after(jiffies, smu_table->metrics_time + HZ / 1000)) {
 		ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
 				(void *)smu_table->metrics_table, false);
 		if (ret) {
 			pr_info("Failed to export SMU metrics table!\n");
+			mutex_unlock(&smu->metrics_lock);
 			return ret;
 		}
 		smu_table->metrics_time = jiffies;
 	}
 
 	memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+	mutex_unlock(&smu->metrics_lock);
 
 	return ret;
 }

From e0e384c398d4638e54b6d2098f0ceaafdab870ee Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Dec 2019 09:50:42 -0500
Subject: [PATCH 079/146] drm/amdgpu/smu: add metrics table lock for navi (v2)

To protect access to the metrics table.

v2: unlock on error

Bug: https://gitlab.freedesktop.org/drm/amd/issues/900
Reviewed-by: Kevin Wang <kevin1.wang@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 4a14fd1f9fd5..ca62e92e5a4f 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -562,17 +562,20 @@ static int navi10_get_metrics_table(struct smu_context *smu,
 	struct smu_table_context *smu_table= &smu->smu_table;
 	int ret = 0;
 
+	mutex_lock(&smu->metrics_lock);
 	if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) {
 		ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
 				(void *)smu_table->metrics_table, false);
 		if (ret) {
 			pr_info("Failed to export SMU metrics table!\n");
+			mutex_unlock(&smu->metrics_lock);
 			return ret;
 		}
 		smu_table->metrics_time = jiffies;
 	}
 
 	memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+	mutex_unlock(&smu->metrics_lock);
 
 	return ret;
 }

From 1c455101c6d10c99b310d6bcf613244c97854012 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 17 Dec 2019 09:51:40 -0500
Subject: [PATCH 080/146] drm/amdgpu/smu: add metrics table lock for vega20
 (v2)

To protect access to the metrics table.

v2: unlock on error

Bug: https://gitlab.freedesktop.org/drm/amd/issues/900
Reviewed-by: Kevin Wang <kevin1.wang@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/powerplay/vega20_ppt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 60b9ff097142..0d3a3b0a934e 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -1678,17 +1678,20 @@ static int vega20_get_metrics_table(struct smu_context *smu,
 	struct smu_table_context *smu_table= &smu->smu_table;
 	int ret = 0;
 
+	mutex_lock(&smu->metrics_lock);
 	if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) {
 		ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
 				(void *)smu_table->metrics_table, false);
 		if (ret) {
 			pr_info("Failed to export SMU metrics table!\n");
+			mutex_unlock(&smu->metrics_lock);
 			return ret;
 		}
 		smu_table->metrics_time = jiffies;
 	}
 
 	memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+	mutex_unlock(&smu->metrics_lock);
 
 	return ret;
 }

From e0c63812352298efbce2a71483c1dab627d0c288 Mon Sep 17 00:00:00 2001
From: changzhu <Changfeng.Zhu@amd.com>
Date: Thu, 12 Dec 2019 13:46:06 +0800
Subject: [PATCH 081/146] drm/amdgpu: enable gfxoff for raven1 refresh

When smu version is larger than 0x41e2b, it will load
raven_kicker_rlc.bin.To enable gfxoff for raven_kicker_rlc.bin,it
needs to avoid adev->pm.pp_feature &= ~PP_GFXOFF_MASK when it loads
raven_kicker_rlc.bin.

Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 66328ffa395a..97105a5bb246 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1052,17 +1052,10 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
 	case CHIP_VEGA20:
 		break;
 	case CHIP_RAVEN:
-		/* Disable GFXOFF on original raven.  There are combinations
-		 * of sbios and platforms that are not stable.
-		 */
-		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
-			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
-		else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
-			 &&((adev->gfx.rlc_fw_version != 106 &&
-			     adev->gfx.rlc_fw_version < 531) ||
-			    (adev->gfx.rlc_fw_version == 53815) ||
-			    (adev->gfx.rlc_feature_version < 1) ||
-			    !adev->gfx.rlc.is_rlc_v2_1))
+		if (!(adev->rev_id >= 0x8 ||
+		      adev->pdev->device == 0x15d8) &&
+		    (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
+		     !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)

From 0aec96f5897ac16ad9945f531b4bef9a2edd2ebd Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 18 Dec 2019 20:26:06 +0100
Subject: [PATCH 082/146] ALSA: ice1724: Fix sleep-in-atomic in Infrasonic
 Quartet support code

Jia-Ju Bai reported a possible sleep-in-atomic scenario in the ice1724
driver with Infrasonic Quartet support code: namely, ice->set_rate
callback gets called inside ice->reg_lock spinlock, while the callback
in quartet.c holds ice->gpio_mutex.

This patch fixes the invalid call: it simply moves the calls of
ice->set_rate and ice->set_mclk callbacks outside the spinlock.

Reported-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/5d43135e-73b9-a46a-2155-9e91d0dcdf83@gmail.com
Link: https://lore.kernel.org/r/20191218192606.12866-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/ice1712/ice1724.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index c80a16ee6e76..242542e23d28 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -647,6 +647,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
 	unsigned long flags;
 	unsigned char mclk_change;
 	unsigned int i, old_rate;
+	bool call_set_rate = false;
 
 	if (rate > ice->hw_rates->list[ice->hw_rates->count - 1])
 		return -EINVAL;
@@ -670,7 +671,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
 		 * setting clock rate for internal clock mode */
 		old_rate = ice->get_rate(ice);
 		if (force || (old_rate != rate))
-			ice->set_rate(ice, rate);
+			call_set_rate = true;
 		else if (rate == ice->cur_rate) {
 			spin_unlock_irqrestore(&ice->reg_lock, flags);
 			return 0;
@@ -678,12 +679,14 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
 	}
 
 	ice->cur_rate = rate;
+	spin_unlock_irqrestore(&ice->reg_lock, flags);
+
+	if (call_set_rate)
+		ice->set_rate(ice, rate);
 
 	/* setting master clock */
 	mclk_change = ice->set_mclk(ice, rate);
 
-	spin_unlock_irqrestore(&ice->reg_lock, flags);
-
 	if (mclk_change && ice->gpio.i2s_mclk_changed)
 		ice->gpio.i2s_mclk_changed(ice);
 	if (ice->gpio.set_pro_rate)

From 314bd842d98e1035cc40b671a71e07f48420e58f Mon Sep 17 00:00:00 2001
From: Amit Cohen <amitc@mellanox.com>
Date: Sun, 29 Dec 2019 13:40:22 +0200
Subject: [PATCH 083/146] mlxsw: spectrum_router: Skip loopback RIFs during MAC
 validation

When a router interface (RIF) is created the MAC address of the backing
netdev is verified to have the same MSBs as existing RIFs. This is
required in order to avoid changing existing RIF MAC addresses that all
share the same MSBs.

Loopback RIFs are special in this regard as they do not have a MAC
address, given they are only used to loop packets from the overlay to
the underlay.

Without this change, an error is returned when trying to create a RIF
after the creation of a GRE tunnel that is represented by a loopback
RIF. 'rif->dev->dev_addr' points to the GRE device's local IP, which
does not share the same MSBs as physical interfaces. Adding an IP
address to any physical interface results in:

Error: mlxsw_spectrum: All router interface MAC addresses must have the
same prefix.

Fix this by skipping loopback RIFs during MAC validation.

Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses")
Signed-off-by: Amit Cohen <amitc@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 08b7e9f964da..8290e82240fc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -7079,6 +7079,9 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
 
 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
 		rif = mlxsw_sp->router->rifs[i];
+		if (rif && rif->ops &&
+		    rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
+			continue;
 		if (rif && rif->dev && rif->dev != dev &&
 		    !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
 					     mlxsw_sp->mac_mask)) {

From acca789a358cc960be3937851d7de6591c79d6c2 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 29 Dec 2019 13:40:23 +0200
Subject: [PATCH 084/146] mlxsw: spectrum: Use dedicated policer for VRRP
 packets

Currently, VRRP packets and packets that hit exceptions during routing
(e.g., MTU error) are policed using the same policer towards the CPU.
This means, for example, that misconfiguration of the MTU on a routed
interface can prevent VRRP packets from reaching the CPU, which in turn
can cause the VRRP daemon to assume it is the Master router.

Fix this by using a dedicated policer for VRRP packets.

Fixes: 11566d34f895 ("mlxsw: spectrum: Add VRRP traps")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Alex Veber <alexve@mellanox.com>
Tested-by: Alex Veber <alexve@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      | 1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 5294a1622643..af30e8a76682 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5472,6 +5472,7 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP,
 
 	__MLXSW_REG_HTGT_TRAP_GROUP_MAX,
 	MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 556dca328bb5..f7fd5e8fbf96 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4542,8 +4542,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false),
+	MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false),
 	/* PKT Sample trap */
 	MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
 		  false, SP_IP2ME, DISCARD),
@@ -4626,6 +4626,10 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 			rate = 19 * 1024;
 			burst_size = 12;
 			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
+			rate = 360;
+			burst_size = 7;
+			break;
 		default:
 			continue;
 		}
@@ -4665,6 +4669,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
 			priority = 5;
 			tc = 5;
 			break;

From 48e01504cf5315cbe6de9b7412e792bfcc3dd9e1 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chiu@endlessm.com>
Date: Mon, 30 Dec 2019 11:11:18 +0800
Subject: [PATCH 085/146] ALSA: hda/realtek - Enable the bass speaker of ASUS
 UX431FLC

ASUS reported that there's an bass speaker in addition to internal
speaker and it uses DAC 0x02. It was not enabled in the commit
436e25505f34 ("ALSA: hda/realtek - Enable internal speaker of ASUS
UX431FLC") which only enables the amplifier and the front speaker.
This commit enables the bass speaker on top of the aforementioned
work to improve the acoustic experience.

Fixes: 436e25505f34 ("ALSA: hda/realtek - Enable internal speaker of ASUS UX431FLC")
Signed-off-by: Chris Chiu <chiu@endlessm.com>
Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20191230031118.95076-1-chiu@endlessm.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 38 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 2ee703c2da78..1cd4906a67e1 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5905,11 +5905,12 @@ enum {
 	ALC256_FIXUP_ASUS_HEADSET_MIC,
 	ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
 	ALC299_FIXUP_PREDATOR_SPK,
-	ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC,
 	ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE,
-	ALC294_FIXUP_ASUS_INTSPK_GPIO,
 	ALC289_FIXUP_DELL_SPK2,
 	ALC289_FIXUP_DUAL_SPK,
+	ALC294_FIXUP_SPK2_TO_DAC1,
+	ALC294_FIXUP_ASUS_DUAL_SPK,
+
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6984,16 +6985,6 @@ static const struct hda_fixup alc269_fixups[] = {
 			{ }
 		}
 	},
-	[ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC] = {
-		.type = HDA_FIXUP_PINS,
-		.v.pins = (const struct hda_pintbl[]) {
-			{ 0x14, 0x411111f0 }, /* disable confusing internal speaker */
-			{ 0x19, 0x04a11150 }, /* use as headset mic, without its own jack detect */
-			{ }
-		},
-		.chained = true,
-		.chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
-	},
 	[ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -7004,13 +6995,6 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
 	},
-	[ALC294_FIXUP_ASUS_INTSPK_GPIO] = {
-		.type = HDA_FIXUP_FUNC,
-		/* The GPIO must be pulled to initialize the AMP */
-		.v.func = alc_fixup_gpio4,
-		.chained = true,
-		.chain_id = ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC
-	},
 	[ALC289_FIXUP_DELL_SPK2] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -7026,6 +7010,20 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC289_FIXUP_DELL_SPK2
 	},
+	[ALC294_FIXUP_SPK2_TO_DAC1] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc285_fixup_speaker2_to_dac1,
+		.chained = true,
+		.chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC
+	},
+	[ALC294_FIXUP_ASUS_DUAL_SPK] = {
+		.type = HDA_FIXUP_FUNC,
+		/* The GPIO must be pulled to initialize the AMP */
+		.v.func = alc_fixup_gpio4,
+		.chained = true,
+		.chain_id = ALC294_FIXUP_SPK2_TO_DAC1
+	},
+
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7187,7 +7185,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK),
 	SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
 	SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
-	SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_INTSPK_GPIO),
+	SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK),
 	SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
 	SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),

From 6da3eced8c5f3b03340b0c395bacd552c4d52411 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 23 Dec 2019 14:31:47 +0100
Subject: [PATCH 086/146] powerpc/spinlocks: Include correct header for static
 key
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Recently, the spinlock implementation grew a static key optimization,
but the jump_label.h header include was left out, leading to build
errors:

  linux/arch/powerpc/include/asm/spinlock.h:44:7: error: implicit declaration of function ‘static_branch_unlikely’
   44 |  if (!static_branch_unlikely(&shared_processor))

This commit adds the missing header.

mpe: The build break is only seen with CONFIG_JUMP_LABEL=n.

Fixes: 656c21d6af5d ("powerpc/shared: Use static key to detect shared processor")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20191223133147.129983-1-Jason@zx2c4.com
---
 arch/powerpc/include/asm/spinlock.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 1b55fc08f853..860228e917dc 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -15,6 +15,7 @@
  *
  * (the type definitions are in asm/spinlock_types.h)
  */
+#include <linux/jump_label.h>
 #include <linux/irqflags.h>
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>

From 7b62e66cbbfb463a39bf83e30bdbbb4b9e83fa03 Mon Sep 17 00:00:00 2001
From: Dennis Zhou <dennis@kernel.org>
Date: Wed, 11 Dec 2019 16:07:06 -0800
Subject: [PATCH 087/146] btrfs: punt all bios created in
 btrfs_submit_compressed_write()

Compressed writes happen in the background via kworkers. However, this
causes bios to be attributed to root bypassing any cgroup limits from
the actual writer. We tag the first bio with REQ_CGROUP_PUNT, which will
punt the bio to an appropriate cgroup specific workqueue and attribute
the IO properly. However, if btrfs_submit_compressed_write() creates a
new bio, we don't tag it the same way. Add the appropriate tagging for
subsequent bios.

Fixes: ec39f7696ccfa ("Btrfs: use REQ_CGROUP_PUNT for worker thread submitted bios")
Reviewed-by: Chris Mason <clm@fb.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ee834ef7beb4..b08e16b8cebb 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -491,6 +491,10 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			bio->bi_opf = REQ_OP_WRITE | write_flags;
 			bio->bi_private = cb;
 			bio->bi_end_io = end_compressed_bio_write;
+			if (blkcg_css) {
+				bio->bi_opf |= REQ_CGROUP_PUNT;
+				bio_associate_blkg_from_css(bio, blkcg_css);
+			}
 			bio_add_page(bio, page, PAGE_SIZE, 0);
 		}
 		if (bytes_left < PAGE_SIZE) {

From 46bcff2bfc5e6a8c638d3a32e4f6f6fa4bd01461 Mon Sep 17 00:00:00 2001
From: Dennis Zhou <dennis@kernel.org>
Date: Wed, 11 Dec 2019 15:20:15 -0800
Subject: [PATCH 088/146] btrfs: fix compressed write bio blkcg attribution

Bio attribution is handled at bio_set_dev() as once we have a device, we
have a corresponding request_queue and then can derive the current css.
In special cases, we want to attribute to bio to someone else. This can
be done by calling bio_associate_blkg_from_css() or
kthread_associate_blkcg() depending on the scenario. Btrfs does this for
compressed writeback as they are handled by kworkers, so the latter can
be done here.

Commit 1a41802701ec ("btrfs: drop bio_set_dev where not needed") removes
early bio_set_dev() calls prior to submit_stripe_bio(). This breaks the
above assumption that we'll have a request_queue when we are doing
association. To fix this, switch to using kthread_associate_blkcg().

Without this, we crash in btrfs/024:

  [ 3052.093088] BUG: kernel NULL pointer dereference, address: 0000000000000510
  [ 3052.107013] #PF: supervisor read access in kernel mode
  [ 3052.107014] #PF: error_code(0x0000) - not-present page
  [ 3052.107015] PGD 0 P4D 0
  [ 3052.107021] Oops: 0000 [#1] SMP
  [ 3052.138904] CPU: 42 PID: 201270 Comm: kworker/u161:0 Kdump: loaded Not tainted 5.5.0-rc1-00062-g4852d8ac90a9 #712
  [ 3052.138905] Hardware name: Quanta Tioga Pass Single Side 01-0032211004/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018
  [ 3052.138912] Workqueue: btrfs-delalloc btrfs_work_helper
  [ 3052.191375] RIP: 0010:bio_associate_blkg_from_css+0x1e/0x3c0
  [ 3052.191379] RSP: 0018:ffffc900210cfc90 EFLAGS: 00010282
  [ 3052.191380] RAX: 0000000000000000 RBX: ffff88bfe5573c00 RCX: 0000000000000000
  [ 3052.191382] RDX: ffff889db48ec2f0 RSI: ffff88bfe5573c00 RDI: ffff889db48ec2f0
  [ 3052.191386] RBP: 0000000000000800 R08: 0000000000203bb0 R09: ffff889db16b2400
  [ 3052.293364] R10: 0000000000000000 R11: ffff88a07fffde80 R12: ffff889db48ec2f0
  [ 3052.293365] R13: 0000000000001000 R14: ffff889de82bc000 R15: ffff889e2b7bdcc8
  [ 3052.293367] FS:  0000000000000000(0000) GS:ffff889ffba00000(0000) knlGS:0000000000000000
  [ 3052.293368] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [ 3052.293369] CR2: 0000000000000510 CR3: 0000000002611001 CR4: 00000000007606e0
  [ 3052.293370] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  [ 3052.293371] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  [ 3052.293372] PKRU: 55555554
  [ 3052.293376] Call Trace:
  [ 3052.402552]  btrfs_submit_compressed_write+0x137/0x390
  [ 3052.402558]  submit_compressed_extents+0x40f/0x4c0
  [ 3052.422401]  btrfs_work_helper+0x246/0x5a0
  [ 3052.422408]  process_one_work+0x200/0x570
  [ 3052.438601]  ? process_one_work+0x180/0x570
  [ 3052.438605]  worker_thread+0x4c/0x3e0
  [ 3052.438614]  kthread+0x103/0x140
  [ 3052.460735]  ? process_one_work+0x570/0x570
  [ 3052.460737]  ? kthread_mod_delayed_work+0xc0/0xc0
  [ 3052.460744]  ret_from_fork+0x24/0x30

Fixes: 1a41802701ec ("btrfs: drop bio_set_dev where not needed")
Reported-by: Chris Murphy <chris@colorremedies.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b08e16b8cebb..43e1660f450f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -447,7 +447,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
 	if (blkcg_css) {
 		bio->bi_opf |= REQ_CGROUP_PUNT;
-		bio_associate_blkg_from_css(bio, blkcg_css);
+		kthread_associate_blkcg(blkcg_css);
 	}
 	refcount_set(&cb->pending_bios, 1);
 
@@ -491,10 +491,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			bio->bi_opf = REQ_OP_WRITE | write_flags;
 			bio->bi_private = cb;
 			bio->bi_end_io = end_compressed_bio_write;
-			if (blkcg_css) {
+			if (blkcg_css)
 				bio->bi_opf |= REQ_CGROUP_PUNT;
-				bio_associate_blkg_from_css(bio, blkcg_css);
-			}
 			bio_add_page(bio, page, PAGE_SIZE, 0);
 		}
 		if (bytes_left < PAGE_SIZE) {
@@ -521,6 +519,9 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		bio_endio(bio);
 	}
 
+	if (blkcg_css)
+		kthread_associate_blkcg(NULL);
+
 	return 0;
 }
 

From de7999afedff02c6631feab3ea726a0e8f8c3d40 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 11 Dec 2019 09:01:40 +0000
Subject: [PATCH 089/146] Btrfs: fix infinite loop during nocow writeback due
 to race

When starting writeback for a range that covers part of a preallocated
extent, due to a race with writeback for another range that also covers
another part of the same preallocated extent, we can end up in an infinite
loop.

Consider the following example where for inode 280 we have two dirty
ranges:

  range A, from 294912 to 303103, 8192 bytes
  range B, from 348160 to 438271, 90112 bytes

and we have the following file extent item layout for our inode:

  leaf 38895616 gen 24544 total ptrs 29 free space 13820 owner 5
      (...)
      item 27 key (280 108 200704) itemoff 14598 itemsize 53
          extent data disk bytenr 0 nr 0 type 1 (regular)
          extent data offset 0 nr 94208 ram 94208
      item 28 key (280 108 294912) itemoff 14545 itemsize 53
          extent data disk bytenr 10433052672 nr 81920 type 2 (prealloc)
          extent data offset 0 nr 81920 ram 81920

Then the following happens:

1) Writeback starts for range B (from 348160 to 438271), execution of
   run_delalloc_nocow() starts;

2) The first iteration of run_delalloc_nocow()'s whil loop leaves us at
   the extent item at slot 28, pointing to the prealloc extent item
   covering the range from 294912 to 376831. This extent covers part of
   our range;

3) An ordered extent is created against that extent, covering the file
   range from 348160 to 376831 (28672 bytes);

4) We adjust 'cur_offset' to 376832 and move on to the next iteration of
   the while loop;

5) The call to btrfs_lookup_file_extent() leaves us at the same leaf,
   pointing to slot 29, 1 slot after the last item (the extent item
   we processed in the previous iteration);

6) Because we are a slot beyond the last item, we call btrfs_next_leaf(),
   which releases the search path before doing a another search for the
   last key of the leaf (280 108 294912);

7) Right after btrfs_next_leaf() released the path, and before it did
   another search for the last key of the leaf, writeback for the range
   A (from 294912 to 303103) completes (it was previously started at
   some point);

8) Upon completion of the ordered extent for range A, the prealloc extent
   we previously found got split into two extent items, one covering the
   range from 294912 to 303103 (8192 bytes), with a type of regular extent
   (and no longer prealloc) and another covering the range from 303104 to
   376831 (73728 bytes), with a type of prealloc and an offset of 8192
   bytes. So our leaf now has the following layout:

     leaf 38895616 gen 24544 total ptrs 31 free space 13664 owner 5
         (...)
         item 27 key (280 108 200704) itemoff 14598 itemsize 53
             extent data disk bytenr 0 nr 0 type 1
             extent data offset 0 nr 8192 ram 94208
         item 28 key (280 108 208896) itemoff 14545 itemsize 53
             extent data disk bytenr 10433142784 nr 86016 type 1
             extent data offset 0 nr 86016 ram 86016
         item 29 key (280 108 294912) itemoff 14492 itemsize 53
             extent data disk bytenr 10433052672 nr 81920 type 1
             extent data offset 0 nr 8192 ram 81920
         item 30 key (280 108 303104) itemoff 14439 itemsize 53
             extent data disk bytenr 10433052672 nr 81920 type 2
             extent data offset 8192 nr 73728 ram 81920

9) After btrfs_next_leaf() returns, we have our path pointing to that same
   leaf and at slot 30, since it has a key we didn't have before and it's
   the first key greater then the key that was previously the last key of
   the leaf (key (280 108 294912));

10) The extent item at slot 30 covers the range from 303104 to 376831
    which is in our target range, so we process it, despite having already
    created an ordered extent against this extent for the file range from
    348160 to 376831. This is because we skip to the next extent item only
    if its end is less than or equals to the start of our delalloc range,
    and not less than or equals to the current offset ('cur_offset');

11) As a result we compute 'num_bytes' as:

    num_bytes = min(end + 1, extent_end) - cur_offset;
              = min(438271 + 1, 376832) - 376832 = 0

12) We then call create_io_em() for a 0 bytes range starting at offset
    376832;

13) Then create_io_em() enters an infinite loop because its calls to
    btrfs_drop_extent_cache() do nothing due to the 0 length range
    passed to it. So no existing extent maps that cover the offset
    376832 get removed, and therefore calls to add_extent_mapping()
    return -EEXIST, resulting in an infinite loop. This loop from
    create_io_em() is the following:

    do {
        btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
                                em->start + em->len - 1, 0);
        write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em, 1);
        write_unlock(&em_tree->lock);
        /*
         * The caller has taken lock_extent(), who could race with us
         * to add em?
         */
    } while (ret == -EEXIST);

Also, each call to btrfs_drop_extent_cache() triggers a warning because
the start offset passed to it (376832) is smaller then the end offset
(376832 - 1) passed to it by -1, due to the 0 length:

  [258532.052621] ------------[ cut here ]------------
  [258532.052643] WARNING: CPU: 0 PID: 9987 at fs/btrfs/file.c:602 btrfs_drop_extent_cache+0x3f4/0x590 [btrfs]
  (...)
  [258532.052672] CPU: 0 PID: 9987 Comm: fsx Tainted: G        W         5.4.0-rc7-btrfs-next-64 #1
  [258532.052673] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
  [258532.052691] RIP: 0010:btrfs_drop_extent_cache+0x3f4/0x590 [btrfs]
  (...)
  [258532.052695] RSP: 0018:ffffb4be0153f860 EFLAGS: 00010287
  [258532.052700] RAX: ffff975b445ee360 RBX: ffff975b44eb3e08 RCX: 0000000000000000
  [258532.052700] RDX: 0000000000038fff RSI: 0000000000039000 RDI: ffff975b445ee308
  [258532.052700] RBP: 0000000000038fff R08: 0000000000000000 R09: 0000000000000001
  [258532.052701] R10: ffff975b513c5c10 R11: 00000000e3c0cfa9 R12: 0000000000039000
  [258532.052703] R13: ffff975b445ee360 R14: 00000000ffffffef R15: ffff975b445ee308
  [258532.052705] FS:  00007f86a821de80(0000) GS:ffff975b76a00000(0000) knlGS:0000000000000000
  [258532.052707] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [258532.052708] CR2: 00007fdacf0f3ab4 CR3: 00000001f9d26002 CR4: 00000000003606f0
  [258532.052712] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  [258532.052717] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  [258532.052717] Call Trace:
  [258532.052718]  ? preempt_schedule_common+0x32/0x70
  [258532.052722]  ? ___preempt_schedule+0x16/0x20
  [258532.052741]  create_io_em+0xff/0x180 [btrfs]
  [258532.052767]  run_delalloc_nocow+0x942/0xb10 [btrfs]
  [258532.052791]  btrfs_run_delalloc_range+0x30b/0x520 [btrfs]
  [258532.052812]  ? find_lock_delalloc_range+0x221/0x250 [btrfs]
  [258532.052834]  writepage_delalloc+0xe4/0x140 [btrfs]
  [258532.052855]  __extent_writepage+0x110/0x4e0 [btrfs]
  [258532.052876]  extent_write_cache_pages+0x21c/0x480 [btrfs]
  [258532.052906]  extent_writepages+0x52/0xb0 [btrfs]
  [258532.052911]  do_writepages+0x23/0x80
  [258532.052915]  __filemap_fdatawrite_range+0xd2/0x110
  [258532.052938]  btrfs_fdatawrite_range+0x1b/0x50 [btrfs]
  [258532.052954]  start_ordered_ops+0x57/0xa0 [btrfs]
  [258532.052973]  ? btrfs_sync_file+0x225/0x490 [btrfs]
  [258532.052988]  btrfs_sync_file+0x225/0x490 [btrfs]
  [258532.052997]  __x64_sys_msync+0x199/0x200
  [258532.053004]  do_syscall_64+0x5c/0x250
  [258532.053007]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
  [258532.053010] RIP: 0033:0x7f86a7dfd760
  (...)
  [258532.053014] RSP: 002b:00007ffd99af0368 EFLAGS: 00000246 ORIG_RAX: 000000000000001a
  [258532.053016] RAX: ffffffffffffffda RBX: 0000000000000ec9 RCX: 00007f86a7dfd760
  [258532.053017] RDX: 0000000000000004 RSI: 000000000000836c RDI: 00007f86a8221000
  [258532.053019] RBP: 0000000000021ec9 R08: 0000000000000003 R09: 00007f86a812037c
  [258532.053020] R10: 0000000000000001 R11: 0000000000000246 R12: 00000000000074a3
  [258532.053021] R13: 00007f86a8221000 R14: 000000000000836c R15: 0000000000000001
  [258532.053032] irq event stamp: 1653450494
  [258532.053035] hardirqs last  enabled at (1653450493): [<ffffffff9dec69f9>] _raw_spin_unlock_irq+0x29/0x50
  [258532.053037] hardirqs last disabled at (1653450494): [<ffffffff9d4048ea>] trace_hardirqs_off_thunk+0x1a/0x20
  [258532.053039] softirqs last  enabled at (1653449852): [<ffffffff9e200466>] __do_softirq+0x466/0x6bd
  [258532.053042] softirqs last disabled at (1653449845): [<ffffffff9d4c8a0c>] irq_exit+0xec/0x120
  [258532.053043] ---[ end trace 8476fce13d9ce20a ]---

Which results in flooding dmesg/syslog since btrfs_drop_extent_cache()
uses WARN_ON() and not WARN_ON_ONCE().

So fix this issue by changing run_delalloc_nocow()'s loop to move to the
next extent item when the current extent item ends at at offset less than
or equals to the current offset instead of the start offset.

Fixes: 80ff385665b7fc ("Btrfs: update nodatacow code v2")
CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e3c76645cad7..5509c41a4f43 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1479,10 +1479,10 @@ next_slot:
 			disk_num_bytes =
 				btrfs_file_extent_disk_num_bytes(leaf, fi);
 			/*
-			 * If extent we got ends before our range starts, skip
-			 * to next extent
+			 * If the extent we got ends before our current offset,
+			 * skip to the next extent.
 			 */
-			if (extent_end <= start) {
+			if (extent_end <= cur_offset) {
 				path->slots[0]++;
 				goto next_slot;
 			}

From 429120f3df2dba2bf3a4a19f4212a53ecefc7102 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sun, 29 Dec 2019 10:32:30 +0800
Subject: [PATCH 090/146] block: fix splitting segments on boundary masks

We ran into a problem with a mpt3sas based controller, where we would
see random (and hard to reproduce) file corruption). The issue seemed
specific to this controller, but wasn't specific to the file system.
After a lot of debugging, we find out that it's caused by segments
spanning a 4G memory boundary. This shouldn't happen, as the default
setting for segment boundary masks is 4G.

Turns out there are two issues in get_max_segment_size():

1) The default segment boundary mask is bypassed

2) The segment start address isn't taken into account when checking
   segment boundary limit

Fix these two issues by removing the bypass of the segment boundary
check even if the mask is set to the default value, and taking into
account the actual start address of the request when checking if a
segment needs splitting.

Cc: stable@vger.kernel.org # v5.1+
Reviewed-by: Chris Mason <clm@fb.com>
Tested-by: Chris Mason <clm@fb.com>
Fixes: dcebd755926b ("block: use bio_for_each_bvec() to compute multi-page bvec count")
Signed-off-by: Ming Lei <ming.lei@redhat.com>

Dropped const on the page pointer, ppc page_to_phys() doesn't mark the
page as const...

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index d783bdc4559b..347782a24a35 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -157,16 +157,14 @@ static inline unsigned get_max_io_size(struct request_queue *q,
 	return sectors & (lbs - 1);
 }
 
-static unsigned get_max_segment_size(const struct request_queue *q,
-				     unsigned offset)
+static inline unsigned get_max_segment_size(const struct request_queue *q,
+					    struct page *start_page,
+					    unsigned long offset)
 {
 	unsigned long mask = queue_segment_boundary(q);
 
-	/* default segment boundary mask means no boundary limit */
-	if (mask == BLK_SEG_BOUNDARY_MASK)
-		return queue_max_segment_size(q);
-
-	return min_t(unsigned long, mask - (mask & offset) + 1,
+	offset = mask & (page_to_phys(start_page) + offset);
+	return min_t(unsigned long, mask - offset + 1,
 		     queue_max_segment_size(q));
 }
 
@@ -201,7 +199,8 @@ static bool bvec_split_segs(const struct request_queue *q,
 	unsigned seg_size = 0;
 
 	while (len && *nsegs < max_segs) {
-		seg_size = get_max_segment_size(q, bv->bv_offset + total_len);
+		seg_size = get_max_segment_size(q, bv->bv_page,
+						bv->bv_offset + total_len);
 		seg_size = min(seg_size, len);
 
 		(*nsegs)++;
@@ -419,7 +418,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
 
 	while (nbytes > 0) {
 		unsigned offset = bvec->bv_offset + total;
-		unsigned len = min(get_max_segment_size(q, offset), nbytes);
+		unsigned len = min(get_max_segment_size(q, bvec->bv_page,
+					offset), nbytes);
 		struct page *page = bvec->bv_page;
 
 		/*

From c7d776f85dfe5159ebf621ee1e50e555237b1a25 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 26 Dec 2019 15:54:25 +0900
Subject: [PATCH 091/146] null_blk: Fix REQ_OP_ZONE_CLOSE handling

In order to match ZBC defined behavior, closing an empty zone must
result in the "empty" zone condition instead of the "closed" condition.

Fixes: da644b2cc1a4 ("null_blk: add zone open, close, and finish support")
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk_zoned.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index d4d88b581822..5cf49d9db95e 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -186,7 +186,10 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
 		if (zone->cond == BLK_ZONE_COND_FULL)
 			return BLK_STS_IOERR;
 
-		zone->cond = BLK_ZONE_COND_CLOSED;
+		if (zone->wp == zone->start)
+			zone->cond = BLK_ZONE_COND_EMPTY;
+		else
+			zone->cond = BLK_ZONE_COND_CLOSED;
 		break;
 	case REQ_OP_ZONE_FINISH:
 		if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)

From 1f07dcc459d5f2c639f185f6e94829a0c79f2b4c Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 30 Dec 2019 12:01:56 -0800
Subject: [PATCH 092/146] kernel.h: Remove unused FIELD_SIZEOF()

Now that all callers of FIELD_SIZEOF() have been converted to
sizeof_field(), remove the unused prior macro.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/kernel.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3adcb39fa6f5..0d9db2a14f44 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -79,15 +79,6 @@
  */
 #define round_down(x, y) ((x) & ~__round_mask(x, y))
 
-/**
- * FIELD_SIZEOF - get the size of a struct's field
- * @t: the target struct
- * @f: the target struct's field
- * Return: the size of @f in the struct definition without having a
- * declared instance of @t.
- */
-#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
-
 #define typeof_member(T, m)	typeof(((T*)0)->m)
 
 #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP

From 0caeaf6ad532f9be5a768a158627cb31921cc8b7 Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Mon, 30 Dec 2019 18:14:08 +0530
Subject: [PATCH 093/146] cxgb4/cxgb4vf: fix flow control display for auto
 negotiation

As per 802.3-2005, Section Two, Annex 28B, Table 28B-2 [1], when
_only_ Rx pause is enabled, both symmetric and asymmetric pause
towards local device must be enabled. Also, firmware returns the local
device's flow control pause params as part of advertised capabilities
and negotiated params as part of current link attributes. So, fix up
ethtool's flow control pause params fetch logic to read from acaps,
instead of linkattr.

[1] https://standards.ieee.org/standard/802_3-2005.html

Fixes: c3168cabe1af ("cxgb4/cxgbvf: Handle 32-bit fw port capabilities")
Signed-off-by: Surendra Mobiya <surendra@chelsio.com>
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h    |  1 +
 .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c    |  4 ++--
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c    | 23 +++++++++++--------
 .../ethernet/chelsio/cxgb4vf/cxgb4vf_main.c   |  4 ++--
 .../ethernet/chelsio/cxgb4vf/t4vf_common.h    |  1 +
 .../net/ethernet/chelsio/cxgb4vf/t4vf_hw.c    | 20 +++++++++-------
 6 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index a70ac2097892..becee29f5df7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -504,6 +504,7 @@ struct link_config {
 
 	enum cc_pause  requested_fc;     /* flow control user has requested */
 	enum cc_pause  fc;               /* actual link flow control */
+	enum cc_pause  advertised_fc;    /* actual advertised flow control */
 
 	enum cc_fec    requested_fec;	 /* Forward Error Correction: */
 	enum cc_fec    fec;		 /* requested and actual in use */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 20ab3b6285a2..c837382ee522 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -807,8 +807,8 @@ static void get_pauseparam(struct net_device *dev,
 	struct port_info *p = netdev_priv(dev);
 
 	epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
-	epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
-	epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
+	epause->rx_pause = (p->link_cfg.advertised_fc & PAUSE_RX) != 0;
+	epause->tx_pause = (p->link_cfg.advertised_fc & PAUSE_TX) != 0;
 }
 
 static int set_pauseparam(struct net_device *dev,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 19d18acfc9a6..844fdcf55118 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4089,7 +4089,8 @@ static inline fw_port_cap32_t cc_to_fwcap_pause(enum cc_pause cc_pause)
 		if (cc_pause & PAUSE_TX)
 			fw_pause |= FW_PORT_CAP32_802_3_PAUSE;
 		else
-			fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
+			fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR |
+				    FW_PORT_CAP32_802_3_PAUSE;
 	} else if (cc_pause & PAUSE_TX) {
 		fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
 	}
@@ -8563,17 +8564,17 @@ static fw_port_cap32_t lstatus_to_fwcap(u32 lstatus)
 void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
 {
 	const struct fw_port_cmd *cmd = (const void *)rpl;
-	int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
-	struct adapter *adapter = pi->adapter;
-	struct link_config *lc = &pi->link_cfg;
-	int link_ok, linkdnrc;
-	enum fw_port_type port_type;
-	enum fw_port_module_type mod_type;
-	unsigned int speed, fc, fec;
 	fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+	struct link_config *lc = &pi->link_cfg;
+	struct adapter *adapter = pi->adapter;
+	unsigned int speed, fc, fec, adv_fc;
+	enum fw_port_module_type mod_type;
+	int action, link_ok, linkdnrc;
+	enum fw_port_type port_type;
 
 	/* Extract the various fields from the Port Information message.
 	 */
+	action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
 	switch (action) {
 	case FW_PORT_ACTION_GET_PORT_INFO: {
 		u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
@@ -8611,6 +8612,7 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
 	}
 
 	fec = fwcap_to_cc_fec(acaps);
+	adv_fc = fwcap_to_cc_pause(acaps);
 	fc = fwcap_to_cc_pause(linkattr);
 	speed = fwcap_to_speed(linkattr);
 
@@ -8667,7 +8669,9 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
 	}
 
 	if (link_ok != lc->link_ok || speed != lc->speed ||
-	    fc != lc->fc || fec != lc->fec) {	/* something changed */
+	    fc != lc->fc || adv_fc != lc->advertised_fc ||
+	    fec != lc->fec) {
+		/* something changed */
 		if (!link_ok && lc->link_ok) {
 			lc->link_down_rc = linkdnrc;
 			dev_warn_ratelimited(adapter->pdev_dev,
@@ -8677,6 +8681,7 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
 		}
 		lc->link_ok = link_ok;
 		lc->speed = speed;
+		lc->advertised_fc = adv_fc;
 		lc->fc = fc;
 		lc->fec = fec;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index f6fc0875d5b0..f4d41f968afa 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -1690,8 +1690,8 @@ static void cxgb4vf_get_pauseparam(struct net_device *dev,
 	struct port_info *pi = netdev_priv(dev);
 
 	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
-	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
-	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
+	pauseparam->rx_pause = (pi->link_cfg.advertised_fc & PAUSE_RX) != 0;
+	pauseparam->tx_pause = (pi->link_cfg.advertised_fc & PAUSE_TX) != 0;
 }
 
 /*
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index ccca67cf4487..57cfd10a99ec 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -135,6 +135,7 @@ struct link_config {
 
 	enum cc_pause	requested_fc;	/* flow control user has requested */
 	enum cc_pause	fc;		/* actual link flow control */
+	enum cc_pause   advertised_fc;  /* actual advertised flow control */
 
 	enum cc_fec	auto_fec;	/* Forward Error Correction: */
 	enum cc_fec	requested_fec;	/*   "automatic" (IEEE 802.3), */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 8a389d617a23..9d49ff211cc1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -1913,16 +1913,16 @@ static const char *t4vf_link_down_rc_str(unsigned char link_down_rc)
 static void t4vf_handle_get_port_info(struct port_info *pi,
 				      const struct fw_port_cmd *cmd)
 {
-	int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
-	struct adapter *adapter = pi->adapter;
-	struct link_config *lc = &pi->link_cfg;
-	int link_ok, linkdnrc;
-	enum fw_port_type port_type;
-	enum fw_port_module_type mod_type;
-	unsigned int speed, fc, fec;
 	fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+	struct link_config *lc = &pi->link_cfg;
+	struct adapter *adapter = pi->adapter;
+	unsigned int speed, fc, fec, adv_fc;
+	enum fw_port_module_type mod_type;
+	int action, link_ok, linkdnrc;
+	enum fw_port_type port_type;
 
 	/* Extract the various fields from the Port Information message. */
+	action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
 	switch (action) {
 	case FW_PORT_ACTION_GET_PORT_INFO: {
 		u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
@@ -1982,6 +1982,7 @@ static void t4vf_handle_get_port_info(struct port_info *pi,
 	}
 
 	fec = fwcap_to_cc_fec(acaps);
+	adv_fc = fwcap_to_cc_pause(acaps);
 	fc = fwcap_to_cc_pause(linkattr);
 	speed = fwcap_to_speed(linkattr);
 
@@ -2012,7 +2013,9 @@ static void t4vf_handle_get_port_info(struct port_info *pi,
 	}
 
 	if (link_ok != lc->link_ok || speed != lc->speed ||
-	    fc != lc->fc || fec != lc->fec) {	/* something changed */
+	    fc != lc->fc || adv_fc != lc->advertised_fc ||
+	    fec != lc->fec) {
+		/* something changed */
 		if (!link_ok && lc->link_ok) {
 			lc->link_down_rc = linkdnrc;
 			dev_warn_ratelimited(adapter->pdev_dev,
@@ -2022,6 +2025,7 @@ static void t4vf_handle_get_port_info(struct port_info *pi,
 		}
 		lc->link_ok = link_ok;
 		lc->speed = speed;
+		lc->advertised_fc = adv_fc;
 		lc->fc = fc;
 		lc->fec = fec;
 

From 9fcf024dd6fae082f05e8c1fcdae23972b2f6971 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Fri, 27 Dec 2019 02:59:54 +0200
Subject: [PATCH 094/146] net: dsa: sja1105: Take PTP egress timestamp by port,
 not mgmt slot

The PTP egress timestamp N must be captured from register PTPEGR_TS[n],
where n = 2 * PORT + TSREG. There are 10 PTPEGR_TS registers, 2 per
port. We are only using TSREG=0.

As opposed to the management slots, which are 4 in number
(SJA1105_NUM_PORTS, minus the CPU port). Any management frame (which
includes PTP frames) can be sent to any non-CPU port through any
management slot. When the CPU port is not the last port (#4), there will
be a mismatch between the slot and the port number.

Luckily, the only mainline occurrence with this switch
(arch/arm/boot/dts/ls1021a-tsn.dts) does have the CPU port as #4, so the
issue did not manifest itself thus far.

Fixes: 47ed985e97f5 ("net: dsa: sja1105: Add logic for TX timestamping")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 2 +-
 drivers/net/dsa/sja1105/sja1105_ptp.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index a51ac088c0bc..86bbab166633 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1855,7 +1855,7 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port,
 	if (!clone)
 		goto out;
 
-	sja1105_ptp_txtstamp_skb(ds, slot, clone);
+	sja1105_ptp_txtstamp_skb(ds, port, clone);
 
 out:
 	mutex_unlock(&priv->mgmt_lock);
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 54258a25031d..c0fda7db6271 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -659,7 +659,7 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds)
 	ptp_data->clock = NULL;
 }
 
-void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot,
+void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int port,
 			      struct sk_buff *skb)
 {
 	struct sja1105_private *priv = ds->priv;
@@ -679,7 +679,7 @@ void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot,
 		goto out;
 	}
 
-	rc = sja1105_ptpegr_ts_poll(ds, slot, &ts);
+	rc = sja1105_ptpegr_ts_poll(ds, port, &ts);
 	if (rc < 0) {
 		dev_err(ds->dev, "timed out polling for tstamp\n");
 		kfree_skb(skb);

From 5a47f588ee2366b2febdc822cdfdcf856cb0a777 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Fri, 27 Dec 2019 03:01:50 +0200
Subject: [PATCH 095/146] net: dsa: sja1105: Really make the PTP command
 read-write

When activating tc-taprio offload on the switch ports, the TAS state
machine will try to check whether it is running or not, but will find
both the STARTED and STOPPED bits as false in the
sja1105_tas_check_running function. So the function will return -EINVAL
(an abnormal situation) and the kernel will keep printing this from the
TAS FSM workqueue:

[   37.691971] sja1105 spi0.1: An operation returned -22

The reason is that the underlying function that gets called,
sja1105_ptp_commit, does not actually do a SPI_READ, but a SPI_WRITE. So
the command buffer remains initialized with zeroes instead of retrieving
the hardware state. Fix that.

Fixes: 41603d78b362 ("net: dsa: sja1105: Make the PTP command read-write")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_ptp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index c0fda7db6271..43ab7589d0d0 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -234,7 +234,7 @@ int sja1105_ptp_commit(struct dsa_switch *ds, struct sja1105_ptp_cmd *cmd,
 	if (rw == SPI_WRITE)
 		priv->info->ptp_cmd_packing(buf, cmd, PACK);
 
-	rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->ptp_control, buf,
+	rc = sja1105_xfer_buf(priv, rw, regs->ptp_control, buf,
 			      SJA1105_SIZE_PTP_CMD);
 
 	if (rw == SPI_READ)

From d00bdc0a8839de9a5c9be5af2a79dbf8e0087689 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Fri, 27 Dec 2019 03:03:54 +0200
Subject: [PATCH 096/146] net: dsa: sja1105: Remove restriction of zero
 base-time for taprio offload

The check originates from the initial implementation which was not based
on PTP time but on a standalone clock source. In the meantime we can now
program the PTPSCHTM register at runtime with the dynamic base time
(actually with a value that is 200 ns smaller, to avoid writing DELTA=0
in the Schedule Entry Points Parameters Table). And we also have logic
for moving the actual base time in the future of the PHC's current time
base, so the check for zero serves no purpose, since even if the user
will specify zero, that's not what will end up in the static config
table where the limitation is.

Fixes: 86db36a347b4 ("net: dsa: sja1105: Implement state machine for TAS with PTP clock source")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_tas.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c
index 26b925b5dace..fa6750d973d7 100644
--- a/drivers/net/dsa/sja1105/sja1105_tas.c
+++ b/drivers/net/dsa/sja1105/sja1105_tas.c
@@ -477,11 +477,6 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
 	if (admin->cycle_time_extension)
 		return -ENOTSUPP;
 
-	if (!ns_to_sja1105_delta(admin->base_time)) {
-		dev_err(ds->dev, "A base time of zero is not hardware-allowed\n");
-		return -ERANGE;
-	}
-
 	for (i = 0; i < admin->num_entries; i++) {
 		s64 delta_ns = admin->entries[i].interval;
 		s64 delta_cycles = ns_to_sja1105_delta(delta_ns);

From 3a323ed7c9c6d60af05bada0efe7a8ef56cf317f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Fri, 27 Dec 2019 03:08:07 +0200
Subject: [PATCH 097/146] Documentation: net: dsa: sja1105: Remove text about
 taprio base-time limitation

Since commit 86db36a347b4 ("net: dsa: sja1105: Implement state machine
for TAS with PTP clock source"), this paragraph is no longer true. So
remove it.

Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/sja1105.rst | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst
index eef20d0bcf7c..64553d8d91cb 100644
--- a/Documentation/networking/dsa/sja1105.rst
+++ b/Documentation/networking/dsa/sja1105.rst
@@ -230,12 +230,6 @@ simultaneously on two ports. The driver checks the consistency of the schedules
 against this restriction and errors out when appropriate. Schedule analysis is
 needed to avoid this, which is outside the scope of the document.
 
-At the moment, the time-aware scheduler can only be triggered based on a
-standalone clock and not based on PTP time. This means the base-time argument
-from tc-taprio is ignored and the schedule starts right away. It also means it
-is more difficult to phase-align the scheduler with the other devices in the
-network.
-
 Device Tree bindings and board design
 =====================================
 

From 54fa49ee88138756df0fcf867cb1849904710a8c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Fri, 27 Dec 2019 03:11:13 +0200
Subject: [PATCH 098/146] net: dsa: sja1105: Reconcile the meaning of TPID and
 TPID2 for E/T and P/Q/R/S

For first-generation switches (SJA1105E and SJA1105T):
- TPID means C-Tag (typically 0x8100)
- TPID2 means S-Tag (typically 0x88A8)

While for the second generation switches (SJA1105P, SJA1105Q, SJA1105R,
SJA1105S) it is the other way around:
- TPID means S-Tag (typically 0x88A8)
- TPID2 means C-Tag (typically 0x8100)

In other words, E/T tags untagged traffic with TPID, and P/Q/R/S with
TPID2.

So the patch mentioned below fixed VLAN filtering for P/Q/R/S, but broke
it for E/T.

We strive for a common code path for all switches in the family, so just
lie in the static config packing functions that TPID and TPID2 are at
swapped bit offsets than they actually are, for P/Q/R/S. This will make
both switches understand TPID to be ETH_P_8021Q and TPID2 to be
ETH_P_8021AD. The meaning from the original E/T was chosen over P/Q/R/S
because E/T is actually the one with public documentation available
(UM10944.pdf).

Fixes: f9a1a7646c0d ("net: dsa: sja1105: Reverse TPID and TPID2")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c          | 8 ++++----
 drivers/net/dsa/sja1105/sja1105_static_config.c | 7 +++++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 86bbab166633..1da5ac111499 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1569,8 +1569,8 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 
 	if (enabled) {
 		/* Enable VLAN filtering. */
-		tpid  = ETH_P_8021AD;
-		tpid2 = ETH_P_8021Q;
+		tpid  = ETH_P_8021Q;
+		tpid2 = ETH_P_8021AD;
 	} else {
 		/* Disable VLAN filtering. */
 		tpid  = ETH_P_SJA1105;
@@ -1579,9 +1579,9 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 
 	table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
 	general_params = table->entries;
-	/* EtherType used to identify outer tagged (S-tag) VLAN traffic */
-	general_params->tpid = tpid;
 	/* EtherType used to identify inner tagged (C-tag) VLAN traffic */
+	general_params->tpid = tpid;
+	/* EtherType used to identify outer tagged (S-tag) VLAN traffic */
 	general_params->tpid2 = tpid2;
 	/* When VLAN filtering is on, we need to at least be able to
 	 * decode management traffic through the "backup plan".
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 0d03e13e9909..63d2311817c4 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -142,6 +142,9 @@ static size_t sja1105et_general_params_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+/* TPID and TPID2 are intentionally reversed so that semantic
+ * compatibility with E/T is kept.
+ */
 static size_t
 sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
 					 enum packing_op op)
@@ -166,9 +169,9 @@ sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
 	sja1105_packing(buf, &entry->mirr_port,   141, 139, size, op);
 	sja1105_packing(buf, &entry->vlmarker,    138, 107, size, op);
 	sja1105_packing(buf, &entry->vlmask,      106,  75, size, op);
-	sja1105_packing(buf, &entry->tpid,         74,  59, size, op);
+	sja1105_packing(buf, &entry->tpid2,        74,  59, size, op);
 	sja1105_packing(buf, &entry->ignore2stf,   58,  58, size, op);
-	sja1105_packing(buf, &entry->tpid2,        57,  42, size, op);
+	sja1105_packing(buf, &entry->tpid,         57,  42, size, op);
 	sja1105_packing(buf, &entry->queue_ts,     41,  41, size, op);
 	sja1105_packing(buf, &entry->egrmirrvid,   40,  29, size, op);
 	sja1105_packing(buf, &entry->egrmirrpcp,   28,  26, size, op);

From a33121e5487b424339636b25c35d3a180eaa5f5e Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Fri, 27 Dec 2019 03:26:27 +0100
Subject: [PATCH 099/146] ptp: fix the race between the release of ptp_clock
 and cdev

In a case when a ptp chardev (like /dev/ptp0) is open but an underlying
device is removed, closing this file leads to a race. This reproduces
easily in a kvm virtual machine:

ts# cat openptp0.c
int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); }
ts# uname -r
5.5.0-rc3-46cf053e
ts# cat /proc/cmdline
... slub_debug=FZP
ts# modprobe ptp_kvm
ts# ./openptp0 &
[1] 670
opened /dev/ptp0, sleeping 10s...
ts# rmmod ptp_kvm
ts# ls /dev/ptp*
ls: cannot access '/dev/ptp*': No such file or directory
ts# ...woken up
[   48.010809] general protection fault: 0000 [#1] SMP
[   48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25
[   48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
[   48.016270] RIP: 0010:module_put.part.0+0x7/0x80
[   48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202
[   48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0
[   48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b
[   48.019470] ...                                              ^^^ a slub poison
[   48.023854] Call Trace:
[   48.024050]  __fput+0x21f/0x240
[   48.024288]  task_work_run+0x79/0x90
[   48.024555]  do_exit+0x2af/0xab0
[   48.024799]  ? vfs_write+0x16a/0x190
[   48.025082]  do_group_exit+0x35/0x90
[   48.025387]  __x64_sys_exit_group+0xf/0x10
[   48.025737]  do_syscall_64+0x3d/0x130
[   48.026056]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   48.026479] RIP: 0033:0x7f53b12082f6
[   48.026792] ...
[   48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm]
[   48.045001] Fixing recursive fault but reboot is needed!

This happens in:

static void __fput(struct file *file)
{   ...
    if (file->f_op->release)
        file->f_op->release(inode, file); <<< cdev is kfree'd here
    if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
             !(mode & FMODE_PATH))) {
        cdev_put(inode->i_cdev); <<< cdev fields are accessed here

Namely:

__fput()
  posix_clock_release()
    kref_put(&clk->kref, delete_clock) <<< the last reference
      delete_clock()
        delete_ptp_clock()
          kfree(ptp) <<< cdev is embedded in ptp
  cdev_put
    module_put(p->owner) <<< *p is kfree'd, bang!

Here cdev is embedded in posix_clock which is embedded in ptp_clock.
The race happens because ptp_clock's lifetime is controlled by two
refcounts: kref and cdev.kobj in posix_clock. This is wrong.

Make ptp_clock's sysfs device a parent of cdev with cdev_device_add()
created especially for such cases. This way the parent device with its
ptp_clock is not released until all references to the cdev are released.
This adds a requirement that an initialized but not exposed struct
device should be provided to posix_clock_register() by a caller instead
of a simple dev_t.

This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix
the race between the release of watchdog_core_data and cdev"). See
details of the implementation in the commit 233ed09d7fda ("chardev: add
helper function to register char devs with a struct device").

Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u
Analyzed-by: Stephen Johnston <sjohnsto@redhat.com>
Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clock.c     | 31 ++++++++++++++-----------------
 drivers/ptp/ptp_private.h   |  2 +-
 include/linux/posix-clock.h | 19 +++++++++++--------
 kernel/time/posix-clock.c   | 31 +++++++++++++------------------
 4 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index e60eab7f8a61..61fafe0374ce 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -166,9 +166,9 @@ static struct posix_clock_operations ptp_clock_ops = {
 	.read		= ptp_read,
 };
 
-static void delete_ptp_clock(struct posix_clock *pc)
+static void ptp_clock_release(struct device *dev)
 {
-	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
 
 	mutex_destroy(&ptp->tsevq_mux);
 	mutex_destroy(&ptp->pincfg_mux);
@@ -213,7 +213,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	}
 
 	ptp->clock.ops = ptp_clock_ops;
-	ptp->clock.release = delete_ptp_clock;
 	ptp->info = info;
 	ptp->devid = MKDEV(major, index);
 	ptp->index = index;
@@ -236,15 +235,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	if (err)
 		goto no_pin_groups;
 
-	/* Create a new device in our class. */
-	ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
-					     ptp, ptp->pin_attr_groups,
-					     "ptp%d", ptp->index);
-	if (IS_ERR(ptp->dev)) {
-		err = PTR_ERR(ptp->dev);
-		goto no_device;
-	}
-
 	/* Register a new PPS source. */
 	if (info->pps) {
 		struct pps_source_info pps;
@@ -260,8 +250,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 		}
 	}
 
-	/* Create a posix clock. */
-	err = posix_clock_register(&ptp->clock, ptp->devid);
+	/* Initialize a new device of our class in our clock structure. */
+	device_initialize(&ptp->dev);
+	ptp->dev.devt = ptp->devid;
+	ptp->dev.class = ptp_class;
+	ptp->dev.parent = parent;
+	ptp->dev.groups = ptp->pin_attr_groups;
+	ptp->dev.release = ptp_clock_release;
+	dev_set_drvdata(&ptp->dev, ptp);
+	dev_set_name(&ptp->dev, "ptp%d", ptp->index);
+
+	/* Create a posix clock and link it to the device. */
+	err = posix_clock_register(&ptp->clock, &ptp->dev);
 	if (err) {
 		pr_err("failed to create posix clock\n");
 		goto no_clock;
@@ -273,8 +273,6 @@ no_clock:
 	if (ptp->pps_source)
 		pps_unregister_source(ptp->pps_source);
 no_pps:
-	device_destroy(ptp_class, ptp->devid);
-no_device:
 	ptp_cleanup_pin_groups(ptp);
 no_pin_groups:
 	if (ptp->kworker)
@@ -304,7 +302,6 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
 	if (ptp->pps_source)
 		pps_unregister_source(ptp->pps_source);
 
-	device_destroy(ptp_class, ptp->devid);
 	ptp_cleanup_pin_groups(ptp);
 
 	posix_clock_unregister(&ptp->clock);
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index 9171d42468fd..6b97155148f1 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -28,7 +28,7 @@ struct timestamp_event_queue {
 
 struct ptp_clock {
 	struct posix_clock clock;
-	struct device *dev;
+	struct device dev;
 	struct ptp_clock_info *info;
 	dev_t devid;
 	int index; /* index into clocks.map */
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index fe6cfdcfbc26..468328b1e1dd 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -69,29 +69,32 @@ struct posix_clock_operations {
  *
  * @ops:     Functional interface to the clock
  * @cdev:    Character device instance for this clock
- * @kref:    Reference count.
+ * @dev:     Pointer to the clock's device.
  * @rwsem:   Protects the 'zombie' field from concurrent access.
  * @zombie:  If 'zombie' is true, then the hardware has disappeared.
- * @release: A function to free the structure when the reference count reaches
- *           zero. May be NULL if structure is statically allocated.
  *
  * Drivers should embed their struct posix_clock within a private
  * structure, obtaining a reference to it during callbacks using
  * container_of().
+ *
+ * Drivers should supply an initialized but not exposed struct device
+ * to posix_clock_register(). It is used to manage lifetime of the
+ * driver's private structure. It's 'release' field should be set to
+ * a release function for this private structure.
  */
 struct posix_clock {
 	struct posix_clock_operations ops;
 	struct cdev cdev;
-	struct kref kref;
+	struct device *dev;
 	struct rw_semaphore rwsem;
 	bool zombie;
-	void (*release)(struct posix_clock *clk);
 };
 
 /**
  * posix_clock_register() - register a new clock
- * @clk:   Pointer to the clock. Caller must provide 'ops' and 'release'
- * @devid: Allocated device id
+ * @clk:   Pointer to the clock. Caller must provide 'ops' field
+ * @dev:   Pointer to the initialized device. Caller must provide
+ *         'release' field
  *
  * A clock driver calls this function to register itself with the
  * clock device subsystem. If 'clk' points to dynamically allocated
@@ -100,7 +103,7 @@ struct posix_clock {
  *
  * Returns zero on success, non-zero otherwise.
  */
-int posix_clock_register(struct posix_clock *clk, dev_t devid);
+int posix_clock_register(struct posix_clock *clk, struct device *dev);
 
 /**
  * posix_clock_unregister() - unregister a clock
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index ec960bb939fd..200fb2d3be99 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -14,8 +14,6 @@
 
 #include "posix-timers.h"
 
-static void delete_clock(struct kref *kref);
-
 /*
  * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
  */
@@ -125,7 +123,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
 		err = 0;
 
 	if (!err) {
-		kref_get(&clk->kref);
+		get_device(clk->dev);
 		fp->private_data = clk;
 	}
 out:
@@ -141,7 +139,7 @@ static int posix_clock_release(struct inode *inode, struct file *fp)
 	if (clk->ops.release)
 		err = clk->ops.release(clk);
 
-	kref_put(&clk->kref, delete_clock);
+	put_device(clk->dev);
 
 	fp->private_data = NULL;
 
@@ -161,38 +159,35 @@ static const struct file_operations posix_clock_file_operations = {
 #endif
 };
 
-int posix_clock_register(struct posix_clock *clk, dev_t devid)
+int posix_clock_register(struct posix_clock *clk, struct device *dev)
 {
 	int err;
 
-	kref_init(&clk->kref);
 	init_rwsem(&clk->rwsem);
 
 	cdev_init(&clk->cdev, &posix_clock_file_operations);
+	err = cdev_device_add(&clk->cdev, dev);
+	if (err) {
+		pr_err("%s unable to add device %d:%d\n",
+			dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
+		return err;
+	}
 	clk->cdev.owner = clk->ops.owner;
-	err = cdev_add(&clk->cdev, devid, 1);
+	clk->dev = dev;
 
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(posix_clock_register);
 
-static void delete_clock(struct kref *kref)
-{
-	struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
-
-	if (clk->release)
-		clk->release(clk);
-}
-
 void posix_clock_unregister(struct posix_clock *clk)
 {
-	cdev_del(&clk->cdev);
+	cdev_device_del(&clk->cdev, clk->dev);
 
 	down_write(&clk->rwsem);
 	clk->zombie = true;
 	up_write(&clk->rwsem);
 
-	kref_put(&clk->kref, delete_clock);
+	put_device(clk->dev);
 }
 EXPORT_SYMBOL_GPL(posix_clock_unregister);
 

From 853697504de043ff0bfd815bd3a64de1dce73dc7 Mon Sep 17 00:00:00 2001
From: Cambda Zhu <cambda@linux.alibaba.com>
Date: Fri, 27 Dec 2019 16:52:37 +0800
Subject: [PATCH 100/146] tcp: Fix highest_sack and highest_sack_seq

>From commit 50895b9de1d3 ("tcp: highest_sack fix"), the logic about
setting tp->highest_sack to the head of the send queue was removed.
Of course the logic is error prone, but it is logical. Before we
remove the pointer to the highest sack skb and use the seq instead,
we need to set tp->highest_sack to NULL when there is no skb after
the last sack, and then replace NULL with the real skb when new skb
inserted into the rtx queue, because the NULL means the highest sack
seq is tp->snd_nxt. If tp->highest_sack is NULL and new data sent,
the next ACK with sack option will increase tp->reordering unexpectedly.

This patch sets tp->highest_sack to the tail of the rtx queue if
it's NULL and new data is sent. The patch keeps the rule that the
highest_sack can only be maintained by sack processing, except for
this only case.

Fixes: 50895b9de1d3 ("tcp: highest_sack fix")
Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1f7735ca8f22..58c92a7d671c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -72,6 +72,9 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 	__skb_unlink(skb, &sk->sk_write_queue);
 	tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
 
+	if (tp->highest_sack == NULL)
+		tp->highest_sack = skb;
+
 	tp->packets_out += tcp_skb_pcount(skb);
 	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);

From a5b72a083da197b493c7ed1e5730d62d3199f7d6 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Sat, 28 Dec 2019 16:36:58 +0100
Subject: [PATCH 101/146] net/sched: add delete_empty() to filters and use it
 in cls_flower

Revert "net/sched: cls_u32: fix refcount leak in the error path of
u32_change()", and fix the u32 refcount leak in a more generic way that
preserves the semantic of rule dumping.
On tc filters that don't support lockless insertion/removal, there is no
need to guard against concurrent insertion when a removal is in progress.
Therefore, for most of them we can avoid a full walk() when deleting, and
just decrease the refcount, like it was done on older Linux kernels.
This fixes situations where walk() was wrongly detecting a non-empty
filter, like it happened with cls_u32 in the error path of change(), thus
leading to failures in the following tdc selftests:

 6aa7: (filter, u32) Add/Replace u32 with source match and invalid indev
 6658: (filter, u32) Add/Replace u32 with custom hash table and invalid handle
 74c2: (filter, u32) Add/Replace u32 filter with invalid hash table id

On cls_flower, and on (future) lockless filters, this check is necessary:
move all the check_empty() logic in a callback so that each filter
can have its own implementation. For cls_flower, it's sufficient to check
if no IDRs have been allocated.

This reverts commit 275c44aa194b7159d1191817b20e076f55f0e620.

Changes since v1:
 - document the need for delete_empty() when TCF_PROTO_OPS_DOIT_UNLOCKED
   is used, thanks to Vlad Buslov
 - implement delete_empty() without doing fl_walk(), thanks to Vlad Buslov
 - squash revert and new fix in a single patch, to be nice with bisect
   tests that run tdc on u32 filter, thanks to Dave Miller

Fixes: 275c44aa194b ("net/sched: cls_u32: fix refcount leak in the error path of u32_change()")
Fixes: 6676d5e416ee ("net: sched: set dedicated tcf_walker flag when tp is empty")
Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Suggested-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  5 +++++
 net/sched/cls_api.c       | 31 +++++--------------------------
 net/sched/cls_flower.c    | 12 ++++++++++++
 net/sched/cls_u32.c       | 25 -------------------------
 4 files changed, 22 insertions(+), 51 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 144f264ea394..fceddf89592a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -308,6 +308,7 @@ struct tcf_proto_ops {
 	int			(*delete)(struct tcf_proto *tp, void *arg,
 					  bool *last, bool rtnl_held,
 					  struct netlink_ext_ack *);
+	bool			(*delete_empty)(struct tcf_proto *tp);
 	void			(*walk)(struct tcf_proto *tp,
 					struct tcf_walker *arg, bool rtnl_held);
 	int			(*reoffload)(struct tcf_proto *tp, bool add,
@@ -336,6 +337,10 @@ struct tcf_proto_ops {
 	int			flags;
 };
 
+/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
+ * are expected to implement tcf_proto_ops->delete_empty(), otherwise race
+ * conditions can occur when filters are inserted/deleted simultaneously.
+ */
 enum tcf_proto_ops_flags {
 	TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
 };
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 6a0eacafdb19..76e0d122616a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -308,33 +308,12 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
 		tcf_proto_destroy(tp, rtnl_held, true, extack);
 }
 
-static int walker_check_empty(struct tcf_proto *tp, void *fh,
-			      struct tcf_walker *arg)
+static bool tcf_proto_check_delete(struct tcf_proto *tp)
 {
-	if (fh) {
-		arg->nonempty = true;
-		return -1;
-	}
-	return 0;
-}
+	if (tp->ops->delete_empty)
+		return tp->ops->delete_empty(tp);
 
-static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
-{
-	struct tcf_walker walker = { .fn = walker_check_empty, };
-
-	if (tp->ops->walk) {
-		tp->ops->walk(tp, &walker, rtnl_held);
-		return !walker.nonempty;
-	}
-	return true;
-}
-
-static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
-{
-	spin_lock(&tp->lock);
-	if (tcf_proto_is_empty(tp, rtnl_held))
-		tp->deleting = true;
-	spin_unlock(&tp->lock);
+	tp->deleting = true;
 	return tp->deleting;
 }
 
@@ -1751,7 +1730,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
 	 * concurrently.
 	 * Mark tp for deletion if it is empty.
 	 */
-	if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
+	if (!tp_iter || !tcf_proto_check_delete(tp)) {
 		mutex_unlock(&chain->filter_chain_lock);
 		return;
 	}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 0d125de54285..b0f42e62dd76 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2773,6 +2773,17 @@ static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
 		f->res.class = cl;
 }
 
+static bool fl_delete_empty(struct tcf_proto *tp)
+{
+	struct cls_fl_head *head = fl_head_dereference(tp);
+
+	spin_lock(&tp->lock);
+	tp->deleting = idr_is_empty(&head->handle_idr);
+	spin_unlock(&tp->lock);
+
+	return tp->deleting;
+}
+
 static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.kind		= "flower",
 	.classify	= fl_classify,
@@ -2782,6 +2793,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.put		= fl_put,
 	.change		= fl_change,
 	.delete		= fl_delete,
+	.delete_empty	= fl_delete_empty,
 	.walk		= fl_walk,
 	.reoffload	= fl_reoffload,
 	.hw_add		= fl_hw_add,
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 66c6bcec16cb..a0e6fac613de 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1108,33 +1108,10 @@ erridr:
 	return err;
 }
 
-static bool u32_hnode_empty(struct tc_u_hnode *ht, bool *non_root_ht)
-{
-	int i;
-
-	if (!ht)
-		return true;
-	if (!ht->is_root) {
-		*non_root_ht = true;
-		return false;
-	}
-	if (*non_root_ht)
-		return false;
-	if (ht->refcnt < 2)
-		return true;
-
-	for (i = 0; i <= ht->divisor; i++) {
-		if (rtnl_dereference(ht->ht[i]))
-			return false;
-	}
-	return true;
-}
-
 static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 		     bool rtnl_held)
 {
 	struct tc_u_common *tp_c = tp->data;
-	bool non_root_ht = false;
 	struct tc_u_hnode *ht;
 	struct tc_u_knode *n;
 	unsigned int h;
@@ -1147,8 +1124,6 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 	     ht = rtnl_dereference(ht->next)) {
 		if (ht->prio != tp->prio)
 			continue;
-		if (u32_hnode_empty(ht, &non_root_ht))
-			return;
 		if (arg->count >= arg->skip) {
 			if (arg->fn(tp, ht, arg) < 0) {
 				arg->stop = 1;

From 04b69426d846cd04ca9acefff1ea39e1c64d2714 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sat, 28 Dec 2019 16:28:09 +0000
Subject: [PATCH 102/146] hsr: fix slab-out-of-bounds Read in
 hsr_debugfs_rename()

hsr slave interfaces don't have debugfs directory.
So, hsr_debugfs_rename() shouldn't be called when hsr slave interface name
is changed.

Test commands:
    ip link add dummy0 type dummy
    ip link add dummy1 type dummy
    ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1
    ip link set dummy0 name ap

Splat looks like:
[21071.899367][T22666] ap: renamed from dummy0
[21071.914005][T22666] ==================================================================
[21071.919008][T22666] BUG: KASAN: slab-out-of-bounds in hsr_debugfs_rename+0xaa/0xb0 [hsr]
[21071.923640][T22666] Read of size 8 at addr ffff88805febcd98 by task ip/22666
[21071.926941][T22666]
[21071.927750][T22666] CPU: 0 PID: 22666 Comm: ip Not tainted 5.5.0-rc2+ #240
[21071.929919][T22666] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[21071.935094][T22666] Call Trace:
[21071.935867][T22666]  dump_stack+0x96/0xdb
[21071.936687][T22666]  ? hsr_debugfs_rename+0xaa/0xb0 [hsr]
[21071.937774][T22666]  print_address_description.constprop.5+0x1be/0x360
[21071.939019][T22666]  ? hsr_debugfs_rename+0xaa/0xb0 [hsr]
[21071.940081][T22666]  ? hsr_debugfs_rename+0xaa/0xb0 [hsr]
[21071.940949][T22666]  __kasan_report+0x12a/0x16f
[21071.941758][T22666]  ? hsr_debugfs_rename+0xaa/0xb0 [hsr]
[21071.942674][T22666]  kasan_report+0xe/0x20
[21071.943325][T22666]  hsr_debugfs_rename+0xaa/0xb0 [hsr]
[21071.944187][T22666]  hsr_netdev_notify+0x1fe/0x9b0 [hsr]
[21071.945052][T22666]  ? __module_text_address+0x13/0x140
[21071.945897][T22666]  notifier_call_chain+0x90/0x160
[21071.946743][T22666]  dev_change_name+0x419/0x840
[21071.947496][T22666]  ? __read_once_size_nocheck.constprop.6+0x10/0x10
[21071.948600][T22666]  ? netdev_adjacent_rename_links+0x280/0x280
[21071.949577][T22666]  ? __read_once_size_nocheck.constprop.6+0x10/0x10
[21071.950672][T22666]  ? lock_downgrade+0x6e0/0x6e0
[21071.951345][T22666]  ? do_setlink+0x811/0x2ef0
[21071.951991][T22666]  do_setlink+0x811/0x2ef0
[21071.952613][T22666]  ? is_bpf_text_address+0x81/0xe0
[ ... ]

Reported-by: syzbot+9328206518f08318a5fd@syzkaller.appspotmail.com
Fixes: 4c2d5e33dcd3 ("hsr: rename debugfs file when interface name is changed")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index d2ee7125a7f1..9e389accbfc7 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -46,7 +46,8 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
 		hsr_check_carrier_and_operstate(hsr);
 		break;
 	case NETDEV_CHANGENAME:
-		hsr_debugfs_rename(dev);
+		if (is_hsr_master(dev))
+			hsr_debugfs_rename(dev);
 		break;
 	case NETDEV_CHANGEADDR:
 		if (port->type == HSR_PT_MASTER) {

From 969e11529221a6a2a787cb3b63ccf9402f8d2e37 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Mon, 23 Dec 2019 16:13:48 +0800
Subject: [PATCH 103/146] drm/amdgpu: correct RLC firmwares loading sequence

Per confirmation with RLC firmware team, the RLC should
be unhalted after all RLC related firmwares uploaded.
However, in fact the RLC is unhalted immediately after
RLCG firmware uploaded. And that may causes unexpected
PSP hang on loading the succeeding RLC save restore
list related firmwares.
So, we correct the firmware loading sequence to load
RLC save restore list related firmwares before RLCG
ucode. That will help to get around this issue.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 44be3a45b25e..e1b8d8daeafc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1488,7 +1488,7 @@ out:
 
 		/* Start rlc autoload after psp recieved all the gfx firmware */
 		if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
-		    AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
+		    AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) {
 			ret = psp_rlc_autoload(psp);
 			if (ret) {
 				DRM_ERROR("Failed to start rlc autoload\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 410587b950f3..914acecda5cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -292,10 +292,10 @@ enum AMDGPU_UCODE_ID {
 	AMDGPU_UCODE_ID_CP_MEC2_JT,
 	AMDGPU_UCODE_ID_CP_MES,
 	AMDGPU_UCODE_ID_CP_MES_DATA,
-	AMDGPU_UCODE_ID_RLC_G,
 	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
 	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
 	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
+	AMDGPU_UCODE_ID_RLC_G,
 	AMDGPU_UCODE_ID_STORAGE,
 	AMDGPU_UCODE_ID_SMC,
 	AMDGPU_UCODE_ID_UVD,

From 9c95a278ba7ca3ccc111c165cc74cb23c744fc85 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 11 Dec 2019 12:44:08 +0100
Subject: [PATCH 104/146] apparmor: fix bind mounts aborting with -ENOMEM

With commit df323337e507 ("apparmor: Use a memory pool instead per-CPU
caches, 2019-05-03"), AppArmor code was converted to use memory pools. In
that conversion, a bug snuck into the code that polices bind mounts that
causes all bind mounts to fail with -ENOMEM, as we erroneously error out
if `aa_get_buffer` returns a pointer instead of erroring out when it
does _not_ return a valid pointer.

Fix the issue by correctly checking for valid pointers returned by
`aa_get_buffer` to fix bind mounts with AppArmor.

Fixes: df323337e507 ("apparmor: Use a memory pool instead per-CPU caches")
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/mount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index 4ed6688f9d40..e0828ee7a345 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -442,7 +442,7 @@ int aa_bind_mount(struct aa_label *label, const struct path *path,
 	buffer = aa_get_buffer(false);
 	old_buffer = aa_get_buffer(false);
 	error = -ENOMEM;
-	if (!buffer || old_buffer)
+	if (!buffer || !old_buffer)
 		goto out;
 
 	error = fn_for_each_confined(label, profile,

From 20d4e80d255dd7cfecb53743bc550ebcad04549d Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 18 Dec 2019 11:04:07 -0800
Subject: [PATCH 105/146] apparmor: only get a label reference if the fast path
 check fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The common fast path check can be done under rcu_read_lock() and
doesn't need a reference count on the label. Only take a reference
count if entering the slow path.

Fixes reported hackbench regression
  - sha1 79e178a57dae ("Merge tag 'apparmor-pr-2019-12-03' of
    git://git.kernel.org/pub/scm/linux/kernel/git/jj/linux-apparmor")

  hackbench -l (256000/#grp) -g #grp
   128 groups     19.679 ±0.90%

  - previous sha1 01d1dff64662 ("Merge tag 's390-5.5-2' of
    git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux")

  hackbench -l (256000/#grp) -g #grp
   128 groups     3.1689 ±3.04%

Reported-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Fixes: bce4e7e9c45e ("apparmor: reduce rcu_read_lock scope for aa_file_perm mediation")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/file.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index fe2ebe5e865e..f1caf3674e1c 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -618,8 +618,7 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
 	fctx = file_ctx(file);
 
 	rcu_read_lock();
-	flabel  = aa_get_newest_label(rcu_dereference(fctx->label));
-	rcu_read_unlock();
+	flabel  = rcu_dereference(fctx->label);
 	AA_BUG(!flabel);
 
 	/* revalidate access, if task is unconfined, or the cached cred
@@ -631,9 +630,13 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
 	 */
 	denied = request & ~fctx->allow;
 	if (unconfined(label) || unconfined(flabel) ||
-	    (!denied && aa_label_is_subset(flabel, label)))
+	    (!denied && aa_label_is_subset(flabel, label))) {
+		rcu_read_unlock();
 		goto done;
+	}
 
+	flabel  = aa_get_newest_label(flabel);
+	rcu_read_unlock();
 	/* TODO: label cross check */
 
 	if (file->f_path.mnt && path_mediated_fs(file->f_path.dentry))
@@ -643,8 +646,9 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
 	else if (S_ISSOCK(file_inode(file)->i_mode))
 		error = __file_sock_perm(op, label, flabel, file, request,
 					 denied);
-done:
 	aa_put_label(flabel);
+
+done:
 	return error;
 }
 

From 8df955a32a73315055e0cd187cbb1cea5820394b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 30 Dec 2019 11:48:10 -0800
Subject: [PATCH 106/146] pstore/ram: Fix error-path memory leak in
 persistent_ram_new() callers

For callers that allocated a label for persistent_ram_new(), if the call
fails, they must clean up the allocation.

Suggested-by: Navid Emamdoost <navid.emamdoost@gmail.com>
Fixes: 1227daa43bce ("pstore/ram: Clarify resource reservation labels")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/lkml/20191211191353.14385-1-navid.emamdoost@gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/ram.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 8caff834f002..f753f3b6f88d 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -577,6 +577,7 @@ static int ramoops_init_przs(const char *name,
 			dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
 				name, record_size,
 				(unsigned long long)*paddr, err);
+			kfree(label);
 
 			while (i > 0) {
 				i--;
@@ -622,6 +623,7 @@ static int ramoops_init_prz(const char *name,
 
 		dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
 			name, sz, (unsigned long long)*paddr, err);
+		kfree(label);
 		return err;
 	}
 

From 9e5f1c19800b808a37fb9815a26d382132c26c3d Mon Sep 17 00:00:00 2001
From: Aleksandr Yashkin <a.yashkin@inango-systems.com>
Date: Mon, 23 Dec 2019 18:38:16 +0500
Subject: [PATCH 107/146] pstore/ram: Write new dumps to start of recycled
 zones

The ram_core.c routines treat przs as circular buffers. When writing a
new crash dump, the old buffer needs to be cleared so that the new dump
doesn't end up in the wrong place (i.e. at the end).

The solution to this problem is to reset the circular buffer state before
writing a new Oops dump.

Signed-off-by: Aleksandr Yashkin <a.yashkin@inango-systems.com>
Signed-off-by: Nikolay Merinov <n.merinov@inango-systems.com>
Signed-off-by: Ariel Gilman <a.gilman@inango-systems.com>
Link: https://lore.kernel.org/r/20191223133816.28155-1-n.merinov@inango-systems.com
Fixes: 896fc1f0c4c6 ("pstore/ram: Switch to persistent_ram routines")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/ram.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index f753f3b6f88d..487ee39b438a 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -407,6 +407,17 @@ static int notrace ramoops_pstore_write(struct pstore_record *record)
 
 	prz = cxt->dprzs[cxt->dump_write_cnt];
 
+	/*
+	 * Since this is a new crash dump, we need to reset the buffer in
+	 * case it still has an old dump present. Without this, the new dump
+	 * will get appended, which would seriously confuse anything trying
+	 * to check dump file contents. Specifically, ramoops_read_kmsg_hdr()
+	 * expects to find a dump header in the beginning of buffer data, so
+	 * we must to reset the buffer values, in order to ensure that the
+	 * header will be written to the beginning of the buffer.
+	 */
+	persistent_ram_zap(prz);
+
 	/* Build header and append record contents. */
 	hlen = ramoops_write_kmsg_hdr(prz, record);
 	if (!hlen)

From 771b894f2f3dfedc2ba5561731fffa0e39b1bbb6 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Mon, 30 Dec 2019 12:35:03 -0800
Subject: [PATCH 108/146] samples/seccomp: Zero out members based on
 seccomp_notif_sizes

The sizes by which seccomp_notif and seccomp_notif_resp are allocated are
based on the SECCOMP_GET_NOTIF_SIZES ioctl. This allows for graceful
extension of these datastructures. If userspace zeroes out the
datastructure based on its version, and it is lagging behind the kernel's
version, it will end up sending trailing garbage. On the other hand,
if it is ahead of the kernel version, it will write extra zero space,
and potentially cause corruption.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Suggested-by: Tycho Andersen <tycho@tycho.ws>
Link: https://lore.kernel.org/r/20191230203503.4925-1-sargun@sargun.me
Fixes: fec7b6690541 ("samples: add an example of seccomp user trap")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 samples/seccomp/user-trap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c
index 6d0125ca8af7..20291ec6489f 100644
--- a/samples/seccomp/user-trap.c
+++ b/samples/seccomp/user-trap.c
@@ -298,14 +298,14 @@ int main(void)
 		req = malloc(sizes.seccomp_notif);
 		if (!req)
 			goto out_close;
-		memset(req, 0, sizeof(*req));
 
 		resp = malloc(sizes.seccomp_notif_resp);
 		if (!resp)
 			goto out_req;
-		memset(resp, 0, sizeof(*resp));
+		memset(resp, 0, sizes.seccomp_notif_resp);
 
 		while (1) {
+			memset(req, 0, sizes.seccomp_notif);
 			if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
 				perror("ioctl recv");
 				goto out_resp;

From 88c13f8bd71472fbab5338b01d99122908c77e53 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Sat, 28 Dec 2019 22:24:49 -0800
Subject: [PATCH 109/146] selftests/seccomp: Zero out seccomp_notif

The seccomp_notif structure should be zeroed out prior to calling the
SECCOMP_IOCTL_NOTIF_RECV ioctl. Previously, the kernel did not check
whether these structures were zeroed out or not, so these worked.

This patch zeroes out the seccomp_notif data structure prior to calling
the ioctl.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Reviewed-by: Tycho Andersen <tycho@tycho.ws>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20191229062451.9467-1-sargun@sargun.me
Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 6944b898bb53..f53f14971bff 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -3278,6 +3278,7 @@ TEST(user_notification_signal)
 
 	close(sk_pair[1]);
 
+	memset(&req, 0, sizeof(req));
 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
 
 	EXPECT_EQ(kill(pid, SIGUSR1), 0);
@@ -3296,6 +3297,7 @@ TEST(user_notification_signal)
 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
 	EXPECT_EQ(errno, ENOENT);
 
+	memset(&req, 0, sizeof(req));
 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
 
 	resp.id = req.id;

From 2882d53c9c6f3b8311d225062522f03772cf0179 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Sat, 28 Dec 2019 22:24:50 -0800
Subject: [PATCH 110/146] seccomp: Check that seccomp_notif is zeroed out by
 the user

This patch is a small change in enforcement of the uapi for
SECCOMP_IOCTL_NOTIF_RECV ioctl. Specifically, the datastructure which
is passed (seccomp_notif) must be zeroed out. Previously any of its
members could be set to nonsense values, and we would ignore it.

This ensures all fields are set to their zero value.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
Acked-by: Tycho Andersen <tycho@tycho.ws>
Link: https://lore.kernel.org/r/20191229062451.9467-2-sargun@sargun.me
Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/seccomp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 12d2227e5786..b6ea3dcb57bf 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1026,6 +1026,13 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
 	struct seccomp_notif unotif;
 	ssize_t ret;
 
+	/* Verify that we're not given garbage to keep struct extensible. */
+	ret = check_zeroed_user(buf, sizeof(unotif));
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -EINVAL;
+
 	memset(&unotif, 0, sizeof(unotif));
 
 	ret = down_interruptible(&filter->notif->request);

From e4ab5ccc357b978999328fadae164e098c26fa40 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Mon, 30 Dec 2019 12:38:11 -0800
Subject: [PATCH 111/146] selftests/seccomp: Catch garbage on
 SECCOMP_IOCTL_NOTIF_RECV

This adds logic to the user_notification_basic test to set a member
of struct seccomp_notif to an invalid value to ensure that the kernel
returns EINVAL if any of the struct seccomp_notif members are set to
invalid values.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Suggested-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20191230203811.4996-1-sargun@sargun.me
Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index f53f14971bff..ee1b727ede04 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -3158,7 +3158,18 @@ TEST(user_notification_basic)
 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
 	EXPECT_EQ(pollfd.revents, POLLIN);
 
-	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+	/* Test that we can't pass garbage to the kernel. */
+	memset(&req, 0, sizeof(req));
+	req.pid = -1;
+	errno = 0;
+	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+
+	if (ret) {
+		req.pid = 0;
+		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+	}
 
 	pollfd.fd = listener;
 	pollfd.events = POLLIN | POLLOUT;

From a5b0dc5a46c221725c43bd9b01570239a4cd78b1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Dec 2019 14:39:28 +0100
Subject: [PATCH 112/146] gcc-plugins: make it possible to disable
 CONFIG_GCC_PLUGINS again

I noticed that randconfig builds with gcc no longer produce a lot of
ccache hits, unlike with clang, and traced this back to plugins
now being enabled unconditionally if they are supported.

I am now working around this by adding

   export CCACHE_COMPILERCHECK=/usr/bin/size -A %compiler%

to my top-level Makefile. This changes the heuristic that ccache uses
to determine whether the plugins are the same after a 'make clean'.

However, it also seems that being able to just turn off the plugins is
generally useful, at least for build testing it adds noticeable overhead
but does not find a lot of bugs additional bugs, and may be easier for
ccache users than my workaround.

Fixes: 9f671e58159a ("security: Create "kernel hardening" config area")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20191211133951.401933-1-arnd@arndb.de
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 scripts/gcc-plugins/Kconfig | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index d33de0b9f4f5..e3569543bdac 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -14,8 +14,8 @@ config HAVE_GCC_PLUGINS
 	  An arch should select this symbol if it supports building with
 	  GCC plugins.
 
-config GCC_PLUGINS
-	bool
+menuconfig GCC_PLUGINS
+	bool "GCC plugins"
 	depends on HAVE_GCC_PLUGINS
 	depends on PLUGIN_HOSTCC != ""
 	default y
@@ -25,8 +25,7 @@ config GCC_PLUGINS
 
 	  See Documentation/core-api/gcc-plugins.rst for details.
 
-menu "GCC plugins"
-	depends on GCC_PLUGINS
+if GCC_PLUGINS
 
 config GCC_PLUGIN_CYC_COMPLEXITY
 	bool "Compute the cyclomatic complexity of a function" if EXPERT
@@ -113,4 +112,4 @@ config GCC_PLUGIN_ARM_SSP_PER_TASK
 	bool
 	depends on GCC_PLUGINS && ARM
 
-endmenu
+endif

From 74f1a299107b9e1a563831a4ba85f769ab577164 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Wed, 1 Jan 2020 20:05:03 +0100
Subject: [PATCH 113/146] Revert "fs: remove ksys_dup()"

This reverts commit 8243186f0cc7 ("fs: remove ksys_dup()") and the
subsequent fix for it in commit 2d3145f8d280 ("early init: fix error
handling when opening /dev/console").

Trying to use filp_open() and f_dupfd() instead of pseudo-syscalls
caused more trouble than what is worth it: it requires accessing vfs
internals and it turns out there were other bugs in it too.

In particular, the file reference counting was wrong - because unlike
the original "open+2*dup" sequence it used "filp_open+3*f_dupfd" and
thus had an extra leaked file reference.

That in turn then caused odd problems with Androidx86 long after boot
becaue of how the extra reference to the console kept the session active
even after all file descriptors had been closed.

Reported-by: youling 257 <youling257@gmail.com>
Cc: Arvind Sankar <nivedita@alum.mit.edu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/file.c                |  7 ++++++-
 include/linux/syscalls.h |  1 +
 init/main.c              | 26 ++++++--------------------
 3 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 2f4fcf985079..3da91a112bab 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -960,7 +960,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
 	return ksys_dup3(oldfd, newfd, 0);
 }
 
-SYSCALL_DEFINE1(dup, unsigned int, fildes)
+int ksys_dup(unsigned int fildes)
 {
 	int ret = -EBADF;
 	struct file *file = fget_raw(fildes);
@@ -975,6 +975,11 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
 	return ret;
 }
 
+SYSCALL_DEFINE1(dup, unsigned int, fildes)
+{
+	return ksys_dup(fildes);
+}
+
 int f_dupfd(unsigned int from, struct file *file, unsigned flags)
 {
 	int err;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2960dedcfde8..5262b7a76d39 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1232,6 +1232,7 @@ asmlinkage long sys_ni_syscall(void);
  */
 
 int ksys_umount(char __user *name, int flags);
+int ksys_dup(unsigned int fildes);
 int ksys_chroot(const char __user *filename);
 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count);
 int ksys_chdir(const char __user *filename);
diff --git a/init/main.c b/init/main.c
index 1ecfd43ed464..2cd736059416 100644
--- a/init/main.c
+++ b/init/main.c
@@ -93,7 +93,6 @@
 #include <linux/rodata_test.h>
 #include <linux/jump_label.h>
 #include <linux/mem_encrypt.h>
-#include <linux/file.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -1158,26 +1157,13 @@ static int __ref kernel_init(void *unused)
 
 void console_on_rootfs(void)
 {
-	struct file *file;
-	unsigned int i;
+	/* Open the /dev/console as stdin, this should never fail */
+	if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
+		pr_err("Warning: unable to open an initial console.\n");
 
-	/* Open /dev/console in kernelspace, this should never fail */
-	file = filp_open("/dev/console", O_RDWR, 0);
-	if (IS_ERR(file))
-		goto err_out;
-
-	/* create stdin/stdout/stderr, this should never fail */
-	for (i = 0; i < 3; i++) {
-		if (f_dupfd(i, file, 0) != i)
-			goto err_out;
-	}
-
-	return;
-
-err_out:
-	/* no panic -- this might not be fatal */
-	pr_err("Warning: unable to open an initial console.\n");
-	return;
+	/* create stdout/stderr */
+	(void) ksys_dup(0);
+	(void) ksys_dup(0);
 }
 
 static noinline void __init kernel_init_freeable(void)

From bbcc5672b0063b0e9d65dc8787a4f09c3b5bb5cc Mon Sep 17 00:00:00 2001
From: Paul Burton <paulburton@kernel.org>
Date: Wed, 1 Jan 2020 20:50:38 -0800
Subject: [PATCH 114/146] MIPS: Avoid VDSO ABI breakage due to global register
 variable

Declaring __current_thread_info as a global register variable has the
effect of preventing GCC from saving & restoring its value in cases
where the ABI would typically do so.

To quote GCC documentation:

> If the register is a call-saved register, call ABI is affected: the
> register will not be restored in function epilogue sequences after the
> variable has been assigned. Therefore, functions cannot safely return
> to callers that assume standard ABI.

When our position independent VDSO is built for the n32 or n64 ABIs all
functions it exposes should be preserving the value of $gp/$28 for their
caller, but in the presence of the __current_thread_info global register
variable GCC stops doing so & simply clobbers $gp/$28 when calculating
the address of the GOT.

In cases where the VDSO returns success this problem will typically be
masked by the caller in libc returning & restoring $gp/$28 itself, but
that is by no means guaranteed. In cases where the VDSO returns an error
libc will typically contain a fallback path which will now fail
(typically with a bad memory access) if it attempts anything which
relies upon the value of $gp/$28 - eg. accessing anything via the GOT.

One fix for this would be to move the declaration of
__current_thread_info inside the current_thread_info() function,
demoting it from global register variable to local register variable &
avoiding inadvertently creating a non-standard calling ABI for the VDSO.
Unfortunately this causes issues for clang, which doesn't support local
register variables as pointed out by commit fe92da0f355e ("MIPS: Changed
current_thread_info() to an equivalent supported by both clang and GCC")
which introduced the global register variable before we had a VDSO to
worry about.

Instead, fix this by continuing to use the global register variable for
the kernel proper but declare __current_thread_info as a simple extern
variable when building the VDSO. It should never be referenced, and will
cause a link error if it is. This resolves the calling convention issue
for the VDSO without having any impact upon the build of the kernel
itself for either clang or gcc.

Signed-off-by: Paul Burton <paulburton@kernel.org>
Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO")
Reported-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Tested-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Brauner <christian.brauner@canonical.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: <stable@vger.kernel.org> # v4.4+
Cc: linux-mips@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/include/asm/thread_info.h | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 4993db40482c..ee26f9a4575d 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -49,8 +49,26 @@ struct thread_info {
 	.addr_limit	= KERNEL_DS,		\
 }
 
-/* How to get the thread information struct from C.  */
+/*
+ * A pointer to the struct thread_info for the currently executing thread is
+ * held in register $28/$gp.
+ *
+ * We declare __current_thread_info as a global register variable rather than a
+ * local register variable within current_thread_info() because clang doesn't
+ * support explicit local register variables.
+ *
+ * When building the VDSO we take care not to declare the global register
+ * variable because this causes GCC to not preserve the value of $28/$gp in
+ * functions that change its value (which is common in the PIC VDSO when
+ * accessing the GOT). Since the VDSO shouldn't be accessing
+ * __current_thread_info anyway we declare it extern in order to cause a link
+ * failure if it's referenced.
+ */
+#ifdef __VDSO__
+extern struct thread_info *__current_thread_info;
+#else
 register struct thread_info *__current_thread_info __asm__("$28");
+#endif
 
 static inline struct thread_info *current_thread_info(void)
 {

From 2fec966f593efd076ad1e56d274611cea7d29eec Mon Sep 17 00:00:00 2001
From: Yunfeng Ye <yeyunfeng@huawei.com>
Date: Tue, 17 Dec 2019 20:21:37 +0800
Subject: [PATCH 115/146] agp: remove unused variable mcapndx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fix the following warning:
drivers/char/agp/isoch.c: In function ‘agp_3_5_isochronous_node_enable’:
drivers/char/agp/isoch.c:87:5: warning: variable ‘mcapndx’ set but not
used [-Wunused-but-set-variable]
  u8 mcapndx;
     ^~~~~~~

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/isoch.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/char/agp/isoch.c b/drivers/char/agp/isoch.c
index 31c374b1b91b..324992439ee8 100644
--- a/drivers/char/agp/isoch.c
+++ b/drivers/char/agp/isoch.c
@@ -84,7 +84,6 @@ static int agp_3_5_isochronous_node_enable(struct agp_bridge_data *bridge,
 	unsigned int cdev = 0;
 	u32 mnistat, tnistat, tstatus, mcmd;
 	u16 tnicmd, mnicmd;
-	u8 mcapndx;
 	u32 tot_bw = 0, tot_n = 0, tot_rq = 0, y_max, rq_isoch, rq_async;
 	u32 step, rem, rem_isoch, rem_async;
 	int ret = 0;
@@ -138,8 +137,6 @@ static int agp_3_5_isochronous_node_enable(struct agp_bridge_data *bridge,
 		cur = list_entry(pos, struct agp_3_5_dev, list);
 		dev = cur->dev;
 
-		mcapndx = cur->capndx;
-
 		pci_read_config_dword(dev, cur->capndx+AGPNISTAT, &mnistat);
 
 		master[cdev].maxbw = (mnistat >> 16) & 0xff;
@@ -251,8 +248,6 @@ static int agp_3_5_isochronous_node_enable(struct agp_bridge_data *bridge,
 		cur = master[cdev].dev;
 		dev = cur->dev;
 
-		mcapndx = cur->capndx;
-
 		master[cdev].rq += (cdev == ndevs - 1)
 		              ? (rem_async + rem_isoch) : step;
 

From a6204fc7b83cbe3398f61cf1742b09f66f0ae220 Mon Sep 17 00:00:00 2001
From: Yunfeng Ye <yeyunfeng@huawei.com>
Date: Tue, 17 Dec 2019 20:22:57 +0800
Subject: [PATCH 116/146] agp: remove unused variable arqsz in agp_3_5_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fix the following warning:
drivers/char/agp/isoch.c: In function ‘agp_3_5_enable’:
drivers/char/agp/isoch.c:322:13: warning: variable ‘arqsz’ set but not
used [-Wunused-but-set-variable]
  u32 isoch, arqsz;
             ^~~~~

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/isoch.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/char/agp/isoch.c b/drivers/char/agp/isoch.c
index 324992439ee8..7ecf20a6d19c 100644
--- a/drivers/char/agp/isoch.c
+++ b/drivers/char/agp/isoch.c
@@ -314,7 +314,7 @@ int agp_3_5_enable(struct agp_bridge_data *bridge)
 {
 	struct pci_dev *td = bridge->dev, *dev = NULL;
 	u8 mcapndx;
-	u32 isoch, arqsz;
+	u32 isoch;
 	u32 tstatus, mstatus, ncapid;
 	u32 mmajor;
 	u16 mpstat;
@@ -329,8 +329,6 @@ int agp_3_5_enable(struct agp_bridge_data *bridge)
 	if (isoch == 0)	/* isoch xfers not available, bail out. */
 		return -ENODEV;
 
-	arqsz     = (tstatus >> 13) & 0x7;
-
 	/*
 	 * Allocate a head for our AGP 3.5 device list
 	 * (multiple AGP v3 devices are allowed behind a single bridge).

From ac51e005fe1456a288929a41d71adc6224e912d2 Mon Sep 17 00:00:00 2001
From: Zong Li <zong.li@sifive.com>
Date: Thu, 2 Jan 2020 11:12:40 +0800
Subject: [PATCH 117/146] riscv: mm: use __pa_symbol for kernel symbols

__pa_symbol is the marcro that should be used for kernel symbols. It is
also a pre-requisite for DEBUG_VIRTUAL which will do bounds checking.

Signed-off-by: Zong Li <zong.li@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/riscv/mm/init.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 69f6678db7f3..965a8cf4829c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -99,13 +99,13 @@ static void __init setup_initrd(void)
 		pr_info("initrd not found or empty");
 		goto disable;
 	}
-	if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) {
+	if (__pa_symbol(initrd_end) > PFN_PHYS(max_low_pfn)) {
 		pr_err("initrd extends beyond end of memory");
 		goto disable;
 	}
 
 	size = initrd_end - initrd_start;
-	memblock_reserve(__pa(initrd_start), size);
+	memblock_reserve(__pa_symbol(initrd_start), size);
 	initrd_below_start_ok = 1;
 
 	pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
@@ -124,8 +124,8 @@ void __init setup_bootmem(void)
 {
 	struct memblock_region *reg;
 	phys_addr_t mem_size = 0;
-	phys_addr_t vmlinux_end = __pa(&_end);
-	phys_addr_t vmlinux_start = __pa(&_start);
+	phys_addr_t vmlinux_end = __pa_symbol(&_end);
+	phys_addr_t vmlinux_start = __pa_symbol(&_start);
 
 	/* Find the memory region containing the kernel */
 	for_each_memblock(memory, reg) {
@@ -445,7 +445,7 @@ static void __init setup_vm_final(void)
 
 	/* Setup swapper PGD for fixmap */
 	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
-			   __pa(fixmap_pgd_next),
+			   __pa_symbol(fixmap_pgd_next),
 			   PGDIR_SIZE, PAGE_TABLE);
 
 	/* Map all memory banks */
@@ -474,7 +474,7 @@ static void __init setup_vm_final(void)
 	clear_fixmap(FIX_PMD);
 
 	/* Move to swapper page table */
-	csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE);
+	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
 	local_flush_tlb_all();
 }
 #else

From 0da310e82d3a9bff6ef6b0f2fbf45d1a05cc64fe Mon Sep 17 00:00:00 2001
From: Zong Li <zong.li@sifive.com>
Date: Thu, 2 Jan 2020 11:09:54 +0800
Subject: [PATCH 118/146] riscv: gcov: enable gcov for RISC-V

This patch enables GCOV code coverage measurement on RISC-V.
Lightly tested on QEMU and Hifive Unleashed board, seems to work as
expected.

Signed-off-by: Zong Li <zong.li@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Acked-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 Documentation/features/debug/gcov-profile-all/arch-support.txt | 2 +-
 arch/riscv/Kconfig                                             | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt
index 059d58a549c7..6fb2b0671994 100644
--- a/Documentation/features/debug/gcov-profile-all/arch-support.txt
+++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt
@@ -23,7 +23,7 @@
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: | TODO |
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index d8efbaa78d67..a31169b02ec0 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -64,6 +64,7 @@ config RISCV
 	select SPARSEMEM_STATIC if 32BIT
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
+	select ARCH_HAS_GCOV_PROFILE_ALL
 
 config ARCH_MMAP_RND_BITS_MIN
 	default 18 if 64BIT

From cfda8617e22a8bf217a613d0b3ba3a38778443ba Mon Sep 17 00:00:00 2001
From: Yash Shah <yash.shah@sifive.com>
Date: Fri, 3 Jan 2020 09:43:20 +0530
Subject: [PATCH 119/146] riscv: dts: Add DT support for SiFive L2 cache
 controller

Add the L2 cache controller DT node in SiFive FU540 soc-specific DT file

Signed-off-by: Yash Shah <yash.shah@sifive.com>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 70a1891e7cd0..a2e3d54e830c 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -54,6 +54,7 @@
 			reg = <1>;
 			riscv,isa = "rv64imafdc";
 			tlb-split;
+			next-level-cache = <&l2cache>;
 			cpu1_intc: interrupt-controller {
 				#interrupt-cells = <1>;
 				compatible = "riscv,cpu-intc";
@@ -77,6 +78,7 @@
 			reg = <2>;
 			riscv,isa = "rv64imafdc";
 			tlb-split;
+			next-level-cache = <&l2cache>;
 			cpu2_intc: interrupt-controller {
 				#interrupt-cells = <1>;
 				compatible = "riscv,cpu-intc";
@@ -100,6 +102,7 @@
 			reg = <3>;
 			riscv,isa = "rv64imafdc";
 			tlb-split;
+			next-level-cache = <&l2cache>;
 			cpu3_intc: interrupt-controller {
 				#interrupt-cells = <1>;
 				compatible = "riscv,cpu-intc";
@@ -123,6 +126,7 @@
 			reg = <4>;
 			riscv,isa = "rv64imafdc";
 			tlb-split;
+			next-level-cache = <&l2cache>;
 			cpu4_intc: interrupt-controller {
 				#interrupt-cells = <1>;
 				compatible = "riscv,cpu-intc";
@@ -253,6 +257,17 @@
 			#pwm-cells = <3>;
 			status = "disabled";
 		};
+		l2cache: cache-controller@2010000 {
+			compatible = "sifive,fu540-c000-ccache", "cache";
+			cache-block-size = <64>;
+			cache-level = <2>;
+			cache-sets = <1024>;
+			cache-size = <2097152>;
+			cache-unified;
+			interrupt-parent = <&plic0>;
+			interrupts = <1 2 3>;
+			reg = <0x0 0x2010000 0x0 0x1000>;
+		};
 
 	};
 };

From 1d8f65798240b6577d8c44d20c8ea8f1d429e495 Mon Sep 17 00:00:00 2001
From: Zong Li <zong.li@sifive.com>
Date: Mon, 23 Dec 2019 16:46:13 +0800
Subject: [PATCH 120/146] riscv: ftrace: correct the condition logic in
 function graph tracer

The condition should be logical NOT to assign the hook address to parent
address. Because the return value 0 of function_graph_enter upon
success.

Fixes: e949b6db51dc (riscv/function_graph: Simplify with function_graph_enter())
Signed-off-by: Zong Li <zong.li@sifive.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: stable@vger.kernel.org
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/riscv/kernel/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index b94d8db5ddcc..c40fdcdeb950 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -142,7 +142,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	 */
 	old = *parent;
 
-	if (function_graph_enter(old, self_addr, frame_pointer, parent))
+	if (!function_graph_enter(old, self_addr, frame_pointer, parent))
 		*parent = return_hooker;
 }
 

From ce644cf3fa06504c2c71ab1b794160d54aaccbc0 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 6 Nov 2019 12:57:07 +0100
Subject: [PATCH 121/146] media: intel-ipu3: Align struct
 ipu3_uapi_awb_fr_config_s to 32 bytes

A struct that needs to be aligned to 32 bytes has a size of 28. Increase
the size to 32.

This makes elements of arrays of this struct aligned to 32 as well, and
other structs where members are aligned to 32 mixing
ipu3_uapi_awb_fr_config_s as well as other types.

Fixes: commit dca5ef2aa1e6 ("media: staging/intel-ipu3: remove the unnecessary compiler flags")
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Tested-by: Bingbu Cao <bingbu.cao@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/ipu3/include/intel-ipu3.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h
index 08eaa0bad0de..1c9c3ba4d518 100644
--- a/drivers/staging/media/ipu3/include/intel-ipu3.h
+++ b/drivers/staging/media/ipu3/include/intel-ipu3.h
@@ -449,7 +449,7 @@ struct ipu3_uapi_awb_fr_config_s {
 	__u16 reserved1;
 	__u32 bayer_sign;
 	__u8 bayer_nf;
-	__u8 reserved2[3];
+	__u8 reserved2[7];
 } __attribute__((aligned(32))) __packed;
 
 /**

From cc976614f59bd8e45de8ce988a6bcb5de711d994 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 30 Dec 2019 22:20:06 +0900
Subject: [PATCH 122/146] gen_initramfs_list.sh: fix 'bad variable name' error

Prior to commit 858805b336be ("kbuild: add $(BASH) to run scripts with
bash-extension"), this shell script was almost always run by bash since
bash is usually installed on the system by default.

Now, this script is run by sh, which might be a symlink to dash. On such
distributions, the following code emits an error:

  local dev=`LC_ALL=C ls -l "${location}"`

You can reproduce the build error, for example by setting
CONFIG_INITRAMFS_SOURCE="/dev".

    GEN     usr/initramfs_data.cpio.gz
  ./usr/gen_initramfs_list.sh: 131: local: 1: bad variable name
  make[1]: *** [usr/Makefile:61: usr/initramfs_data.cpio.gz] Error 2

This is because `LC_ALL=C ls -l "${location}"` contains spaces.
Surrounding it with double-quotes fixes the error.

Fixes: 858805b336be ("kbuild: add $(BASH) to run scripts with bash-extension")
Reported-by: Jory A. Pratt <anarchy@gentoo.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 usr/gen_initramfs_list.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/usr/gen_initramfs_list.sh b/usr/gen_initramfs_list.sh
index 0aad760fcd8c..2bbac73e6477 100755
--- a/usr/gen_initramfs_list.sh
+++ b/usr/gen_initramfs_list.sh
@@ -128,7 +128,7 @@ parse() {
 			str="${ftype} ${name} ${location} ${str}"
 			;;
 		"nod")
-			local dev=`LC_ALL=C ls -l "${location}"`
+			local dev="`LC_ALL=C ls -l "${location}"`"
 			local maj=`field 5 ${dev}`
 			local min=`field 6 ${dev}`
 			maj=${maj%,}

From 8ffdc54b6f4cd718a45802e645bb853e3a46a078 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 30 Dec 2019 15:07:47 +0100
Subject: [PATCH 123/146] kbuild/deb-pkg: annotate libelf-dev dependency as
 :native

Cross compiling the x86 kernel on a non-x86 build machine produces
the following error when CONFIG_UNWINDER_ORC is enabled, regardless
of whether libelf-dev is installed or not.

  dpkg-checkbuilddeps: error: Unmet build dependencies: libelf-dev
  dpkg-buildpackage: warning: build dependencies/conflicts unsatisfied; aborting
  dpkg-buildpackage: warning: (Use -d flag to override.)

Since this is a build time dependency for a build tool, we need to
depend on the native version of libelf-dev so add the appropriate
annotation.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/mkdebian | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 7c230016b08d..357dc56bcf30 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -136,7 +136,7 @@ mkdir -p debian/source/
 echo "1.0" > debian/source/format
 
 echo $debarch > debian/arch
-extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev)"
+extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev:native)"
 extra_build_depends="$extra_build_depends, $(if_enabled_echo CONFIG_SYSTEM_TRUSTED_KEYRING libssl-dev:native)"
 
 # Generate a simple changelog template

From 15f0ec941f4f908fefa23a30ded8358977cc1cc0 Mon Sep 17 00:00:00 2001
From: Jan Stancek <jstancek@redhat.com>
Date: Fri, 3 Jan 2020 18:37:18 +0100
Subject: [PATCH 124/146] mm/hugetlbfs: fix for_each_hstate() loop in
 init_hugetlbfs_fs()

LTP memfd_create04 started failing for some huge page sizes
after v5.4-10135-gc3bfc5dd73c6.

The problem is the check introduced to for_each_hstate() loop that
should skip default_hstate_idx.  Since it doesn't update 'i' counter,
all subsequent huge page sizes are skipped as well.

Fixes: 8fc312b32b25 ("mm/hugetlbfs: fix error handling when setting up mounts")
Signed-off-by: Jan Stancek <jstancek@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d5c2a3158610..a66e425884d1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1498,8 +1498,10 @@ static int __init init_hugetlbfs_fs(void)
 	/* other hstates are optional */
 	i = 0;
 	for_each_hstate(h) {
-		if (i == default_hstate_idx)
+		if (i == default_hstate_idx) {
+			i++;
 			continue;
+		}
 
 		mnt = mount_one_hugetlbfs(h);
 		if (IS_ERR(mnt))

From feee6b2989165631b17ac6d4ccdbf6759254e85a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Sat, 4 Jan 2020 12:59:33 -0800
Subject: [PATCH 125/146] mm/memory_hotplug: shrink zones when offlining memory

We currently try to shrink a single zone when removing memory.  We use
the zone of the first page of the memory we are removing.  If that
memmap was never initialized (e.g., memory was never onlined), we will
read garbage and can trigger kernel BUGs (due to a stale pointer):

    BUG: unable to handle page fault for address: 000000000000353d
    #PF: supervisor write access in kernel mode
    #PF: error_code(0x0002) - not-present page
    PGD 0 P4D 0
    Oops: 0002 [#1] SMP PTI
    CPU: 1 PID: 7 Comm: kworker/u8:0 Not tainted 5.3.0-rc5-next-20190820+ #317
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4
    Workqueue: kacpi_hotplug acpi_hotplug_work_fn
    RIP: 0010:clear_zone_contiguous+0x5/0x10
    Code: 48 89 c6 48 89 c3 e8 2a fe ff ff 48 85 c0 75 cf 5b 5d c3 c6 85 fd 05 00 00 01 5b 5d c3 0f 1f 840
    RSP: 0018:ffffad2400043c98 EFLAGS: 00010246
    RAX: 0000000000000000 RBX: 0000000200000000 RCX: 0000000000000000
    RDX: 0000000000200000 RSI: 0000000000140000 RDI: 0000000000002f40
    RBP: 0000000140000000 R08: 0000000000000000 R09: 0000000000000001
    R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000
    R13: 0000000000140000 R14: 0000000000002f40 R15: ffff9e3e7aff3680
    FS:  0000000000000000(0000) GS:ffff9e3e7bb00000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 000000000000353d CR3: 0000000058610000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    Call Trace:
     __remove_pages+0x4b/0x640
     arch_remove_memory+0x63/0x8d
     try_remove_memory+0xdb/0x130
     __remove_memory+0xa/0x11
     acpi_memory_device_remove+0x70/0x100
     acpi_bus_trim+0x55/0x90
     acpi_device_hotplug+0x227/0x3a0
     acpi_hotplug_work_fn+0x1a/0x30
     process_one_work+0x221/0x550
     worker_thread+0x50/0x3b0
     kthread+0x105/0x140
     ret_from_fork+0x3a/0x50
    Modules linked in:
    CR2: 000000000000353d

Instead, shrink the zones when offlining memory or when onlining failed.
Introduce and use remove_pfn_range_from_zone(() for that.  We now
properly shrink the zones, even if we have DIMMs whereby

 - Some memory blocks fall into no zone (never onlined)

 - Some memory blocks fall into multiple zones (offlined+re-onlined)

 - Multiple memory blocks that fall into different zones

Drop the zone parameter (with a potential dubious value) from
__remove_pages() and __remove_section().

Link: http://lkml.kernel.org/r/20191006085646.5768-6-david@redhat.com
Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online")	[visible after d0dc12e86b319]
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: <stable@vger.kernel.org>	[5.0+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/mm/mmu.c            |  4 +---
 arch/ia64/mm/init.c            |  4 +---
 arch/powerpc/mm/mem.c          |  3 +--
 arch/s390/mm/init.c            |  4 +---
 arch/sh/mm/init.c              |  4 +---
 arch/x86/mm/init_32.c          |  4 +---
 arch/x86/mm/init_64.c          |  4 +---
 include/linux/memory_hotplug.h |  7 +++++--
 mm/memory_hotplug.c            | 31 ++++++++++++++++---------------
 mm/memremap.c                  |  2 +-
 10 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 5a3b15a14a7f..40797cbfba2d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1070,7 +1070,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
 	/*
 	 * FIXME: Cleanup page tables (also in arch_add_memory() in case
@@ -1079,7 +1078,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 	 * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be
 	 * unlocked yet.
 	 */
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 58fd67068bac..b01d68a2d5d9 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 617c2777926f..f5535eae637f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -151,10 +151,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
 	int ret;
 
-	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f0ce22220565..ac44bd76db4b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -292,10 +292,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 	vmem_remove_mapping(start, size);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index dfdbaa50946e..d1b1ff2be17a 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 930edeb41ec3..0a74407ef92e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct zone *zone;
 
-	zone = page_zone(pfn_to_page(start_pfn));
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 }
 #endif
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index dcb9bc961b39..bcfede46fe02 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
-	struct zone *zone = page_zone(page);
 
-	__remove_pages(zone, start_pfn, nr_pages, altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
 	kernel_physical_mapping_remove(start, start + size);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 3a08ecdfca11..ba0dca6aac6e 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -122,8 +122,8 @@ static inline bool movable_node_is_enabled(void)
 
 extern void arch_remove_memory(int nid, u64 start, u64 size,
 			       struct vmem_altmap *altmap);
-extern void __remove_pages(struct zone *zone, unsigned long start_pfn,
-			   unsigned long nr_pages, struct vmem_altmap *altmap);
+extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
+			   struct vmem_altmap *altmap);
 
 /* reasonably generic interface to expand the physical pages */
 extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
@@ -342,6 +342,9 @@ extern int add_memory(int nid, u64 start, u64 size);
 extern int add_memory_resource(int nid, struct resource *resource);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 		unsigned long nr_pages, struct vmem_altmap *altmap);
+extern void remove_pfn_range_from_zone(struct zone *zone,
+				       unsigned long start_pfn,
+				       unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern int sparse_add_section(int nid, unsigned long pfn,
 		unsigned long nr_pages, struct vmem_altmap *altmap);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 55ac23ef11c1..a91a072f2b2c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -483,8 +483,9 @@ static void update_pgdat_span(struct pglist_data *pgdat)
 	pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
 }
 
-static void __remove_zone(struct zone *zone, unsigned long start_pfn,
-		unsigned long nr_pages)
+void __ref remove_pfn_range_from_zone(struct zone *zone,
+				      unsigned long start_pfn,
+				      unsigned long nr_pages)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	unsigned long flags;
@@ -499,28 +500,30 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn,
 		return;
 #endif
 
+	clear_zone_contiguous(zone);
+
 	pgdat_resize_lock(zone->zone_pgdat, &flags);
 	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
 	update_pgdat_span(pgdat);
 	pgdat_resize_unlock(zone->zone_pgdat, &flags);
+
+	set_zone_contiguous(zone);
 }
 
-static void __remove_section(struct zone *zone, unsigned long pfn,
-		unsigned long nr_pages, unsigned long map_offset,
-		struct vmem_altmap *altmap)
+static void __remove_section(unsigned long pfn, unsigned long nr_pages,
+			     unsigned long map_offset,
+			     struct vmem_altmap *altmap)
 {
 	struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
 
 	if (WARN_ON_ONCE(!valid_section(ms)))
 		return;
 
-	__remove_zone(zone, pfn, nr_pages);
 	sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
 }
 
 /**
- * __remove_pages() - remove sections of pages from a zone
- * @zone: zone from which pages need to be removed
+ * __remove_pages() - remove sections of pages
  * @pfn: starting pageframe (must be aligned to start of a section)
  * @nr_pages: number of pages to remove (must be multiple of section size)
  * @altmap: alternative device page map or %NULL if default memmap is used
@@ -530,16 +533,14 @@ static void __remove_section(struct zone *zone, unsigned long pfn,
  * sure that pages are marked reserved and zones are adjust properly by
  * calling offline_pages().
  */
-void __remove_pages(struct zone *zone, unsigned long pfn,
-		    unsigned long nr_pages, struct vmem_altmap *altmap)
+void __remove_pages(unsigned long pfn, unsigned long nr_pages,
+		    struct vmem_altmap *altmap)
 {
 	unsigned long map_offset = 0;
 	unsigned long nr, start_sec, end_sec;
 
 	map_offset = vmem_altmap_offset(altmap);
 
-	clear_zone_contiguous(zone);
-
 	if (check_pfn_span(pfn, nr_pages, "remove"))
 		return;
 
@@ -551,13 +552,11 @@ void __remove_pages(struct zone *zone, unsigned long pfn,
 		cond_resched();
 		pfns = min(nr_pages, PAGES_PER_SECTION
 				- (pfn & ~PAGE_SECTION_MASK));
-		__remove_section(zone, pfn, pfns, map_offset, altmap);
+		__remove_section(pfn, pfns, map_offset, altmap);
 		pfn += pfns;
 		nr_pages -= pfns;
 		map_offset = 0;
 	}
-
-	set_zone_contiguous(zone);
 }
 
 int set_online_page_callback(online_page_callback_t callback)
@@ -869,6 +868,7 @@ failed_addition:
 		 (unsigned long long) pfn << PAGE_SHIFT,
 		 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
 	memory_notify(MEM_CANCEL_ONLINE, &arg);
+	remove_pfn_range_from_zone(zone, pfn, nr_pages);
 	mem_hotplug_done();
 	return ret;
 }
@@ -1628,6 +1628,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	writeback_set_ratelimit();
 
 	memory_notify(MEM_OFFLINE, &arg);
+	remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
 	mem_hotplug_done();
 	return 0;
 
diff --git a/mm/memremap.c b/mm/memremap.c
index 03ccbdfeb697..c51c6bd2fe34 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -120,7 +120,7 @@ void memunmap_pages(struct dev_pagemap *pgmap)
 
 	mem_hotplug_begin();
 	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
-		__remove_pages(page_zone(first_page), PHYS_PFN(res->start),
+		__remove_pages(PHYS_PFN(res->start),
 			       PHYS_PFN(resource_size(res)), NULL);
 	} else {
 		arch_remove_memory(nid, res->start, resource_size(res),

From ac8f05da5174c560de122c499ce5dfb5d0dfbee5 Mon Sep 17 00:00:00 2001
From: Chanho Min <chanho.min@lge.com>
Date: Sat, 4 Jan 2020 12:59:36 -0800
Subject: [PATCH 126/146] mm/zsmalloc.c: fix the migrated zspage statistics.

When zspage is migrated to the other zone, the zone page state should be
updated as well, otherwise the NR_ZSPAGE for each zone shows wrong
counts including proc/zoneinfo in practice.

Link: http://lkml.kernel.org/r/1575434841-48009-1-git-send-email-chanho.min@lge.com
Fixes: 91537fee0013 ("mm: add NR_ZSMALLOC to vmstat")
Signed-off-by: Chanho Min <chanho.min@lge.com>
Signed-off-by: Jinsuk Choi <jjinsuk.choi@lge.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: <stable@vger.kernel.org>        [4.9+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/zsmalloc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 2b2b9aae8a3c..22d17ecfe7df 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -2069,6 +2069,11 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
 		zs_pool_dec_isolated(pool);
 	}
 
+	if (page_zone(newpage) != page_zone(page)) {
+		dec_zone_page_state(page, NR_ZSPAGES);
+		inc_zone_page_state(newpage, NR_ZSPAGES);
+	}
+
 	reset_page(page);
 	put_page(page);
 	page = newpage;

From a69b83e1ae7f6c5ff2cc310870c1708405d86be2 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Sat, 4 Jan 2020 12:59:39 -0800
Subject: [PATCH 127/146] kcov: fix struct layout for kcov_remote_arg

Make the layout of kcov_remote_arg the same for 32-bit and 64-bit code.
This makes it more convenient to write userspace apps that can be
compiled into 32-bit or 64-bit binaries and still work with the same
64-bit kernel.

Also use proper __u32 types in uapi headers instead of unsigned ints.

Link: http://lkml.kernel.org/r/9e91020876029cfefc9211ff747685eba9536426.1575638983.git.andreyknvl@google.com
Fixes: eec028c9386ed1a ("kcov: remote coverage support")
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Marco Elver <elver@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: Chunfeng Yun <chunfeng.yun@mediatek.com>
Cc: "Jacky . Cao @ sony . com" <Jacky.Cao@sony.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/dev-tools/kcov.rst | 10 +++++-----
 include/uapi/linux/kcov.h        | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index 36890b026e77..1c4e1825d769 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -251,11 +251,11 @@ selectively from different subsystems.
 .. code-block:: c
 
     struct kcov_remote_arg {
-	unsigned	trace_mode;
-	unsigned	area_size;
-	unsigned	num_handles;
-	uint64_t	common_handle;
-	uint64_t	handles[0];
+	__u32		trace_mode;
+	__u32		area_size;
+	__u32		num_handles;
+	__aligned_u64	common_handle;
+	__aligned_u64	handles[0];
     };
 
     #define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
index 409d3ad1e6e2..1d0350e44ae3 100644
--- a/include/uapi/linux/kcov.h
+++ b/include/uapi/linux/kcov.h
@@ -9,11 +9,11 @@
  * and the comment before kcov_remote_start() for usage details.
  */
 struct kcov_remote_arg {
-	unsigned int	trace_mode;	/* KCOV_TRACE_PC or KCOV_TRACE_CMP */
-	unsigned int	area_size;	/* Length of coverage buffer in words */
-	unsigned int	num_handles;	/* Size of handles array */
-	__u64		common_handle;
-	__u64		handles[0];
+	__u32		trace_mode;	/* KCOV_TRACE_PC or KCOV_TRACE_CMP */
+	__u32		area_size;	/* Length of coverage buffer in words */
+	__u32		num_handles;	/* Size of handles array */
+	__aligned_u64	common_handle;
+	__aligned_u64	handles[0];
 };
 
 #define KCOV_REMOTE_MAX_HANDLES		0x100

From 84029fd04c201a4c7e0b07ba262664900f47c6f5 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Sat, 4 Jan 2020 12:59:43 -0800
Subject: [PATCH 128/146] memcg: account security cred as well to kmemcg

The cred_jar kmem_cache is already memcg accounted in the current kernel
but cred->security is not.  Account cred->security to kmemcg.

Recently we saw high root slab usage on our production and on further
inspection, we found a buggy application leaking processes.  Though that
buggy application was contained within its memcg but we observe much
more system memory overhead, couple of GiBs, during that period.  This
overhead can adversely impact the isolation on the system.

One source of high overhead we found was cred->security objects, which
have a lifetime of at least the life of the process which allocated
them.

Link: http://lkml.kernel.org/r/20191205223721.40034-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Chris Down <chris@chrisdown.name>
Reviewed-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cred.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/cred.c b/kernel/cred.c
index c0a4c12d38b2..9ed51b70ed80 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -223,7 +223,7 @@ struct cred *cred_alloc_blank(void)
 	new->magic = CRED_MAGIC;
 #endif
 
-	if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
+	if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
 		goto error;
 
 	return new;
@@ -282,7 +282,7 @@ struct cred *prepare_creds(void)
 	new->security = NULL;
 #endif
 
-	if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
+	if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
 		goto error;
 	validate_creds(new);
 	return new;
@@ -715,7 +715,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 #ifdef CONFIG_SECURITY
 	new->security = NULL;
 #endif
-	if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
+	if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
 		goto error;
 
 	put_cred(old);

From e0153fc2c7606f101392b682e720a7a456d6c766 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Sat, 4 Jan 2020 12:59:46 -0800
Subject: [PATCH 129/146] mm: move_pages: return valid node id in status if the
 page is already on the target node

Felix Abecassis reports move_pages() would return random status if the
pages are already on the target node by the below test program:

  int main(void)
  {
	const long node_id = 1;
	const long page_size = sysconf(_SC_PAGESIZE);
	const int64_t num_pages = 8;

	unsigned long nodemask =  1 << node_id;
	long ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask));
	if (ret < 0)
		return (EXIT_FAILURE);

	void **pages = malloc(sizeof(void*) * num_pages);
	for (int i = 0; i < num_pages; ++i) {
		pages[i] = mmap(NULL, page_size, PROT_WRITE | PROT_READ,
				MAP_PRIVATE | MAP_POPULATE | MAP_ANONYMOUS,
				-1, 0);
		if (pages[i] == MAP_FAILED)
			return (EXIT_FAILURE);
	}

	ret = set_mempolicy(MPOL_DEFAULT, NULL, 0);
	if (ret < 0)
		return (EXIT_FAILURE);

	int *nodes = malloc(sizeof(int) * num_pages);
	int *status = malloc(sizeof(int) * num_pages);
	for (int i = 0; i < num_pages; ++i) {
		nodes[i] = node_id;
		status[i] = 0xd0; /* simulate garbage values */
	}

	ret = move_pages(0, num_pages, pages, nodes, status, MPOL_MF_MOVE);
	printf("move_pages: %ld\n", ret);
	for (int i = 0; i < num_pages; ++i)
		printf("status[%d] = %d\n", i, status[i]);
  }

Then running the program would return nonsense status values:

  $ ./move_pages_bug
  move_pages: 0
  status[0] = 208
  status[1] = 208
  status[2] = 208
  status[3] = 208
  status[4] = 208
  status[5] = 208
  status[6] = 208
  status[7] = 208

This is because the status is not set if the page is already on the
target node, but move_pages() should return valid status as long as it
succeeds.  The valid status may be errno or node id.

We can't simply initialize status array to zero since the pages may be
not on node 0.  Fix it by updating status with node id which the page is
already on.

Link: http://lkml.kernel.org/r/1575584353-125392-1-git-send-email-yang.shi@linux.alibaba.com
Fixes: a49bd4d71637 ("mm, numa: rework do_pages_move")
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Reported-by: Felix Abecassis <fabecassis@nvidia.com>
Tested-by: Felix Abecassis <fabecassis@nvidia.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: <stable@vger.kernel.org>	[4.17+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index eae1565285e3..86873b6f38a7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1512,9 +1512,11 @@ static int do_move_pages_to_node(struct mm_struct *mm,
 /*
  * Resolves the given address to a struct page, isolates it from the LRU and
  * puts it to the given pagelist.
- * Returns -errno if the page cannot be found/isolated or 0 when it has been
- * queued or the page doesn't need to be migrated because it is already on
- * the target node
+ * Returns:
+ *     errno - if the page cannot be found/isolated
+ *     0 - when it doesn't have to be migrated because it is already on the
+ *         target node
+ *     1 - when it has been queued
  */
 static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
 		int node, struct list_head *pagelist, bool migrate_all)
@@ -1553,7 +1555,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
 	if (PageHuge(page)) {
 		if (PageHead(page)) {
 			isolate_huge_page(page, pagelist);
-			err = 0;
+			err = 1;
 		}
 	} else {
 		struct page *head;
@@ -1563,7 +1565,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
 		if (err)
 			goto out_putpage;
 
-		err = 0;
+		err = 1;
 		list_add_tail(&head->lru, pagelist);
 		mod_node_page_state(page_pgdat(head),
 			NR_ISOLATED_ANON + page_is_file_cache(head),
@@ -1640,8 +1642,17 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
 		 */
 		err = add_page_for_migration(mm, addr, current_node,
 				&pagelist, flags & MPOL_MF_MOVE_ALL);
-		if (!err)
+
+		if (!err) {
+			/* The page is already on the target node */
+			err = store_status(status, i, current_node, 1);
+			if (err)
+				goto out_flush;
 			continue;
+		} else if (err > 0) {
+			/* The page is successfully queued for migration */
+			continue;
+		}
 
 		err = store_status(status, i, err, 1);
 		if (err)

From b16155a0b01ae999add72b2ad2791b9c66285880 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 4 Jan 2020 12:59:49 -0800
Subject: [PATCH 130/146] fs/direct-io.c: include fs/internal.h for missing
 prototype

Include fs/internal.h to address the following 'sparse' warning:

    fs/direct-io.c:591:5: warning: symbol 'sb_init_dio_done_wq' was not declared. Should it be static?

Link: http://lkml.kernel.org/r/20191209234544.128302-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/direct-io.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0ec4f270139f..00b4d15bb811 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -39,6 +39,8 @@
 #include <linux/atomic.h>
 #include <linux/prefetch.h>
 
+#include "internal.h"
+
 /*
  * How many user pages to map in one call to get_user_pages().  This determines
  * the size of a structure in the slab cache

From 7bebd69ecf10787e6b9559ab780496f54943cc21 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 4 Jan 2020 12:59:52 -0800
Subject: [PATCH 131/146] fs/nsfs.c: include headers for missing declarations

Include linux/proc_fs.h and fs/internal.h to address the following
'sparse' warnings:

    fs/nsfs.c:41:32: warning: symbol 'ns_dentry_operations' was not declared. Should it be static?
    fs/nsfs.c:145:5: warning: symbol 'open_related_ns' was not declared. Should it be static?

Link: http://lkml.kernel.org/r/20191209234822.156179-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nsfs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index a0431642c6b5..f75767bd623a 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -3,6 +3,7 @@
 #include <linux/pseudo_fs.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/proc_fs.h>
 #include <linux/proc_ns.h>
 #include <linux/magic.h>
 #include <linux/ktime.h>
@@ -11,6 +12,8 @@
 #include <linux/nsfs.h>
 #include <linux/uaccess.h>
 
+#include "internal.h"
+
 static struct vfsmount *nsfs_mnt;
 
 static long ns_ioctl(struct file *filp, unsigned int ioctl,

From 213921f967cf44a7bceaee5535ff1d0196885076 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 4 Jan 2020 12:59:55 -0800
Subject: [PATCH 132/146] fs/namespace.c: make to_mnt_ns() static

Make to_mnt_ns() static to address the following 'sparse' warning:

    fs/namespace.c:1731:22: warning: symbol 'to_mnt_ns' was not declared. Should it be static?

Link: http://lkml.kernel.org/r/20191209234830.156260-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index be601d3a8008..5e1bf611a9eb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1728,7 +1728,7 @@ static bool is_mnt_ns_file(struct dentry *dentry)
 	       dentry->d_fsdata == &mntns_operations;
 }
 
-struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
+static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
 {
 	return container_of(ns, struct mnt_namespace, ns);
 }

From 780a0cfda9006a9a22d6473c2d4c527f5c68eb2e Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Sat, 4 Jan 2020 12:59:59 -0800
Subject: [PATCH 133/146] hexagon: parenthesize registers in asm predicates

Hexagon requires that register predicates in assembly be parenthesized.

Link: https://github.com/ClangBuiltLinux/linux/issues/754
Link: http://lkml.kernel.org/r/20191209222956.239798-3-ndesaulniers@google.com
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Suggested-by: Sid Manning <sidneym@codeaurora.org>
Acked-by: Brian Cain <bcain@codeaurora.org>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Tuowen Zhao <ztuowen@gmail.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alexios Zavras <alexios.zavras@intel.com>
Cc: Allison Randal <allison@lohutok.net>
Cc: Will Deacon <will@kernel.org>
Cc: Richard Fontana <rfontana@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/hexagon/include/asm/atomic.h   |  8 ++++----
 arch/hexagon/include/asm/bitops.h   |  8 ++++----
 arch/hexagon/include/asm/cmpxchg.h  |  2 +-
 arch/hexagon/include/asm/futex.h    |  6 +++---
 arch/hexagon/include/asm/spinlock.h | 20 ++++++++++----------
 arch/hexagon/kernel/vm_entry.S      |  2 +-
 6 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 12cd9231c4b8..0231d69c8bf2 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -91,7 +91,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 		"1:	%0 = memw_locked(%1);\n"			\
 		"	%0 = "#op "(%0,%2);\n"				\
 		"	memw_locked(%1,P3)=%0;\n"			\
-		"	if !P3 jump 1b;\n"				\
+		"	if (!P3) jump 1b;\n"				\
 		: "=&r" (output)					\
 		: "r" (&v->counter), "r" (i)				\
 		: "memory", "p3"					\
@@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 		"1:	%0 = memw_locked(%1);\n"			\
 		"	%0 = "#op "(%0,%2);\n"				\
 		"	memw_locked(%1,P3)=%0;\n"			\
-		"	if !P3 jump 1b;\n"				\
+		"	if (!P3) jump 1b;\n"				\
 		: "=&r" (output)					\
 		: "r" (&v->counter), "r" (i)				\
 		: "memory", "p3"					\
@@ -124,7 +124,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 		"1:	%0 = memw_locked(%2);\n"			\
 		"	%1 = "#op "(%0,%3);\n"				\
 		"	memw_locked(%2,P3)=%1;\n"			\
-		"	if !P3 jump 1b;\n"				\
+		"	if (!P3) jump 1b;\n"				\
 		: "=&r" (output), "=&r" (val)				\
 		: "r" (&v->counter), "r" (i)				\
 		: "memory", "p3"					\
@@ -173,7 +173,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 		"	}"
 		"	memw_locked(%2, p3) = %1;"
 		"	{"
-		"		if !p3 jump 1b;"
+		"		if (!p3) jump 1b;"
 		"	}"
 		"2:"
 		: "=&r" (__oldval), "=&r" (tmp)
diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index 47384b094b94..71429f756af0 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -38,7 +38,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
 	"1:	R12 = memw_locked(R10);\n"
 	"	{ P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n"
 	"	memw_locked(R10,P1) = R12;\n"
-	"	{if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+	"	{if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n"
 	: "=&r" (oldval)
 	: "r" (addr), "r" (nr)
 	: "r10", "r11", "r12", "p0", "p1", "memory"
@@ -62,7 +62,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
 	"1:	R12 = memw_locked(R10);\n"
 	"	{ P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n"
 	"	memw_locked(R10,P1) = R12;\n"
-	"	{if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+	"	{if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n"
 	: "=&r" (oldval)
 	: "r" (addr), "r" (nr)
 	: "r10", "r11", "r12", "p0", "p1", "memory"
@@ -88,7 +88,7 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
 	"1:	R12 = memw_locked(R10);\n"
 	"	{ P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n"
 	"	memw_locked(R10,P1) = R12;\n"
-	"	{if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+	"	{if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n"
 	: "=&r" (oldval)
 	: "r" (addr), "r" (nr)
 	: "r10", "r11", "r12", "p0", "p1", "memory"
@@ -223,7 +223,7 @@ static inline int ffs(int x)
 	int r;
 
 	asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n"
-		"{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n"
+		"{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n"
 		: "=&r" (r)
 		: "r" (x)
 		: "p0");
diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index 6091322c3af9..92b8a02e588a 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -30,7 +30,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
 	__asm__ __volatile__ (
 	"1:	%0 = memw_locked(%1);\n"    /*  load into retval */
 	"	memw_locked(%1,P0) = %2;\n" /*  store into memory */
-	"	if !P0 jump 1b;\n"
+	"	if (!P0) jump 1b;\n"
 	: "=&r" (retval)
 	: "r" (ptr), "r" (x)
 	: "memory", "p0"
diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h
index cb635216a732..0191f7c7193e 100644
--- a/arch/hexagon/include/asm/futex.h
+++ b/arch/hexagon/include/asm/futex.h
@@ -16,7 +16,7 @@
 	    /* For example: %1 = %4 */ \
 	    insn \
 	"2: memw_locked(%3,p2) = %1;\n" \
-	"   if !p2 jump 1b;\n" \
+	"   if (!p2) jump 1b;\n" \
 	"   %1 = #0;\n" \
 	"3:\n" \
 	".section .fixup,\"ax\"\n" \
@@ -84,10 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
 	"1: %1 = memw_locked(%3)\n"
 	"   {\n"
 	"      p2 = cmp.eq(%1,%4)\n"
-	"      if !p2.new jump:NT 3f\n"
+	"      if (!p2.new) jump:NT 3f\n"
 	"   }\n"
 	"2: memw_locked(%3,p2) = %5\n"
-	"   if !p2 jump 1b\n"
+	"   if (!p2) jump 1b\n"
 	"3:\n"
 	".section .fixup,\"ax\"\n"
 	"4: %0 = #%6\n"
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
index bfe07d842ff3..ef103b73bec8 100644
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -30,9 +30,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock)
 	__asm__ __volatile__(
 		"1:	R6 = memw_locked(%0);\n"
 		"	{ P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		"	memw_locked(%0,P3) = R6;\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		:
 		: "r" (&lock->lock)
 		: "memory", "r6", "p3"
@@ -46,7 +46,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock)
 		"1:	R6 = memw_locked(%0);\n"
 		"	R6 = add(R6,#-1);\n"
 		"	memw_locked(%0,P3) = R6\n"
-		"	if !P3 jump 1b;\n"
+		"	if (!P3) jump 1b;\n"
 		:
 		: "r" (&lock->lock)
 		: "memory", "r6", "p3"
@@ -61,7 +61,7 @@ static inline int arch_read_trylock(arch_rwlock_t *lock)
 	__asm__ __volatile__(
 		"	R6 = memw_locked(%1);\n"
 		"	{ %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n"
-		"	{ if !P3 jump 1f; }\n"
+		"	{ if (!P3) jump 1f; }\n"
 		"	memw_locked(%1,P3) = R6;\n"
 		"	{ %0 = P3 }\n"
 		"1:\n"
@@ -78,9 +78,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock)
 	__asm__ __volatile__(
 		"1:	R6 = memw_locked(%0)\n"
 		"	{ P3 = cmp.eq(R6,#0);  R6 = #-1;}\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		"	memw_locked(%0,P3) = R6;\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		:
 		: "r" (&lock->lock)
 		: "memory", "r6", "p3"
@@ -94,7 +94,7 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
 	__asm__ __volatile__(
 		"	R6 = memw_locked(%1)\n"
 		"	{ %0 = #0; P3 = cmp.eq(R6,#0);  R6 = #-1;}\n"
-		"	{ if !P3 jump 1f; }\n"
+		"	{ if (!P3) jump 1f; }\n"
 		"	memw_locked(%1,P3) = R6;\n"
 		"	%0 = P3;\n"
 		"1:\n"
@@ -117,9 +117,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	__asm__ __volatile__(
 		"1:	R6 = memw_locked(%0);\n"
 		"	P3 = cmp.eq(R6,#0);\n"
-		"	{ if !P3 jump 1b; R6 = #1; }\n"
+		"	{ if (!P3) jump 1b; R6 = #1; }\n"
 		"	memw_locked(%0,P3) = R6;\n"
-		"	{ if !P3 jump 1b; }\n"
+		"	{ if (!P3) jump 1b; }\n"
 		:
 		: "r" (&lock->lock)
 		: "memory", "r6", "p3"
@@ -139,7 +139,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
 	__asm__ __volatile__(
 		"	R6 = memw_locked(%1);\n"
 		"	P3 = cmp.eq(R6,#0);\n"
-		"	{ if !P3 jump 1f; R6 = #1; %0 = #0; }\n"
+		"	{ if (!P3) jump 1f; R6 = #1; %0 = #0; }\n"
 		"	memw_locked(%1,P3) = R6;\n"
 		"	%0 = P3;\n"
 		"1:\n"
diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S
index 12242c27e2df..4023fdbea490 100644
--- a/arch/hexagon/kernel/vm_entry.S
+++ b/arch/hexagon/kernel/vm_entry.S
@@ -369,7 +369,7 @@ ret_from_fork:
 		R26.L = #LO(do_work_pending);
 		R0 = #VM_INT_DISABLE;
 	}
-	if P0 jump check_work_pending
+	if (P0) jump check_work_pending
 	{
 		R0 = R25;
 		callr R24

From 63e80314ab7cf4783526d2e44ee57a90514911c9 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Sat, 4 Jan 2020 13:00:02 -0800
Subject: [PATCH 134/146] hexagon: work around compiler crash

Clang cannot translate the string "r30" into a valid register yet.

Link: https://github.com/ClangBuiltLinux/linux/issues/755
Link: http://lkml.kernel.org/r/20191028155722.23419-1-ndesaulniers@google.com
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Suggested-by: Sid Manning <sidneym@quicinc.com>
Reviewed-by: Brian Cain <bcain@codeaurora.org>
Cc: Allison Randal <allison@lohutok.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Richard Fontana <rfontana@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/hexagon/kernel/stacktrace.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c
index 35f29423fda8..5ed02f699479 100644
--- a/arch/hexagon/kernel/stacktrace.c
+++ b/arch/hexagon/kernel/stacktrace.c
@@ -11,8 +11,6 @@
 #include <linux/thread_info.h>
 #include <linux/module.h>
 
-register unsigned long current_frame_pointer asm("r30");
-
 struct stackframe {
 	unsigned long fp;
 	unsigned long rets;
@@ -30,7 +28,7 @@ void save_stack_trace(struct stack_trace *trace)
 
 	low = (unsigned long)task_stack_page(current);
 	high = low + THREAD_SIZE;
-	fp = current_frame_pointer;
+	fp = (unsigned long)__builtin_frame_address(0);
 
 	while (fp >= low && fp <= (high - sizeof(*frame))) {
 		frame = (struct stackframe *)fp;

From e39e773ad100ac94f8358d862f20101e802ae54c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 4 Jan 2020 13:00:05 -0800
Subject: [PATCH 135/146] fs/posix_acl.c: fix kernel-doc warnings

Fix kernel-doc warnings in fs/posix_acl.c.
Also fix one typo (setgit -> setgid).

  fs/posix_acl.c:647: warning: Function parameter or member 'inode' not described in 'posix_acl_update_mode'
  fs/posix_acl.c:647: warning: Function parameter or member 'mode_p' not described in 'posix_acl_update_mode'
  fs/posix_acl.c:647: warning: Function parameter or member 'acl' not described in 'posix_acl_update_mode'

Link: http://lkml.kernel.org/r/29b0dc46-1f28-a4e5-b1d0-ba2b65629779@infradead.org
Fixes: 073931017b49d ("posix_acl: Clear SGID bit when setting file permissions")

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Andreas Gruenbacher <agruenba@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/posix_acl.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 84ad1c90d535..249672bf54fe 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -631,12 +631,15 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
 
 /**
  * posix_acl_update_mode  -  update mode in set_acl
+ * @inode: target inode
+ * @mode_p: mode (pointer) for update
+ * @acl: acl pointer
  *
  * Update the file mode when setting an ACL: compute the new file permission
  * bits based on the ACL.  In addition, if the ACL is equivalent to the new
- * file mode, set *acl to NULL to indicate that no ACL should be set.
+ * file mode, set *@acl to NULL to indicate that no ACL should be set.
  *
- * As with chmod, clear the setgit bit if the caller is not in the owning group
+ * As with chmod, clear the setgid bit if the caller is not in the owning group
  * or capable of CAP_FSETID (see inode_change_ok).
  *
  * Called from set_acl inode operations.

From 941f762bcb276259a78e7931674668874ccbda59 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Sat, 4 Jan 2020 13:00:09 -0800
Subject: [PATCH 136/146] mm/oom: fix pgtables units mismatch in Killed process
 message

pr_err() expects kB, but mm_pgtables_bytes() returns the number of bytes.
As everything else is printed in kB, I chose to fix the value rather than
the string.

Before:

[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name
...
[   1878]  1000  1878   217253   151144  1269760        0             0 python
...
Out of memory: Killed process 1878 (python) total-vm:869012kB, anon-rss:604572kB, file-rss:4kB, shmem-rss:0kB, UID:1000 pgtables:1269760kB oom_score_adj:0

After:

[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name
...
[   1436]  1000  1436   217253   151890  1294336        0             0 python
...
Out of memory: Killed process 1436 (python) total-vm:869012kB, anon-rss:607516kB, file-rss:44kB, shmem-rss:0kB, UID:1000 pgtables:1264kB oom_score_adj:0

Link: http://lkml.kernel.org/r/20191211202830.1600-1-idryomov@gmail.com
Fixes: 70cb6d267790 ("mm/oom: add oom_score_adj and pgtables to Killed process message")
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Edward Chron <echron@arista.com>
Cc: David Rientjes <rientjes@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/oom_kill.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 71e3acea7817..d58c481b3df8 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -890,7 +890,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 		K(get_mm_counter(mm, MM_FILEPAGES)),
 		K(get_mm_counter(mm, MM_SHMEMPAGES)),
 		from_kuid(&init_user_ns, task_uid(victim)),
-		mm_pgtables_bytes(mm), victim->signal->oom_score_adj);
+		mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
 	task_unlock(victim);
 
 	/*

From a7c46c0c0e3d62f2764cd08b90934cd2aaaf8545 Mon Sep 17 00:00:00 2001
From: Navid Emamdoost <navid.emamdoost@gmail.com>
Date: Sat, 4 Jan 2020 13:00:12 -0800
Subject: [PATCH 137/146] mm/gup: fix memory leak in __gup_benchmark_ioctl

In the implementation of __gup_benchmark_ioctl() the allocated pages
should be released before returning in case of an invalid cmd.  Release
pages via kvfree().

[akpm@linux-foundation.org: rework code flow, return -EINVAL rather than -1]
Link: http://lkml.kernel.org/r/20191211174653.4102-1-navid.emamdoost@gmail.com
Fixes: 714a3a1ebafe ("mm/gup_benchmark.c: add additional pinning methods")
Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup_benchmark.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 7dd602d7f8db..ad9d5b1c4473 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -26,6 +26,7 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
 	unsigned long i, nr_pages, addr, next;
 	int nr;
 	struct page **pages;
+	int ret = 0;
 
 	if (gup->size > ULONG_MAX)
 		return -EINVAL;
@@ -63,7 +64,9 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
 					    NULL);
 			break;
 		default:
-			return -1;
+			kvfree(pages);
+			ret = -EINVAL;
+			goto out;
 		}
 
 		if (nr <= 0)
@@ -85,7 +88,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
 	gup->put_delta_usec = ktime_us_delta(end_time, start_time);
 
 	kvfree(pages);
-	return 0;
+out:
+	return ret;
 }
 
 static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,

From c77c0a8ac4c522638a8242fcb9de9496e3cdbb2d Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Sat, 4 Jan 2020 13:00:15 -0800
Subject: [PATCH 138/146] mm/hugetlb: defer freeing of huge pages if in
 non-task context

The following lockdep splat was observed when a certain hugetlbfs test
was run:

  ================================
  WARNING: inconsistent lock state
  4.18.0-159.el8.x86_64+debug #1 Tainted: G        W --------- -  -
  --------------------------------
  inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
  swapper/30/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
  ffffffff9acdc038 (hugetlb_lock){+.?.}, at: free_huge_page+0x36f/0xaa0
  {SOFTIRQ-ON-W} state was registered at:
    lock_acquire+0x14f/0x3b0
    _raw_spin_lock+0x30/0x70
    __nr_hugepages_store_common+0x11b/0xb30
    hugetlb_sysctl_handler_common+0x209/0x2d0
    proc_sys_call_handler+0x37f/0x450
    vfs_write+0x157/0x460
    ksys_write+0xb8/0x170
    do_syscall_64+0xa5/0x4d0
    entry_SYSCALL_64_after_hwframe+0x6a/0xdf
  irq event stamp: 691296
  hardirqs last  enabled at (691296): [<ffffffff99bb034b>] _raw_spin_unlock_irqrestore+0x4b/0x60
  hardirqs last disabled at (691295): [<ffffffff99bb0ad2>] _raw_spin_lock_irqsave+0x22/0x81
  softirqs last  enabled at (691284): [<ffffffff97ff0c63>] irq_enter+0xc3/0xe0
  softirqs last disabled at (691285): [<ffffffff97ff0ebe>] irq_exit+0x23e/0x2b0

  other info that might help us debug this:
   Possible unsafe locking scenario:

         CPU0
         ----
    lock(hugetlb_lock);
    <Interrupt>
      lock(hugetlb_lock);

   *** DEADLOCK ***
      :
  Call Trace:
   <IRQ>
   __lock_acquire+0x146b/0x48c0
   lock_acquire+0x14f/0x3b0
   _raw_spin_lock+0x30/0x70
   free_huge_page+0x36f/0xaa0
   bio_check_pages_dirty+0x2fc/0x5c0
   clone_endio+0x17f/0x670 [dm_mod]
   blk_update_request+0x276/0xe50
   scsi_end_request+0x7b/0x6a0
   scsi_io_completion+0x1c6/0x1570
   blk_done_softirq+0x22e/0x350
   __do_softirq+0x23d/0xad8
   irq_exit+0x23e/0x2b0
   do_IRQ+0x11a/0x200
   common_interrupt+0xf/0xf
   </IRQ>

Both the hugetbl_lock and the subpool lock can be acquired in
free_huge_page().  One way to solve the problem is to make both locks
irq-safe.  However, Mike Kravetz had learned that the hugetlb_lock is
held for a linear scan of ALL hugetlb pages during a cgroup reparentling
operation.  So it is just too long to have irq disabled unless we can
break hugetbl_lock down into finer-grained locks with shorter lock hold
times.

Another alternative is to defer the freeing to a workqueue job.  This
patch implements the deferred freeing by adding a free_hpage_workfn()
work function to do the actual freeing.  The free_huge_page() call in a
non-task context saves the page to be freed in the hpage_freelist linked
list in a lockless manner using the llist APIs.

The generic workqueue is used to process the work, but a dedicated
workqueue can be used instead if it is desirable to have the huge page
freed ASAP.

Thanks to Kirill Tkhai <ktkhai@virtuozzo.com> for suggesting the use of
llist APIs which simplfy the code.

Link: http://lkml.kernel.org/r/20191217170331.30893-1-longman@redhat.com
Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ac65bb5e38ac..dd8737a94bec 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -27,6 +27,7 @@
 #include <linux/swapops.h>
 #include <linux/jhash.h>
 #include <linux/numa.h>
+#include <linux/llist.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -1136,7 +1137,7 @@ static inline void ClearPageHugeTemporary(struct page *page)
 	page[2].mapping = NULL;
 }
 
-void free_huge_page(struct page *page)
+static void __free_huge_page(struct page *page)
 {
 	/*
 	 * Can't pass hstate in here because it is called from the
@@ -1199,6 +1200,54 @@ void free_huge_page(struct page *page)
 	spin_unlock(&hugetlb_lock);
 }
 
+/*
+ * As free_huge_page() can be called from a non-task context, we have
+ * to defer the actual freeing in a workqueue to prevent potential
+ * hugetlb_lock deadlock.
+ *
+ * free_hpage_workfn() locklessly retrieves the linked list of pages to
+ * be freed and frees them one-by-one. As the page->mapping pointer is
+ * going to be cleared in __free_huge_page() anyway, it is reused as the
+ * llist_node structure of a lockless linked list of huge pages to be freed.
+ */
+static LLIST_HEAD(hpage_freelist);
+
+static void free_hpage_workfn(struct work_struct *work)
+{
+	struct llist_node *node;
+	struct page *page;
+
+	node = llist_del_all(&hpage_freelist);
+
+	while (node) {
+		page = container_of((struct address_space **)node,
+				     struct page, mapping);
+		node = node->next;
+		__free_huge_page(page);
+	}
+}
+static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
+
+void free_huge_page(struct page *page)
+{
+	/*
+	 * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
+	 */
+	if (!in_task()) {
+		/*
+		 * Only call schedule_work() if hpage_freelist is previously
+		 * empty. Otherwise, schedule_work() had been called but the
+		 * workfn hasn't retrieved the list yet.
+		 */
+		if (llist_add((struct llist_node *)&page->mapping,
+			      &hpage_freelist))
+			schedule_work(&free_hpage_work);
+		return;
+	}
+
+	__free_huge_page(page);
+}
+
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 {
 	INIT_LIST_HEAD(&page->lru);

From 397eac17f86f404f5ba31d8c3e39ec3124b39fd3 Mon Sep 17 00:00:00 2001
From: Kai Li <li.kai4@h3c.com>
Date: Sat, 4 Jan 2020 13:00:18 -0800
Subject: [PATCH 139/146] ocfs2: call journal flush to mark journal as empty
 after journal recovery when mount

If journal is dirty when mount, it will be replayed but jbd2 sb log tail
cannot be updated to mark a new start because journal->j_flag has
already been set with JBD2_ABORT first in journal_init_common.

When a new transaction is committed, it will be recored in block 1
first(journal->j_tail is set to 1 in journal_reset).  If emergency
restart happens again before journal super block is updated
unfortunately, the new recorded trans will not be replayed in the next
mount.

The following steps describe this procedure in detail.
1. mount and touch some files
2. these transactions are committed to journal area but not checkpointed
3. emergency restart
4. mount again and its journals are replayed
5. journal super block's first s_start is 1, but its s_seq is not updated
6. touch a new file and its trans is committed but not checkpointed
7. emergency restart again
8. mount and journal is dirty, but trans committed in 6 will not be
replayed.

This exception happens easily when this lun is used by only one node.
If it is used by multi-nodes, other node will replay its journal and its
journal super block will be updated after recovery like what this patch
does.

ocfs2_recover_node->ocfs2_replay_journal.

The following jbd2 journal can be generated by touching a new file after
journal is replayed, and seq 15 is the first valid commit, but first seq
is 13 in journal super block.

logdump:
  Block 0: Journal Superblock
  Seq: 0   Type: 4 (JBD2_SUPERBLOCK_V2)
  Blocksize: 4096   Total Blocks: 32768   First Block: 1
  First Commit ID: 13   Start Log Blknum: 1
  Error: 0
  Feature Compat: 0
  Feature Incompat: 2 block64
  Feature RO compat: 0
  Journal UUID: 4ED3822C54294467A4F8E87D2BA4BC36
  FS Share Cnt: 1   Dynamic Superblk Blknum: 0
  Per Txn Block Limit    Journal: 0    Data: 0

  Block 1: Journal Commit Block
  Seq: 14   Type: 2 (JBD2_COMMIT_BLOCK)

  Block 2: Journal Descriptor
  Seq: 15   Type: 1 (JBD2_DESCRIPTOR_BLOCK)
  No. Blocknum        Flags
   0. 587             none
  UUID: 00000000000000000000000000000000
   1. 8257792         JBD2_FLAG_SAME_UUID
   2. 619             JBD2_FLAG_SAME_UUID
   3. 24772864        JBD2_FLAG_SAME_UUID
   4. 8257802         JBD2_FLAG_SAME_UUID
   5. 513             JBD2_FLAG_SAME_UUID JBD2_FLAG_LAST_TAG
  ...
  Block 7: Inode
  Inode: 8257802   Mode: 0640   Generation: 57157641 (0x3682809)
  FS Generation: 2839773110 (0xa9437fb6)
  CRC32: 00000000   ECC: 0000
  Type: Regular   Attr: 0x0   Flags: Valid
  Dynamic Features: (0x1) InlineData
  User: 0 (root)   Group: 0 (root)   Size: 7
  Links: 1   Clusters: 0
  ctime: 0x5de5d870 0x11104c61 -- Tue Dec  3 11:37:20.286280801 2019
  atime: 0x5de5d870 0x113181a1 -- Tue Dec  3 11:37:20.288457121 2019
  mtime: 0x5de5d870 0x11104c61 -- Tue Dec  3 11:37:20.286280801 2019
  dtime: 0x0 -- Thu Jan  1 08:00:00 1970
  ...
  Block 9: Journal Commit Block
  Seq: 15   Type: 2 (JBD2_COMMIT_BLOCK)

The following is journal recovery log when recovering the upper jbd2
journal when mount again.

syslog:
  ocfs2: File system on device (252,1) was not unmounted cleanly, recovering it.
  fs/jbd2/recovery.c:(do_one_pass, 449): Starting recovery pass 0
  fs/jbd2/recovery.c:(do_one_pass, 449): Starting recovery pass 1
  fs/jbd2/recovery.c:(do_one_pass, 449): Starting recovery pass 2
  fs/jbd2/recovery.c:(jbd2_journal_recover, 278): JBD2: recovery, exit status 0, recovered transactions 13 to 13

Due to first commit seq 13 recorded in journal super is not consistent
with the value recorded in block 1(seq is 14), journal recovery will be
terminated before seq 15 even though it is an unbroken commit, inode
8257802 is a new file and it will be lost.

Link: http://lkml.kernel.org/r/20191217020140.2197-1-li.kai4@h3c.com
Signed-off-by: Kai Li <li.kai4@h3c.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Changwei Ge <gechangwei@live.cn>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/journal.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 1afe57f425a0..68ba354cf361 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1066,6 +1066,14 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
 
 	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
 
+	if (replayed) {
+		jbd2_journal_lock_updates(journal->j_journal);
+		status = jbd2_journal_flush(journal->j_journal);
+		jbd2_journal_unlock_updates(journal->j_journal);
+		if (status < 0)
+			mlog_errno(status);
+	}
+
 	status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
 	if (status < 0) {
 		mlog_errno(status);

From b73eba2a867e10b9b4477738677341f3307c07bb Mon Sep 17 00:00:00 2001
From: Gang He <GHe@suse.com>
Date: Sat, 4 Jan 2020 13:00:22 -0800
Subject: [PATCH 140/146] ocfs2: fix the crash due to call ocfs2_get_dlm_debug
 once less

Because ocfs2_get_dlm_debug() function is called once less here, ocfs2
file system will trigger the system crash, usually after ocfs2 file
system is unmounted.

This system crash is caused by a generic memory corruption, these crash
backtraces are not always the same, for exapmle,

    ocfs2: Unmounting device (253,16) on (node 172167785)
    general protection fault: 0000 [#1] SMP PTI
    CPU: 3 PID: 14107 Comm: fence_legacy Kdump:
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
    RIP: 0010:__kmalloc+0xa5/0x2a0
    Code: 00 00 4d 8b 07 65 4d 8b
    RSP: 0018:ffffaa1fc094bbe8 EFLAGS: 00010286
    RAX: 0000000000000000 RBX: d310a8800d7a3faf RCX: 0000000000000000
    RDX: 0000000000000000 RSI: 0000000000000dc0 RDI: ffff96e68fc036c0
    RBP: d310a8800d7a3faf R08: ffff96e6ffdb10a0 R09: 00000000752e7079
    R10: 000000000001c513 R11: 0000000004091041 R12: 0000000000000dc0
    R13: 0000000000000039 R14: ffff96e68fc036c0 R15: ffff96e68fc036c0
    FS:  00007f699dfba540(0000) GS:ffff96e6ffd80000(0000) knlGS:00000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 000055f3a9d9b768 CR3: 000000002cd1c000 CR4: 00000000000006e0
    Call Trace:
     ext4_htree_store_dirent+0x35/0x100 [ext4]
     htree_dirblock_to_tree+0xea/0x290 [ext4]
     ext4_htree_fill_tree+0x1c1/0x2d0 [ext4]
     ext4_readdir+0x67c/0x9d0 [ext4]
     iterate_dir+0x8d/0x1a0
     __x64_sys_getdents+0xab/0x130
     do_syscall_64+0x60/0x1f0
     entry_SYSCALL_64_after_hwframe+0x49/0xbe
    RIP: 0033:0x7f699d33a9fb

This regression problem was introduced by commit e581595ea29c ("ocfs: no
need to check return value of debugfs_create functions").

Link: http://lkml.kernel.org/r/20191225061501.13587-1-ghe@suse.com
Fixes: e581595ea29c ("ocfs: no need to check return value of debugfs_create functions")
Signed-off-by: Gang He <ghe@suse.com>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>	[5.3+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlmglue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 1c4c51f3df60..cda1027d0819 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3282,6 +3282,7 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
 
 	debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
 			   &dlm_debug->d_filter_secs);
+	ocfs2_get_dlm_debug(dlm_debug);
 }
 
 static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)

From 7312b70699252074d753c5005fc67266c547bbe3 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Sat, 4 Jan 2020 13:00:26 -0800
Subject: [PATCH 141/146] hexagon: define ioremap_uc

Similar to commit 38e45d81d14e ("sparc64: implement ioremap_uc") define
ioremap_uc for hexagon to avoid errors from
-Wimplicit-function-definition.

Link: http://lkml.kernel.org/r/20191209222956.239798-2-ndesaulniers@google.com
Link: https://github.com/ClangBuiltLinux/linux/issues/797
Fixes: e537654b7039 ("lib: devres: add a helper function for ioremap_uc")
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Suggested-by: Nathan Chancellor <natechancellor@gmail.com>
Acked-by: Brian Cain <bcain@codeaurora.org>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Tuowen Zhao <ztuowen@gmail.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alexios Zavras <alexios.zavras@intel.com>
Cc: Allison Randal <allison@lohutok.net>
Cc: Will Deacon <will@kernel.org>
Cc: Richard Fontana <rfontana@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/hexagon/include/asm/io.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index 539e3efcf39c..b0dbc3473172 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -173,6 +173,7 @@ static inline void writel(u32 data, volatile void __iomem *addr)
 
 void __iomem *ioremap(unsigned long phys_addr, unsigned long size);
 #define ioremap_nocache ioremap
+#define ioremap_uc(X, Y) ioremap((X), (Y))
 
 
 #define __raw_writel writel

From 8c62ed27a12c00e3db1c9f04bc0f272bdbb06734 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 2 Jan 2020 05:31:22 -0800
Subject: [PATCH 142/146] apparmor: fix aa_xattrs_match() may sleep while
 holding a RCU lock

aa_xattrs_match() is unfortunately calling vfs_getxattr_alloc() from a
context protected by an rcu_read_lock. This can not be done as
vfs_getxattr_alloc() may sleep regardles of the gfp_t value being
passed to it.

Fix this by breaking the rcu_read_lock on the policy search when the
xattr match feature is requested and restarting the search if a policy
changes occur.

Fixes: 8e51f9087f40 ("apparmor: Add support for attaching profiles via xattr, presence and value")
Reported-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c |  2 +-
 security/apparmor/domain.c     | 80 ++++++++++++++++++----------------
 security/apparmor/policy.c     |  4 +-
 3 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 09996f2552ee..47aff8700547 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -623,7 +623,7 @@ static __poll_t ns_revision_poll(struct file *file, poll_table *pt)
 
 void __aa_bump_ns_revision(struct aa_ns *ns)
 {
-	ns->revision++;
+	WRITE_ONCE(ns->revision, ns->revision + 1);
 	wake_up_interruptible(&ns->wait);
 }
 
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 9be7ccb8379e..6ceb74e0f789 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -317,6 +317,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
 
 	if (!bprm || !profile->xattr_count)
 		return 0;
+	might_sleep();
 
 	/* transition from exec match to xattr set */
 	state = aa_dfa_null_transition(profile->xmatch, state);
@@ -361,10 +362,11 @@ out:
 }
 
 /**
- * __attach_match_ - find an attachment match
+ * find_attach - do attachment search for unconfined processes
  * @bprm - binprm structure of transitioning task
- * @name - to match against  (NOT NULL)
+ * @ns: the current namespace  (NOT NULL)
  * @head - profile list to walk  (NOT NULL)
+ * @name - to match against  (NOT NULL)
  * @info - info message if there was an error (NOT NULL)
  *
  * Do a linear search on the profiles in the list.  There is a matching
@@ -374,12 +376,11 @@ out:
  *
  * Requires: @head not be shared or have appropriate locks held
  *
- * Returns: profile or NULL if no match found
+ * Returns: label or NULL if no match found
  */
-static struct aa_profile *__attach_match(const struct linux_binprm *bprm,
-					 const char *name,
-					 struct list_head *head,
-					 const char **info)
+static struct aa_label *find_attach(const struct linux_binprm *bprm,
+				    struct aa_ns *ns, struct list_head *head,
+				    const char *name, const char **info)
 {
 	int candidate_len = 0, candidate_xattrs = 0;
 	bool conflict = false;
@@ -388,6 +389,8 @@ static struct aa_profile *__attach_match(const struct linux_binprm *bprm,
 	AA_BUG(!name);
 	AA_BUG(!head);
 
+	rcu_read_lock();
+restart:
 	list_for_each_entry_rcu(profile, head, base.list) {
 		if (profile->label.flags & FLAG_NULL &&
 		    &profile->label == ns_unconfined(profile->ns))
@@ -413,16 +416,32 @@ static struct aa_profile *__attach_match(const struct linux_binprm *bprm,
 			perm = dfa_user_allow(profile->xmatch, state);
 			/* any accepting state means a valid match. */
 			if (perm & MAY_EXEC) {
-				int ret;
+				int ret = 0;
 
 				if (count < candidate_len)
 					continue;
 
-				ret = aa_xattrs_match(bprm, profile, state);
-				/* Fail matching if the xattrs don't match */
-				if (ret < 0)
-					continue;
+				if (bprm && profile->xattr_count) {
+					long rev = READ_ONCE(ns->revision);
 
+					if (!aa_get_profile_not0(profile))
+						goto restart;
+					rcu_read_unlock();
+					ret = aa_xattrs_match(bprm, profile,
+							      state);
+					rcu_read_lock();
+					aa_put_profile(profile);
+					if (rev !=
+					    READ_ONCE(ns->revision))
+						/* policy changed */
+						goto restart;
+					/*
+					 * Fail matching if the xattrs don't
+					 * match
+					 */
+					if (ret < 0)
+						continue;
+				}
 				/*
 				 * TODO: allow for more flexible best match
 				 *
@@ -445,43 +464,28 @@ static struct aa_profile *__attach_match(const struct linux_binprm *bprm,
 				candidate_xattrs = ret;
 				conflict = false;
 			}
-		} else if (!strcmp(profile->base.name, name))
+		} else if (!strcmp(profile->base.name, name)) {
 			/*
 			 * old exact non-re match, without conditionals such
 			 * as xattrs. no more searching required
 			 */
-			return profile;
+			candidate = profile;
+			goto out;
+		}
 	}
 
-	if (conflict) {
-		*info = "conflicting profile attachments";
+	if (!candidate || conflict) {
+		if (conflict)
+			*info = "conflicting profile attachments";
+		rcu_read_unlock();
 		return NULL;
 	}
 
-	return candidate;
-}
-
-/**
- * find_attach - do attachment search for unconfined processes
- * @bprm - binprm structure of transitioning task
- * @ns: the current namespace  (NOT NULL)
- * @list: list to search  (NOT NULL)
- * @name: the executable name to match against  (NOT NULL)
- * @info: info message if there was an error
- *
- * Returns: label or NULL if no match found
- */
-static struct aa_label *find_attach(const struct linux_binprm *bprm,
-				    struct aa_ns *ns, struct list_head *list,
-				    const char *name, const char **info)
-{
-	struct aa_profile *profile;
-
-	rcu_read_lock();
-	profile = aa_get_profile(__attach_match(bprm, name, list, info));
+out:
+	candidate = aa_get_newest_profile(candidate);
 	rcu_read_unlock();
 
-	return profile ? &profile->label : NULL;
+	return &candidate->label;
 }
 
 static const char *next_name(int xtype, const char *name)
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 03104830c913..269f2f53c0b1 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -1125,8 +1125,8 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj,
 	if (!name) {
 		/* remove namespace - can only happen if fqname[0] == ':' */
 		mutex_lock_nested(&ns->parent->lock, ns->level);
-		__aa_remove_ns(ns);
 		__aa_bump_ns_revision(ns);
+		__aa_remove_ns(ns);
 		mutex_unlock(&ns->parent->lock);
 	} else {
 		/* remove profile */
@@ -1138,9 +1138,9 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj,
 			goto fail_ns_lock;
 		}
 		name = profile->base.hname;
+		__aa_bump_ns_revision(ns);
 		__remove_profile(profile);
 		__aa_labelset_update_subtree(ns);
-		__aa_bump_ns_revision(ns);
 		mutex_unlock(&ns->lock);
 	}
 

From 9d05c18e8d7de566ff68f221fcae65e78708dd1d Mon Sep 17 00:00:00 2001
From: Zong Li <zong.li@sifive.com>
Date: Mon, 23 Dec 2019 16:46:14 +0800
Subject: [PATCH 143/146] clocksource: riscv: add notrace to riscv_sched_clock

When enabling ftrace graph tracer, it gets the tracing clock in
ftrace_push_return_trace().  Eventually, it invokes riscv_sched_clock()
to get the clock value.  If riscv_sched_clock() isn't marked with
'notrace', it will call ftrace_push_return_trace() and cause infinite
loop.

The result of failure as follow:

command: echo function_graph >current_tracer
[   46.176787] Unable to handle kernel paging request at virtual address ffffffe04fb38c48
[   46.177309] Oops [#1]
[   46.177478] Modules linked in:
[   46.177770] CPU: 0 PID: 256 Comm: $d Not tainted 5.5.0-rc1 #47
[   46.177981] epc: ffffffe00035e59a ra : ffffffe00035e57e sp : ffffffe03a7569b0
[   46.178216]  gp : ffffffe000d29b90 tp : ffffffe03a756180 t0 : ffffffe03a756968
[   46.178430]  t1 : ffffffe00087f408 t2 : ffffffe03a7569a0 s0 : ffffffe03a7569f0
[   46.178643]  s1 : ffffffe00087f408 a0 : 0000000ac054cda4 a1 : 000000000087f411
[   46.178856]  a2 : 0000000ac054cda4 a3 : 0000000000373ca0 a4 : ffffffe04fb38c48
[   46.179099]  a5 : 00000000153e22a8 a6 : 00000000005522ff a7 : 0000000000000005
[   46.179338]  s2 : ffffffe03a756a90 s3 : ffffffe00032811c s4 : ffffffe03a756a58
[   46.179570]  s5 : ffffffe000d29fe0 s6 : 0000000000000001 s7 : 0000000000000003
[   46.179809]  s8 : 0000000000000003 s9 : 0000000000000002 s10: 0000000000000004
[   46.180053]  s11: 0000000000000000 t3 : 0000003fc815749c t4 : 00000000000efc90
[   46.180293]  t5 : ffffffe000d29658 t6 : 0000000000040000
[   46.180482] status: 0000000000000100 badaddr: ffffffe04fb38c48 cause: 000000000000000f

Signed-off-by: Zong Li <zong.li@sifive.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
[paul.walmsley@sifive.com: cleaned up patch description]
Fixes: 92e0d143fdef ("clocksource/drivers/riscv_timer: Provide the sched_clock")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 drivers/clocksource/timer-riscv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 4e54856ce2a5..c4f15c4068c0 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -56,7 +56,7 @@ static unsigned long long riscv_clocksource_rdtime(struct clocksource *cs)
 	return get_cycles64();
 }
 
-static u64 riscv_sched_clock(void)
+static u64 notrace riscv_sched_clock(void)
 {
 	return get_cycles64();
 }

From 2f3035da4019780250658d1ffe486bc324e04805 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul.walmsley@sifive.com>
Date: Fri, 20 Dec 2019 03:09:49 -0800
Subject: [PATCH 144/146] riscv: prefix IRQ_ macro names with an RV_ namespace

"IRQ_TIMER", used in the arch/riscv CSR header file, is a sufficiently
generic macro name that it's used by several source files across the
Linux code base.  Some of these other files ultimately include the
arch/riscv CSR include file, causing collisions.  Fix by prefixing the
RISC-V csr.h IRQ_ macro names with an RV_ prefix.

Fixes: a4c3733d32a72 ("riscv: abstract out CSR names for supervisor vs machine mode")
Reported-by: Olof Johansson <olof@lixom.net>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/riscv/include/asm/csr.h      | 18 +++++++++---------
 arch/riscv/kernel/irq.c           |  6 +++---
 drivers/irqchip/irq-sifive-plic.c |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 0a62d2d68455..435b65532e29 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -116,9 +116,9 @@
 # define SR_PIE		SR_MPIE
 # define SR_PP		SR_MPP
 
-# define IRQ_SOFT	IRQ_M_SOFT
-# define IRQ_TIMER	IRQ_M_TIMER
-# define IRQ_EXT	IRQ_M_EXT
+# define RV_IRQ_SOFT		IRQ_M_SOFT
+# define RV_IRQ_TIMER	IRQ_M_TIMER
+# define RV_IRQ_EXT		IRQ_M_EXT
 #else /* CONFIG_RISCV_M_MODE */
 # define CSR_STATUS	CSR_SSTATUS
 # define CSR_IE		CSR_SIE
@@ -133,15 +133,15 @@
 # define SR_PIE		SR_SPIE
 # define SR_PP		SR_SPP
 
-# define IRQ_SOFT	IRQ_S_SOFT
-# define IRQ_TIMER	IRQ_S_TIMER
-# define IRQ_EXT	IRQ_S_EXT
+# define RV_IRQ_SOFT		IRQ_S_SOFT
+# define RV_IRQ_TIMER	IRQ_S_TIMER
+# define RV_IRQ_EXT		IRQ_S_EXT
 #endif /* CONFIG_RISCV_M_MODE */
 
 /* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
-#define IE_SIE		(_AC(0x1, UL) << IRQ_SOFT)
-#define IE_TIE		(_AC(0x1, UL) << IRQ_TIMER)
-#define IE_EIE		(_AC(0x1, UL) << IRQ_EXT)
+#define IE_SIE		(_AC(0x1, UL) << RV_IRQ_SOFT)
+#define IE_TIE		(_AC(0x1, UL) << RV_IRQ_TIMER)
+#define IE_EIE		(_AC(0x1, UL) << RV_IRQ_EXT)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 3f07a91d5afb..345c4f2eba13 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -23,11 +23,11 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
 
 	irq_enter();
 	switch (regs->cause & ~CAUSE_IRQ_FLAG) {
-	case IRQ_TIMER:
+	case RV_IRQ_TIMER:
 		riscv_timer_interrupt();
 		break;
 #ifdef CONFIG_SMP
-	case IRQ_SOFT:
+	case RV_IRQ_SOFT:
 		/*
 		 * We only use software interrupts to pass IPIs, so if a non-SMP
 		 * system gets one, then we don't know what to do.
@@ -35,7 +35,7 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
 		riscv_software_interrupt();
 		break;
 #endif
-	case IRQ_EXT:
+	case RV_IRQ_EXT:
 		handle_arch_irq(regs);
 		break;
 	default:
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 8df547d2d935..0aca5807a119 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -256,7 +256,7 @@ static int __init plic_init(struct device_node *node,
 		 * Skip contexts other than external interrupts for our
 		 * privilege level.
 		 */
-		if (parent.args[0] != IRQ_EXT)
+		if (parent.args[0] != RV_IRQ_EXT)
 			continue;
 
 		hartid = plic_find_hart_id(parent.np);

From 0e194d9da198936fe4fb4c1e031de0f7791c09b8 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul.walmsley@sifive.com>
Date: Fri, 22 Nov 2019 18:33:28 -0800
Subject: [PATCH 145/146] Documentation: riscv: add patch acceptance guidelines

Formalize, in kernel documentation, the patch acceptance policy for
arch/riscv.  In summary, it states that as maintainers, we plan to
only accept patches for new modules or extensions that have been
frozen or ratified by the RISC-V Foundation.

We've been following these guidelines for the past few months.  In the
meantime, we've received quite a bit of feedback that it would be
helpful to have these guidelines formally documented.

Based on a suggestion from Matthew Wilcox, we also add a link to this
file to Documentation/process/index.rst, to make this document easier
to find.  The format of this document has also been changed to align
to the format outlined in the maintainer entry profiles, in accordance
with comments from Jon Corbet and Dan Williams.

Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Krste Asanovic <krste@berkeley.edu>
Cc: Andrew Waterman <waterman@eecs.berkeley.edu>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/index.rst          |  1 +
 Documentation/riscv/index.rst            |  1 +
 Documentation/riscv/patch-acceptance.rst | 35 ++++++++++++++++++++++++
 MAINTAINERS                              |  1 +
 4 files changed, 38 insertions(+)
 create mode 100644 Documentation/riscv/patch-acceptance.rst

diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 21aa7d5358e6..6399d92f0b21 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -60,6 +60,7 @@ lack of a better place.
    volatile-considered-harmful
    botching-up-ioctls
    clang-format
+   ../riscv/patch-acceptance
 
 .. only::  subproject and html
 
diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst
index 215fd3c1f2d5..fa33bffd8992 100644
--- a/Documentation/riscv/index.rst
+++ b/Documentation/riscv/index.rst
@@ -7,6 +7,7 @@ RISC-V architecture
 
     boot-image-header
     pmu
+    patch-acceptance
 
 .. only::  subproject and html
 
diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst
new file mode 100644
index 000000000000..dfe0ac5624fb
--- /dev/null
+++ b/Documentation/riscv/patch-acceptance.rst
@@ -0,0 +1,35 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+arch/riscv maintenance guidelines for developers
+================================================
+
+Overview
+--------
+The RISC-V instruction set architecture is developed in the open:
+in-progress drafts are available for all to review and to experiment
+with implementations.  New module or extension drafts can change
+during the development process - sometimes in ways that are
+incompatible with previous drafts.  This flexibility can present a
+challenge for RISC-V Linux maintenance.  Linux maintainers disapprove
+of churn, and the Linux development process prefers well-reviewed and
+tested code over experimental code.  We wish to extend these same
+principles to the RISC-V-related code that will be accepted for
+inclusion in the kernel.
+
+Submit Checklist Addendum
+-------------------------
+We'll only accept patches for new modules or extensions if the
+specifications for those modules or extensions are listed as being
+"Frozen" or "Ratified" by the RISC-V Foundation.  (Developers may, of
+course, maintain their own Linux kernel trees that contain code for
+any draft extensions that they wish.)
+
+Additionally, the RISC-V specification allows implementors to create
+their own custom extensions.  These custom extensions aren't required
+to go through any review or ratification process by the RISC-V
+Foundation.  To avoid the maintenance complexity and potential
+performance impact of adding kernel code for implementor-specific
+RISC-V extensions, we'll only to accept patches for extensions that
+have been officially frozen or ratified by the RISC-V Foundation.
+(Implementors, may, of course, maintain their own Linux kernel trees
+containing code for any custom extensions that they wish.)
diff --git a/MAINTAINERS b/MAINTAINERS
index e09bd92a1e44..2987d1e16d20 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14119,6 +14119,7 @@ M:	Paul Walmsley <paul.walmsley@sifive.com>
 M:	Palmer Dabbelt <palmer@dabbelt.com>
 M:	Albert Ou <aou@eecs.berkeley.edu>
 L:	linux-riscv@lists.infradead.org
+P:	Documentation/riscv/patch-acceptance.rst
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
 S:	Supported
 F:	arch/riscv/

From c79f46a282390e0f5b306007bf7b11a46d529538 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 5 Jan 2020 14:23:27 -0800
Subject: [PATCH 146/146] Linux 5.5-rc5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b99d95df8075..e4c2d0327d8c 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*