From 3ff340e24c9dd5cff9fc07d67914c5adf67f80d6 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 11 Jun 2021 08:42:50 +0300
Subject: [PATCH 001/502] bus: ti-sysc: Fix gpt12 system timer issue with
 reserved status

Jarkko Nikula <jarkko.nikula@bitmer.com> reported that Beagleboard
revision c2 stopped booting. Jarkko bisected the issue down to
commit 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend
and resume for am3 and am4").

Let's fix the issue by tagging system timers as reserved rather than
ignoring them. And let's not probe any interconnect target module child
devices for reserved modules.

This allows PM runtime to keep track of clocks and clockdomains for
the interconnect target module, and prevent the system timer from idling
as we already have SYSC_QUIRK_NO_IDLE and SYSC_QUIRK_NO_IDLE_ON_INIT
flags set for system timers.

Fixes: 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend and resume for am3 and am4")
Reported-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Tested-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 38cb116ed433..188cdb0a394e 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -100,6 +100,7 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = {
  * @cookie: data used by legacy platform callbacks
  * @name: name if available
  * @revision: interconnect target module revision
+ * @reserved: target module is reserved and already in use
  * @enabled: sysc runtime enabled status
  * @needs_resume: runtime resume needed on resume from suspend
  * @child_needs_resume: runtime resume needed for child on resume from suspend
@@ -130,6 +131,7 @@ struct sysc {
 	struct ti_sysc_cookie cookie;
 	const char *name;
 	u32 revision;
+	unsigned int reserved:1;
 	unsigned int enabled:1;
 	unsigned int needs_resume:1;
 	unsigned int child_needs_resume:1;
@@ -3093,8 +3095,8 @@ static int sysc_probe(struct platform_device *pdev)
 		return error;
 
 	error = sysc_check_active_timer(ddata);
-	if (error)
-		return error;
+	if (error == -EBUSY)
+		ddata->reserved = true;
 
 	error = sysc_get_clocks(ddata);
 	if (error)
@@ -3130,11 +3132,15 @@ static int sysc_probe(struct platform_device *pdev)
 	sysc_show_registers(ddata);
 
 	ddata->dev->type = &sysc_device_type;
-	error = of_platform_populate(ddata->dev->of_node, sysc_match_table,
-				     pdata ? pdata->auxdata : NULL,
-				     ddata->dev);
-	if (error)
-		goto err;
+
+	if (!ddata->reserved) {
+		error = of_platform_populate(ddata->dev->of_node,
+					     sysc_match_table,
+					     pdata ? pdata->auxdata : NULL,
+					     ddata->dev);
+		if (error)
+			goto err;
+	}
 
 	INIT_DELAYED_WORK(&ddata->idle_work, ti_sysc_idle);
 

From 7c1a80e80cde008f271bae630d28cf684351e807 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 25 Jun 2021 13:23:54 +0300
Subject: [PATCH 002/502] net: xfrm: fix memory leak in xfrm_user_rcv_msg

Syzbot reported memory leak in xfrm_user_rcv_msg(). The
problem was is non-freed skb's frag_list.

In skb_release_all() skb_release_data() will be called only
in case of skb->head != NULL, but netlink_skb_destructor()
sets head to NULL. So, allocated frag_list skb should be
freed manualy, since consume_skb() won't take care of it

Fixes: 5106f4a8acff ("xfrm/compat: Add 32=>64-bit messages translator")
Reported-and-tested-by: syzbot+fb347cf82c73a90efcca@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_user.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b47d613409b7..7aff641c717d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2811,6 +2811,16 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	err = link->doit(skb, nlh, attrs);
 
+	/* We need to free skb allocated in xfrm_alloc_compat() before
+	 * returning from this function, because consume_skb() won't take
+	 * care of frag_list since netlink destructor sets
+	 * sbk->head to NULL. (see netlink_skb_destructor())
+	 */
+	if (skb_has_frag_list(skb)) {
+		kfree_skb(skb_shinfo(skb)->frag_list);
+		skb_shinfo(skb)->frag_list = NULL;
+	}
+
 err:
 	kvfree(nlh64);
 	return err;

From eaf228263921cd15962654b539d916380a0f076e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 2 Jul 2021 09:20:22 +0200
Subject: [PATCH 003/502] Revert "xfrm: policy: Read seqcount outside of
 rcu-read side in xfrm_policy_lookup_bytype"

This reverts commit d7b0408934c749f546b01f2b33d07421a49b6f3e.

This commit tried to fix a locking bug introduced by commit 77cc278f7b20
("xfrm: policy: Use sequence counters with associated lock"). As it
turned out, this patch did not really fix the bug. A proper fix
for this bug is applied on top of this revert.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e9d0df2a2ab1..ce500f847b99 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2092,15 +2092,12 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	if (unlikely(!daddr || !saddr))
 		return NULL;
 
- retry:
-	sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
 	rcu_read_lock();
-
-	chain = policy_hash_direct(net, daddr, saddr, family, dir);
-	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
-		rcu_read_unlock();
-		goto retry;
-	}
+ retry:
+	do {
+		sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+		chain = policy_hash_direct(net, daddr, saddr, family, dir);
+	} while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
 
 	ret = NULL;
 	hlist_for_each_entry_rcu(pol, chain, bydst) {
@@ -2131,15 +2128,11 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	}
 
 skip_inexact:
-	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
-		rcu_read_unlock();
+	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
 		goto retry;
-	}
 
-	if (ret && !xfrm_pol_hold_rcu(ret)) {
-		rcu_read_unlock();
+	if (ret && !xfrm_pol_hold_rcu(ret))
 		goto retry;
-	}
 fail:
 	rcu_read_unlock();
 

From 2580d3f40022642452dd8422bfb8c22e54cf84bb Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Mon, 28 Jun 2021 15:34:28 +0200
Subject: [PATCH 004/502] xfrm: Fix RCU vs hash_resize_mutex lock inversion

xfrm_bydst_resize() calls synchronize_rcu() while holding
hash_resize_mutex. But then on PREEMPT_RT configurations,
xfrm_policy_lookup_bytype() may acquire that mutex while running in an
RCU read side critical section. This results in a deadlock.

In fact the scope of hash_resize_mutex is way beyond the purpose of
xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy
for a given destination/direction, along with other details.

The lower level net->xfrm.xfrm_policy_lock, which among other things
protects per destination/direction references to policy entries, is
enough to serialize and benefit from priority inheritance against the
write side. As a bonus, it makes it officially a per network namespace
synchronization business where a policy table resize on namespace A
shouldn't block a policy lookup on namespace B.

Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock)
Cc: stable@vger.kernel.org
Cc: Ahmed S. Darwish <a.darwish@linutronix.de>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Varad Gautam <varad.gautam@suse.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/netns/xfrm.h |  1 +
 net/xfrm/xfrm_policy.c   | 17 ++++++++---------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index e816b6a3ef2b..9b376b87bd54 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -74,6 +74,7 @@ struct netns_xfrm {
 #endif
 	spinlock_t		xfrm_state_lock;
 	seqcount_spinlock_t	xfrm_state_hash_generation;
+	seqcount_spinlock_t	xfrm_policy_hash_generation;
 
 	spinlock_t xfrm_policy_lock;
 	struct mutex xfrm_cfg_mutex;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ce500f847b99..46a6d15b66d6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
 
 static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
 
 static struct rhashtable xfrm_policy_inexact_table;
 static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
 		return;
 
 	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-	write_seqcount_begin(&xfrm_policy_hash_generation);
+	write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 
 	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
 				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
@@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
 	rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
 	net->xfrm.policy_bydst[dir].hmask = nhashmask;
 
-	write_seqcount_end(&xfrm_policy_hash_generation);
+	write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	synchronize_rcu();
@@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
 
 	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-	write_seqcount_begin(&xfrm_policy_hash_generation);
+	write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 
 	/* make sure that we can insert the indirect policies again before
 	 * we start with destructive action.
@@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 
 out_unlock:
 	__xfrm_policy_inexact_flush(net);
-	write_seqcount_end(&xfrm_policy_hash_generation);
+	write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	mutex_unlock(&hash_resize_mutex);
@@ -2095,9 +2094,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	rcu_read_lock();
  retry:
 	do {
-		sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+		sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 		chain = policy_hash_direct(net, daddr, saddr, family, dir);
-	} while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
+	} while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence));
 
 	ret = NULL;
 	hlist_for_each_entry_rcu(pol, chain, bydst) {
@@ -2128,7 +2127,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	}
 
 skip_inexact:
-	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
+	if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence))
 		goto retry;
 
 	if (ret && !xfrm_pol_hold_rcu(ret))
@@ -4084,6 +4083,7 @@ static int __net_init xfrm_net_init(struct net *net)
 	/* Initialize the per-net locks here */
 	spin_lock_init(&net->xfrm.xfrm_state_lock);
 	spin_lock_init(&net->xfrm.xfrm_policy_lock);
+	seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
 	mutex_init(&net->xfrm.xfrm_cfg_mutex);
 
 	rv = xfrm_statistics_init(net);
@@ -4128,7 +4128,6 @@ void __init xfrm_init(void)
 {
 	register_pernet_subsys(&xfrm_net_ops);
 	xfrm_dev_init();
-	seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
 	xfrm_input_init();
 
 #ifdef CONFIG_XFRM_ESPINTCP

From ecef6a9effe49e8e2635c839020b9833b71e934c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 9 Jul 2021 15:02:37 +0200
Subject: [PATCH 005/502] libata: fix ata_pio_sector for CONFIG_HIGHMEM

Data transfers are not required to be block aligned in memory, so they
span two pages.  Fix this by splitting the call to >sff_data_xfer into
two for that case.

This has been broken since the initial libata import before the damn
of git, but was uncovered by the legacy ide driver removal.

Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210709130237.3730959-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/libata-sff.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index ae7189d1a568..b71ea4a680b0 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -637,6 +637,20 @@ unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf,
 }
 EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
 
+static void ata_pio_xfer(struct ata_queued_cmd *qc, struct page *page,
+		unsigned int offset, size_t xfer_size)
+{
+	bool do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
+	unsigned char *buf;
+
+	buf = kmap_atomic(page);
+	qc->ap->ops->sff_data_xfer(qc, buf + offset, xfer_size, do_write);
+	kunmap_atomic(buf);
+
+	if (!do_write && !PageSlab(page))
+		flush_dcache_page(page);
+}
+
 /**
  *	ata_pio_sector - Transfer a sector of data.
  *	@qc: Command on going
@@ -648,11 +662,9 @@ EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
  */
 static void ata_pio_sector(struct ata_queued_cmd *qc)
 {
-	int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
 	struct ata_port *ap = qc->ap;
 	struct page *page;
 	unsigned int offset;
-	unsigned char *buf;
 
 	if (!qc->cursg) {
 		qc->curbytes = qc->nbytes;
@@ -670,13 +682,20 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 
 	DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");
 
-	/* do the actual data transfer */
-	buf = kmap_atomic(page);
-	ap->ops->sff_data_xfer(qc, buf + offset, qc->sect_size, do_write);
-	kunmap_atomic(buf);
+	/*
+	 * Split the transfer when it splits a page boundary.  Note that the
+	 * split still has to be dword aligned like all ATA data transfers.
+	 */
+	WARN_ON_ONCE(offset % 4);
+	if (offset + qc->sect_size > PAGE_SIZE) {
+		unsigned int split_len = PAGE_SIZE - offset;
 
-	if (!do_write && !PageSlab(page))
-		flush_dcache_page(page);
+		ata_pio_xfer(qc, page, offset, split_len);
+		ata_pio_xfer(qc, nth_page(page, 1), 0,
+			     qc->sect_size - split_len);
+	} else {
+		ata_pio_xfer(qc, page, offset, qc->sect_size);
+	}
 
 	qc->curbytes += qc->sect_size;
 	qc->cursg_ofs += qc->sect_size;

From 0c23af52ccd1605926480b5dfd1dd857ef604611 Mon Sep 17 00:00:00 2001
From: Naresh Kumar PBS <nareshkumar.pbs@broadcom.com>
Date: Sun, 11 Jul 2021 06:31:36 -0700
Subject: [PATCH 006/502] RDMA/bnxt_re: Fix stats counters

Statistical counters are not incrementing in some adapter versions with
newer FW. This is due to the stats context length mismatch between FW and
driver. Since the L2 driver updates the length correctly, use the stats
length from L2 driver while allocating the DMA'able memory and creating
the stats context.

Fixes: 9d6b648c3112 ("bnxt_en: Update firmware interface spec to 1.10.1.65.")
Link: https://lore.kernel.org/r/1626010296-6076-1-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Naresh Kumar PBS <nareshkumar.pbs@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/bnxt_re/main.c      |  4 +++-
 drivers/infiniband/hw/bnxt_re/qplib_res.c | 10 ++++------
 drivers/infiniband/hw/bnxt_re/qplib_res.h |  1 +
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index d5674026512a..a8688a92c760 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -120,6 +120,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
 	if (!chip_ctx)
 		return -ENOMEM;
 	chip_ctx->chip_num = bp->chip_num;
+	chip_ctx->hw_stats_size = bp->hw_ring_stats_size;
 
 	rdev->chip_ctx = chip_ctx;
 	/* rest members to follow eventually */
@@ -550,6 +551,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
 				       dma_addr_t dma_map,
 				       u32 *fw_stats_ctx_id)
 {
+	struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
 	struct hwrm_stat_ctx_alloc_output resp = {0};
 	struct hwrm_stat_ctx_alloc_input req = {0};
 	struct bnxt_en_dev *en_dev = rdev->en_dev;
@@ -566,7 +568,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
 	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
 	req.update_period_ms = cpu_to_le32(1000);
 	req.stats_dma_addr = cpu_to_le64(dma_map);
-	req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext));
+	req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
 	req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
 	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
 			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 17f0701b3cee..44282a8cdd4f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -56,6 +56,7 @@
 static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
 				      struct bnxt_qplib_stats *stats);
 static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+				      struct bnxt_qplib_chip_ctx *cctx,
 				      struct bnxt_qplib_stats *stats);
 
 /* PBL */
@@ -559,7 +560,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
 		goto fail;
 stats_alloc:
 	/* Stats */
-	rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats);
+	rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats);
 	if (rc)
 		goto fail;
 
@@ -889,15 +890,12 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
 }
 
 static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+				      struct bnxt_qplib_chip_ctx *cctx,
 				      struct bnxt_qplib_stats *stats)
 {
 	memset(stats, 0, sizeof(*stats));
 	stats->fw_id = -1;
-	/* 128 byte aligned context memory is required only for 57500.
-	 * However making this unconditional, it does not harm previous
-	 * generation.
-	 */
-	stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128);
+	stats->size = cctx->hw_stats_size;
 	stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
 					&stats->dma_map, GFP_KERNEL);
 	if (!stats->dma) {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index c291f495ae91..91031502e8f5 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -54,6 +54,7 @@ struct bnxt_qplib_chip_ctx {
 	u16	chip_num;
 	u8	chip_rev;
 	u8	chip_metal;
+	u16	hw_stats_size;
 	struct bnxt_qplib_drv_modes modes;
 };
 

From 6407c69dc51fbd7cf7b6760cd8aefb105d96ff5b Mon Sep 17 00:00:00 2001
From: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Date: Wed, 7 Jul 2021 14:14:55 -0700
Subject: [PATCH 007/502] RDMA/irdma: Fix unused variable total_size warning

Fix the following unused variable warning:

 drivers/infiniband/hw/irdma/uk.c:934:6: warning: variable 'total_size' set but not used [-Wunused-but-set-variable]

Link: https://lore.kernel.org/r/20210707211455.2076-1-tatyana.e.nikolova@intel.com
Link: https://lkml.org/lkml/2021/7/1/726
Reported-by: kernel test robot <lkp@intel.com>
Fixes: 551c46edc769 ("RDMA/irdma: Add user/kernel shared libraries")
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/irdma/uk.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index a6d52c20091c..5fb92de1f015 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -931,7 +931,7 @@ enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp,
 enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
 					     struct irdma_post_rq_info *info)
 {
-	u32 total_size = 0, wqe_idx, i, byte_off;
+	u32 wqe_idx, i, byte_off;
 	u32 addl_frag_cnt;
 	__le64 *wqe;
 	u64 hdr;
@@ -939,9 +939,6 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
 	if (qp->max_rq_frag_cnt < info->num_sges)
 		return IRDMA_ERR_INVALID_FRAG_COUNT;
 
-	for (i = 0; i < info->num_sges; i++)
-		total_size += info->sg_list[i].len;
-
 	wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx);
 	if (!wqe)
 		return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;

From 514305ee0a1dade95c6ff1eb5735de5a329d1f89 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Thu, 1 Jul 2021 12:41:27 +0200
Subject: [PATCH 008/502] RDMA/irdma: Make spdxcheck.py happy

Commit 48d6b3336a9f ("RDMA/irdma: Add ABI definitions") adds
./include/uapi/rdma/irdma-abi.h with an additional unneeded closing
bracket at the end of the SPDX-License-Identifier line.

Hence, ./scripts/spdxcheck.py complains:

  include/uapi/rdma/irdma-abi.h: 1:77 Syntax error: )

Remove that closing bracket to make spdxcheck.py happy.

Fixes: 48d6b3336a9f ("RDMA/irdma: Add ABI definitions")
Link: https://lore.kernel.org/r/20210701104127.1877-1-lukas.bulwahn@gmail.com
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Acked-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/rdma/irdma-abi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h
index 26b638a7ad97..a7085e092d34 100644
--- a/include/uapi/rdma/irdma-abi.h
+++ b/include/uapi/rdma/irdma-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB */
 /*
  * Copyright (c) 2006 - 2021 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.

From c9538831b353b96cb37092c3d3e929d67fd43c5f Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@linux.dev>
Date: Thu, 8 Jul 2021 02:47:52 -0400
Subject: [PATCH 009/502] RDMA/irdma: Change the returned type to void

Since the function irdma_sc_parse_fpm_commit_buf always returns 0, remove
the returned value check and change the returned type to void.

Fixes: 3f49d6842569 ("RDMA/irdma: Implement HW Admin Queue OPs")
Link: https://lore.kernel.org/r/20210708064752.797520-1-yanjun.zhu@linux.dev
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Reviewed-by: Majd Dibbiny <majd@nvidia.com>
Acked-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/irdma/ctrl.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
index b1023a7d0bd1..c3880a85e255 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -2845,7 +2845,7 @@ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf,
  * parses fpm commit info and copy base value
  * of hmc objects in hmc_info
  */
-static enum irdma_status_code
+static void
 irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
 			      struct irdma_hmc_obj_info *info, u32 *sd)
 {
@@ -2915,7 +2915,6 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
 	else
 		*sd = (u32)(size >> 21);
 
-	return 0;
 }
 
 /**
@@ -4434,9 +4433,9 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev,
 	ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id,
 					   &commit_fpm_mem, true, wait_type);
 	if (!ret_code)
-		ret_code = irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
-							 hmc_info->hmc_obj,
-							 &hmc_info->sd_table.sd_cnt);
+		irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
+					      hmc_info->hmc_obj,
+					      &hmc_info->sd_table.sd_cnt);
 	print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
 			     8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE,
 			     false);

From 7e71b85473f863a29eb1c69265ef025389b4091d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 8 Jun 2021 14:26:58 +0300
Subject: [PATCH 010/502] arm64: dts: ls1028a: fix node name for the sysclk

U-Boot attempts to fix up the "clock-frequency" property of the "/sysclk" node:
https://elixir.bootlin.com/u-boot/v2021.04/source/arch/arm/cpu/armv8/fsl-layerscape/fdt.c#L512

but fails to do so:

  ## Booting kernel from Legacy Image at a1000000 ...
     Image Name:
     Created:      2021-06-08  10:31:38 UTC
     Image Type:   AArch64 Linux Kernel Image (gzip compressed)
     Data Size:    15431370 Bytes = 14.7 MiB
     Load Address: 80080000
     Entry Point:  80080000
     Verifying Checksum ... OK
  ## Flattened Device Tree blob at a0000000
     Booting using the fdt blob at 0xa0000000
     Uncompressing Kernel Image
     Loading Device Tree to 00000000fbb19000, end 00000000fbb22717 ... OK
  Unable to update property /sysclk:clock-frequency, err=FDT_ERR_NOTFOUND

  Starting kernel ...

All Layerscape SoCs except LS1028A use "sysclk" as the node name, and
not "clock-sysclk". So change the node name of LS1028A accordingly.

Fixes: 8897f3255c9c ("arm64: dts: Add support for NXP LS1028A SoC")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index b2e3e5d2a108..343ecf0e8973 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -66,7 +66,7 @@
 		};
 	};
 
-	sysclk: clock-sysclk {
+	sysclk: sysclk {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
 		clock-frequency = <100000000>;

From 7dd2dd4ff9f3abda601f22b9d01441a0869d20d7 Mon Sep 17 00:00:00 2001
From: Adrian Larumbe <adrian.martinezlarumbe@imgtec.com>
Date: Wed, 7 Jul 2021 00:43:38 +0100
Subject: [PATCH 011/502] dmaengine: xilinx_dma: Fix read-after-free bug when
 terminating transfers

When user calls dmaengine_terminate_sync, the driver will clean up any
remaining descriptors for all the pending or active transfers that had
previously been submitted. However, this might happen whilst the tasklet is
invoking the DMA callback for the last finished transfer, so by the time it
returns and takes over the channel's spinlock, the list of completed
descriptors it was traversing is no longer valid. This leads to a
read-after-free situation.

Fix it by signalling whether a user-triggered termination has happened by
means of a boolean variable.

Signed-off-by: Adrian Larumbe <adrian.martinezlarumbe@imgtec.com>
Link: https://lore.kernel.org/r/20210706234338.7696-3-adrian.martinezlarumbe@imgtec.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dma.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 75c0b8e904e5..4b9530a7bf65 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -394,6 +394,7 @@ struct xilinx_dma_tx_descriptor {
  * @genlock: Support genlock mode
  * @err: Channel has errors
  * @idle: Check for channel idle
+ * @terminating: Check for channel being synchronized by user
  * @tasklet: Cleanup work after irq
  * @config: Device configuration info
  * @flush_on_fsync: Flush on Frame sync
@@ -431,6 +432,7 @@ struct xilinx_dma_chan {
 	bool genlock;
 	bool err;
 	bool idle;
+	bool terminating;
 	struct tasklet_struct tasklet;
 	struct xilinx_vdma_config config;
 	bool flush_on_fsync;
@@ -1049,6 +1051,13 @@ static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan)
 		/* Run any dependencies, then free the descriptor */
 		dma_run_dependencies(&desc->async_tx);
 		xilinx_dma_free_tx_descriptor(chan, desc);
+
+		/*
+		 * While we ran a callback the user called a terminate function,
+		 * which takes care of cleaning up any remaining descriptors
+		 */
+		if (chan->terminating)
+			break;
 	}
 
 	spin_unlock_irqrestore(&chan->lock, flags);
@@ -1965,6 +1974,8 @@ static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	if (desc->cyclic)
 		chan->cyclic = true;
 
+	chan->terminating = false;
+
 	spin_unlock_irqrestore(&chan->lock, flags);
 
 	return cookie;
@@ -2436,6 +2447,7 @@ static int xilinx_dma_terminate_all(struct dma_chan *dchan)
 
 	xilinx_dma_chan_reset(chan);
 	/* Remove and free all of the descriptors in the lists */
+	chan->terminating = true;
 	xilinx_dma_free_descriptors(chan);
 	chan->idle = true;
 

From 1da569fa7ec8cb0591c74aa3050d4ea1397778b4 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Tue, 6 Jul 2021 20:45:21 +0800
Subject: [PATCH 012/502] dmaengine: usb-dmac: Fix PM reference leak in
 usb_dmac_probe()

pm_runtime_get_sync will increment pm usage counter even it failed.
Forgetting to putting operation will result in reference leak here.
Fix it by moving the error_pm label above the pm_runtime_put() in
the error path.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20210706124521.1371901-1-yukuai3@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/sh/usb-dmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index 8f7ceb698226..1cc06900153e 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -855,8 +855,8 @@ static int usb_dmac_probe(struct platform_device *pdev)
 
 error:
 	of_dma_controller_free(pdev->dev.of_node);
-	pm_runtime_put(&pdev->dev);
 error_pm:
+	pm_runtime_put(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	return ret;
 }

From da435aedb00a4ef61019ff11ae0c08ffb9b1fb18 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 24 Jun 2021 12:09:29 -0700
Subject: [PATCH 013/502] dmaengine: idxd: fix array index when int_handles are
 being used

The index to the irq vector should be local and has no relation to
the assigned interrupt handle. Assign the MSIX interrupt index that is
programmed for the descriptor. The interrupt handle only matters when it
comes to hardware descriptor programming.

Fixes: eb15e7154fbf ("dmaengine: idxd: add interrupt handle request and release support")
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/162456176939.1121476.3366256009925001897.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/submit.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index 19afb62abaff..e29887528077 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -128,19 +128,8 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	 * Pending the descriptor to the lockless list for the irq_entry
 	 * that we designated the descriptor to.
 	 */
-	if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
-		int vec;
-
-		/*
-		 * If the driver is on host kernel, it would be the value
-		 * assigned to interrupt handle, which is index for MSIX
-		 * vector. If it's guest then can't use the int_handle since
-		 * that is the index to IMS for the entire device. The guest
-		 * device local index will be used.
-		 */
-		vec = !idxd->int_handles ? desc->hw->int_handle : desc->vector;
-		llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist);
-	}
+	if (desc->hw->flags & IDXD_OP_FLAG_RCI)
+		llist_add(&desc->llnode, &idxd->irq_entries[desc->vector].pending_llist);
 
 	return 0;
 }

From d5c10e0fc8645342fe5c9796b00c84ab078cd713 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 24 Jun 2021 13:43:32 -0700
Subject: [PATCH 014/502] dmaengine: idxd: fix setup sequence for MSIXPERM
 table

The MSIX permission table should be programmed BEFORE request_irq()
happens. This prevents any possibility of an interrupt happening before the
MSIX perm table is setup, however slight.

Fixes: 6df0e6c57dfc ("dmaengine: idxd: clear MSIX permission entry on shutdown")
Sign-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/162456741222.1138073.1298447364671237896.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index c8ae41d36040..4e32a4dcc3ab 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -102,6 +102,8 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 		spin_lock_init(&idxd->irq_entries[i].list_lock);
 	}
 
+	idxd_msix_perm_setup(idxd);
+
 	irq_entry = &idxd->irq_entries[0];
 	rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread,
 				  0, "idxd-misc", irq_entry);
@@ -148,7 +150,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 	}
 
 	idxd_unmask_error_interrupts(idxd);
-	idxd_msix_perm_setup(idxd);
 	return 0;
 
  err_wq_irqs:
@@ -162,6 +163,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
  err_misc_irq:
 	/* Disable error interrupt generation */
 	idxd_mask_error_interrupts(idxd);
+	idxd_msix_perm_clear(idxd);
  err_irq_entries:
 	pci_free_irq_vectors(pdev);
 	dev_err(dev, "No usable interrupts\n");

From f9613aa07f16d6042e74208d1b40a6104d72964a Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 15 Jun 2021 20:52:38 +0800
Subject: [PATCH 015/502] ARM: imx: add missing iounmap()

Commit e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver")
introduced imx_mmdc_remove(), the mmdc_base need be unmapped in it if
config PERF_EVENTS is enabled.

If imx_mmdc_perf_init() fails, the mmdc_base also need be unmapped.

Fixes: e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/mmdc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index 0dfd0ae7a63d..8e57691aafe2 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -462,6 +462,7 @@ static int imx_mmdc_remove(struct platform_device *pdev)
 
 	cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
 	perf_pmu_unregister(&pmu_mmdc->pmu);
+	iounmap(pmu_mmdc->mmdc_base);
 	kfree(pmu_mmdc);
 	return 0;
 }
@@ -567,7 +568,11 @@ static int imx_mmdc_probe(struct platform_device *pdev)
 	val &= ~(1 << BP_MMDC_MAPSR_PSD);
 	writel_relaxed(val, reg);
 
-	return imx_mmdc_perf_init(pdev, mmdc_base);
+	err = imx_mmdc_perf_init(pdev, mmdc_base);
+	if (err)
+		iounmap(mmdc_base);
+
+	return err;
 }
 
 int imx_mmdc_get_ddr_type(void)

From f07ec85365807b3939f32d0094a6dd5ce065d1b9 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 15 Jun 2021 20:52:39 +0800
Subject: [PATCH 016/502] ARM: imx: add missing clk_disable_unprepare()

clock source is prepared and enabled by clk_prepare_enable()
in probe function, but no disable or unprepare in remove and
error path.

Fixes: 9454a0caff6a ("ARM: imx: add mmdc ipg clock operation for mmdc")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/mmdc.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index 8e57691aafe2..4a6f1359e1e9 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -103,6 +103,7 @@ struct mmdc_pmu {
 	struct perf_event *mmdc_events[MMDC_NUM_COUNTERS];
 	struct hlist_node node;
 	struct fsl_mmdc_devtype_data *devtype_data;
+	struct clk *mmdc_ipg_clk;
 };
 
 /*
@@ -463,11 +464,13 @@ static int imx_mmdc_remove(struct platform_device *pdev)
 	cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
 	perf_pmu_unregister(&pmu_mmdc->pmu);
 	iounmap(pmu_mmdc->mmdc_base);
+	clk_disable_unprepare(pmu_mmdc->mmdc_ipg_clk);
 	kfree(pmu_mmdc);
 	return 0;
 }
 
-static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base)
+static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base,
+			      struct clk *mmdc_ipg_clk)
 {
 	struct mmdc_pmu *pmu_mmdc;
 	char *name;
@@ -495,6 +498,7 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
 	}
 
 	mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev);
+	pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk;
 	if (mmdc_num == 0)
 		name = "mmdc";
 	else
@@ -568,9 +572,11 @@ static int imx_mmdc_probe(struct platform_device *pdev)
 	val &= ~(1 << BP_MMDC_MAPSR_PSD);
 	writel_relaxed(val, reg);
 
-	err = imx_mmdc_perf_init(pdev, mmdc_base);
-	if (err)
+	err = imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk);
+	if (err) {
 		iounmap(mmdc_base);
+		clk_disable_unprepare(mmdc_ipg_clk);
+	}
 
 	return err;
 }

From fb1425b436bcf936065edbbe8d092465a53185b6 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@denx.de>
Date: Thu, 17 Jun 2021 11:54:15 -0300
Subject: [PATCH 017/502] ARM: imx: common: Move prototype outside the SMP
 block

Currently the imx_gpcv2_set_core1_pdn_pup_by_software() prototype is
guarded by the CONFIG_SMP symbol. This causes W=1 build warnings when
CONFIG_SMP is not selected:

arch/arm/mach-imx/src.c:103:6: warning: no previous prototype for 'imx_gpcv2_set_core1_pdn_pup_by_software' [-Wmissing-prototypes]

Fix it by moving the imx_gpcv2_set_core1_pdn_pup_by_software() prototype
outside of the CONFIG_SMP block.

Fixes: e34645f45805 ("ARM: imx: add smp support for imx7d")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Fabio Estevam <festevam@denx.de>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h
index f0a073a71401..13f3068e9845 100644
--- a/arch/arm/mach-imx/common.h
+++ b/arch/arm/mach-imx/common.h
@@ -68,7 +68,6 @@ void imx_set_cpu_arg(int cpu, u32 arg);
 void v7_secondary_startup(void);
 void imx_scu_map_io(void);
 void imx_smp_prepare(void);
-void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn);
 #else
 static inline void imx_scu_map_io(void) {}
 static inline void imx_smp_prepare(void) {}
@@ -81,6 +80,7 @@ void imx_gpc_mask_all(void);
 void imx_gpc_restore_all(void);
 void imx_gpc_hwirq_mask(unsigned int hwirq);
 void imx_gpc_hwirq_unmask(unsigned int hwirq);
+void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn);
 void imx_anatop_init(void);
 void imx_anatop_pre_suspend(void);
 void imx_anatop_post_resume(void);

From fd8e83884fdd7b5fc411f201a58d8d01890198a2 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Fri, 25 Jun 2021 14:13:53 +0200
Subject: [PATCH 018/502] ARM: dts: imx6qdl-sr-som: Increase the PHY reset
 duration to 10ms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The AR803x PHY used on this modules seems to require the reset line to
be asserted for around 10ms in order to avoid rare cases where the PHY
gets stuck in an incoherent state that prevents it to function
correctly.

The previous value of 2ms was found to be problematic on some setups,
causing intermittent issues where the PHY would be unresponsive
every once in a while on some sytems, with a low occurrence (it typically
took around 30 consecutive reboots to encounter the issue).

Bumping the delay to the 10ms makes the issue dissapear, with more than
2500 consecutive reboots performed without the issue showing-up.

Fixes: 208d7baf8085 ("ARM: imx: initial SolidRun HummingBoard support")
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Tested-by: Hervé Codina <herve.codina@bootlin.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6qdl-sr-som.dtsi | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi
index 0ad8ccde0cf8..f86efd0ccc40 100644
--- a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi
@@ -54,7 +54,13 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_microsom_enet_ar8035>;
 	phy-mode = "rgmii-id";
-	phy-reset-duration = <2>;
+
+	/*
+	 * The PHY seems to require a long-enough reset duration to avoid
+	 * some rare issues where the PHY gets stuck in an inconsistent and
+	 * non-functional state at boot-up. 10ms proved to be fine .
+	 */
+	phy-reset-duration = <10>;
 	phy-reset-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>;
 	status = "okay";
 

From 80d9ac9bd7b9366c2a89d2716a397749299728e7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 13 Jul 2021 12:36:41 +0100
Subject: [PATCH 019/502] KVM: arm64: Fix detection of shared VMAs on guest
 fault

When merging the KVM MTE support, the blob that was interposed between
the chair and the keyboard experienced a neuronal accident (also known
as a brain fart), turning a check for VM_SHARED into VM_PFNMAP as it
was reshuffling some of the code.

The blob having now come back to its senses, let's restore the
initial check that the original author got right the first place.

Fixes: ea7fc1bb1cd1 ("KVM: arm64: Introduce MTE VM feature")
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210713114804.594993-1-maz@kernel.org
---
 arch/arm64/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3155c9e778f0..0625bf2353c2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -947,7 +947,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		vma_shift = get_vma_page_shift(vma, hva);
 	}
 
-	shared = (vma->vm_flags & VM_PFNMAP);
+	shared = (vma->vm_flags & VM_SHARED);
 
 	switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED

From bac0b135907855e9f8c032877c3df3c60885a08f Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Tue, 13 Jul 2021 22:37:41 +0200
Subject: [PATCH 020/502] KVM: selftests: change pthread_yield to sched_yield
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With later GCC we get

 steal_time.c: In function ‘main’:
 steal_time.c:323:25: warning: ‘pthread_yield’ is deprecated: pthread_yield is deprecated, use sched_yield instead [-Wdeprecated-declarations]

Let's follow the instructions and use sched_yield instead.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210713203742.29680-2-drjones@redhat.com
---
 tools/testing/selftests/kvm/steal_time.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index b0031f2d38fd..ecec30865a74 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -320,7 +320,7 @@ int main(int ac, char **av)
 		run_delay = get_run_delay();
 		pthread_create(&thread, &attr, do_steal_time, NULL);
 		do
-			pthread_yield();
+			sched_yield();
 		while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
 		pthread_join(thread, NULL);
 		run_delay = get_run_delay() - run_delay;

From 5cf17746b302aa32a4f200cc6ce38865bfe4cf94 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Tue, 13 Jul 2021 22:37:42 +0200
Subject: [PATCH 021/502] KVM: arm64: selftests: get-reg-list: actually enable
 pmu regs in pmu sublist

We reworked get-reg-list to make it easier to enable optional register
sublists by parametrizing their vcpu feature flags as well as making
other generalizations. That was all to make sure we enable the PMU
registers when we want to test them. Somehow we forgot to actually
include the PMU feature flag in the PMU sublist description though!
Do that now.

Fixes: 313673bad871 ("KVM: arm64: selftests: get-reg-list: Split base and pmu registers")
Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210713203742.29680-3-drjones@redhat.com
---
 tools/testing/selftests/kvm/aarch64/get-reg-list.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index a16c8f05366c..cc898181faab 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -1019,7 +1019,8 @@ static __u64 sve_rejects_set[] = {
 #define VREGS_SUBLIST \
 	{ "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
 #define PMU_SUBLIST \
-	{ "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+	{ "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+	  .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
 #define SVE_SUBLIST \
 	{ "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
 	  .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \

From 95d429206c97cf109591009fa386004191c62c47 Mon Sep 17 00:00:00 2001
From: Mark Pearson <markpearson@lenovo.com>
Date: Mon, 28 Jun 2021 18:28:46 -0400
Subject: [PATCH 022/502] platform/x86: think-lmi: Add pending_reboot support

The Think-lmi driver was missing pending_reboot support as it wasn't
available from the BIOS. Turns out this is really useful to have from
user space so implementing from a purely SW point of view.

Thanks to Mario Limonciello for guidance on how fwupd would use this.

Suggested-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Mark Pearson <markpearson@lenovo.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210628222846.8830-1-markpearson@lenovo.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/think-lmi.c | 19 +++++++++++++++++++
 drivers/platform/x86/think-lmi.h |  1 +
 2 files changed, 20 insertions(+)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 3671b5d20613..64dcec53a7a0 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -571,6 +571,11 @@ static ssize_t current_value_store(struct kobject *kobj,
 	else
 		ret = tlmi_save_bios_settings("");
 
+	if (!ret && !tlmi_priv.pending_changes) {
+		tlmi_priv.pending_changes = true;
+		/* let userland know it may need to check reboot pending again */
+		kobject_uevent(&tlmi_priv.class_dev->kobj, KOBJ_CHANGE);
+	}
 out:
 	kfree(auth_str);
 	kfree(set_str);
@@ -647,6 +652,14 @@ static struct kobj_type tlmi_pwd_setting_ktype = {
 	.sysfs_ops	= &tlmi_kobj_sysfs_ops,
 };
 
+static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr,
+				   char *buf)
+{
+	return sprintf(buf, "%d\n", tlmi_priv.pending_changes);
+}
+
+static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot);
+
 /* ---- Initialisation --------------------------------------------------------- */
 static void tlmi_release_attr(void)
 {
@@ -667,6 +680,7 @@ static void tlmi_release_attr(void)
 	sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group);
 	kobject_put(&tlmi_priv.pwd_power->kobj);
 	kset_unregister(tlmi_priv.authentication_kset);
+	sysfs_remove_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr);
 }
 
 static int tlmi_sysfs_init(void)
@@ -746,6 +760,11 @@ static int tlmi_sysfs_init(void)
 	if (ret)
 		goto fail_create_attr;
 
+	/* Create global sysfs files */
+	ret = sysfs_create_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr);
+	if (ret)
+		goto fail_create_attr;
+
 	return ret;
 
 fail_create_attr:
diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h
index 6fa8da7af6c7..eb598846628a 100644
--- a/drivers/platform/x86/think-lmi.h
+++ b/drivers/platform/x86/think-lmi.h
@@ -60,6 +60,7 @@ struct think_lmi {
 	bool can_get_bios_selections;
 	bool can_set_bios_password;
 	bool can_get_password_settings;
+	bool pending_changes;
 
 	struct tlmi_attr_setting *setting[TLMI_SETTINGS_COUNT];
 	struct device *class_dev;

From 95e1b60f8dc8f225b14619e9aca9bdd7d99167db Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Tue, 29 Jun 2021 14:17:57 +0530
Subject: [PATCH 023/502] platform/x86: amd-pmc: Fix command completion code

The protocol to submit a job request to SMU is to wait for
AMD_PMC_REGISTER_RESPONSE to return 1,meaning SMU is ready to take
requests. PMC driver has to make sure that the response code is always
AMD_PMC_RESULT_OK before making any command submissions.

When we submit a message to SMU, we have to wait until it processes
the request. Adding a read_poll_timeout() check as this was missing in
the existing code.

Also, add a mutex to protect amd_pmc_send_cmd() calls to SMU.

Fixes: 156ec4731cb2 ("platform/x86: amd-pmc: Add AMD platform support for S2Idle")
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Acked-by: Raul E Rangel <rrangel@chromium.org>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210629084803.248498-2-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 38 ++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index b9da58ee9b1e..1b5f149932c1 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -68,6 +68,7 @@ struct amd_pmc_dev {
 	u32 base_addr;
 	u32 cpu_id;
 	struct device *dev;
+	struct mutex lock; /* generic mutex lock */
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *dbgfs_dir;
 #endif /* CONFIG_DEBUG_FS */
@@ -138,9 +139,10 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
 	u8 msg;
 	u32 val;
 
+	mutex_lock(&dev->lock);
 	/* Wait until we get a valid response */
 	rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE,
-				val, val > 0, PMC_MSG_DELAY_MIN_US,
+				val, val != 0, PMC_MSG_DELAY_MIN_US,
 				PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
 	if (rc) {
 		dev_err(dev->dev, "failed to talk to SMU\n");
@@ -156,7 +158,37 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
 	/* Write message ID to message ID register */
 	msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO;
 	amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg);
-	return 0;
+	/* Wait until we get a valid response */
+	rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE,
+				val, val != 0, PMC_MSG_DELAY_MIN_US,
+				PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
+	if (rc) {
+		dev_err(dev->dev, "SMU response timed out\n");
+		goto out_unlock;
+	}
+
+	switch (val) {
+	case AMD_PMC_RESULT_OK:
+		break;
+	case AMD_PMC_RESULT_CMD_REJECT_BUSY:
+		dev_err(dev->dev, "SMU not ready. err: 0x%x\n", val);
+		rc = -EBUSY;
+		goto out_unlock;
+	case AMD_PMC_RESULT_CMD_UNKNOWN:
+		dev_err(dev->dev, "SMU cmd unknown. err: 0x%x\n", val);
+		rc = -EINVAL;
+		goto out_unlock;
+	case AMD_PMC_RESULT_CMD_REJECT_PREREQ:
+	case AMD_PMC_RESULT_FAILED:
+	default:
+		dev_err(dev->dev, "SMU cmd failed. err: 0x%x\n", val);
+		rc = -EIO;
+		goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&dev->lock);
+	return rc;
 }
 
 static int __maybe_unused amd_pmc_suspend(struct device *dev)
@@ -259,6 +291,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
 
 	amd_pmc_dump_registers(dev);
 
+	mutex_init(&dev->lock);
 	platform_set_drvdata(pdev, dev);
 	amd_pmc_dbgfs_register(dev);
 	return 0;
@@ -269,6 +302,7 @@ static int amd_pmc_remove(struct platform_device *pdev)
 	struct amd_pmc_dev *dev = platform_get_drvdata(pdev);
 
 	amd_pmc_dbgfs_unregister(dev);
+	mutex_destroy(&dev->lock);
 	return 0;
 }
 

From 4c06d35dfedf4c1fd03702e0f05292a69d020e21 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Tue, 29 Jun 2021 14:17:58 +0530
Subject: [PATCH 024/502] platform/x86: amd-pmc: Fix SMU firmware reporting
 mechanism

It was lately understood that the current mechanism available in the
driver to get SMU firmware info works only on internal SMU builds and
there is a separate way to get all the SMU logging counters (addressed
in the next patch). Hence remove all the smu info shown via debugfs as it
is no more useful.

Fixes: 156ec4731cb2 ("platform/x86: amd-pmc: Add AMD platform support for S2Idle")
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210629084803.248498-3-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index 1b5f149932c1..b1d6175a13b2 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -52,7 +52,6 @@
 #define AMD_CPU_ID_PCO			AMD_CPU_ID_RV
 #define AMD_CPU_ID_CZN			AMD_CPU_ID_RN
 
-#define AMD_SMU_FW_VERSION		0x0
 #define PMC_MSG_DELAY_MIN_US		100
 #define RESPONSE_REGISTER_LOOP_MAX	200
 
@@ -89,11 +88,6 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3
 #ifdef CONFIG_DEBUG_FS
 static int smu_fw_info_show(struct seq_file *s, void *unused)
 {
-	struct amd_pmc_dev *dev = s->private;
-	u32 value;
-
-	value = ioread32(dev->smu_base + AMD_SMU_FW_VERSION);
-	seq_printf(s, "SMU FW Info: %x\n", value);
 	return 0;
 }
 DEFINE_SHOW_ATTRIBUTE(smu_fw_info);
@@ -280,10 +274,6 @@ static int amd_pmc_probe(struct platform_device *pdev)
 	pci_dev_put(rdev);
 	base_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
 
-	dev->smu_base = devm_ioremap(dev->dev, base_addr, AMD_PMC_MAPPING_SIZE);
-	if (!dev->smu_base)
-		return -ENOMEM;
-
 	dev->regbase = devm_ioremap(dev->dev, base_addr + AMD_PMC_BASE_ADDR_OFFSET,
 				    AMD_PMC_MAPPING_SIZE);
 	if (!dev->regbase)

From 162b937a8064029ed22cd1039d4dcf7f1721f940 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Tue, 29 Jun 2021 14:17:59 +0530
Subject: [PATCH 025/502] platform/x86: amd-pmc: call dump registers only once

Currently amd_pmc_dump_registers() routine is being called at
multiple places. The best to call it is after command submission
to SMU.

Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210629084803.248498-4-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index b1d6175a13b2..e5107e3b1911 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -182,6 +182,7 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
 
 out_unlock:
 	mutex_unlock(&dev->lock);
+	amd_pmc_dump_registers(dev);
 	return rc;
 }
 
@@ -194,7 +195,6 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev)
 	if (rc)
 		dev_err(pdev->dev, "suspend failed\n");
 
-	amd_pmc_dump_registers(pdev);
 	return 0;
 }
 
@@ -207,7 +207,6 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
 	if (rc)
 		dev_err(pdev->dev, "resume failed\n");
 
-	amd_pmc_dump_registers(pdev);
 	return 0;
 }
 
@@ -279,8 +278,6 @@ static int amd_pmc_probe(struct platform_device *pdev)
 	if (!dev->regbase)
 		return -ENOMEM;
 
-	amd_pmc_dump_registers(dev);
-
 	mutex_init(&dev->lock);
 	platform_set_drvdata(pdev, dev);
 	amd_pmc_dbgfs_register(dev);

From 76620567496237f1f1f54683ec7da1755ee501d7 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Tue, 29 Jun 2021 14:18:00 +0530
Subject: [PATCH 026/502] platform/x86: amd-pmc: Add support for logging SMU
 metrics

SMU provides a way to dump the s0ix debug statistics in the form of a
metrics table via a of set special mailbox commands.

Add support to the driver which can send these commands to SMU and expose
the information received via debugfs. The information contains the s0ix
entry/exit, active time of each IP block etc.

As a side note, SMU subsystem logging is not supported on Picasso based
SoC's.

Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210629084803.248498-5-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 147 +++++++++++++++++++++++++++++++--
 1 file changed, 139 insertions(+), 8 deletions(-)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index e5107e3b1911..0ebb2732c46a 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -46,6 +46,14 @@
 #define AMD_PMC_RESULT_CMD_UNKNOWN           0xFE
 #define AMD_PMC_RESULT_FAILED                0xFF
 
+/* SMU Message Definations */
+#define SMU_MSG_GETSMUVERSION		0x02
+#define SMU_MSG_LOG_GETDRAM_ADDR_HI	0x04
+#define SMU_MSG_LOG_GETDRAM_ADDR_LO	0x05
+#define SMU_MSG_LOG_START		0x06
+#define SMU_MSG_LOG_RESET		0x07
+#define SMU_MSG_LOG_DUMP_DATA		0x08
+#define SMU_MSG_GET_SUP_CONSTRAINTS	0x09
 /* List of supported CPU ids */
 #define AMD_CPU_ID_RV			0x15D0
 #define AMD_CPU_ID_RN			0x1630
@@ -55,17 +63,42 @@
 #define PMC_MSG_DELAY_MIN_US		100
 #define RESPONSE_REGISTER_LOOP_MAX	200
 
+#define SOC_SUBSYSTEM_IP_MAX	12
+#define DELAY_MIN_US		2000
+#define DELAY_MAX_US		3000
 enum amd_pmc_def {
 	MSG_TEST = 0x01,
 	MSG_OS_HINT_PCO,
 	MSG_OS_HINT_RN,
 };
 
+struct amd_pmc_bit_map {
+	const char *name;
+	u32 bit_mask;
+};
+
+static const struct amd_pmc_bit_map soc15_ip_blk[] = {
+	{"DISPLAY",	BIT(0)},
+	{"CPU",		BIT(1)},
+	{"GFX",		BIT(2)},
+	{"VDD",		BIT(3)},
+	{"ACP",		BIT(4)},
+	{"VCN",		BIT(5)},
+	{"ISP",		BIT(6)},
+	{"NBIO",	BIT(7)},
+	{"DF",		BIT(8)},
+	{"USB0",	BIT(9)},
+	{"USB1",	BIT(10)},
+	{"LAPIC",	BIT(11)},
+	{}
+};
+
 struct amd_pmc_dev {
 	void __iomem *regbase;
-	void __iomem *smu_base;
+	void __iomem *smu_virt_addr;
 	u32 base_addr;
 	u32 cpu_id;
+	u32 active_ips;
 	struct device *dev;
 	struct mutex lock; /* generic mutex lock */
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -74,6 +107,7 @@ struct amd_pmc_dev {
 };
 
 static struct amd_pmc_dev pmc;
+static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret);
 
 static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset)
 {
@@ -85,9 +119,49 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3
 	iowrite32(val, dev->regbase + reg_offset);
 }
 
+struct smu_metrics {
+	u32 table_version;
+	u32 hint_count;
+	u32 s0i3_cyclecount;
+	u32 timein_s0i2;
+	u64 timeentering_s0i3_lastcapture;
+	u64 timeentering_s0i3_totaltime;
+	u64 timeto_resume_to_os_lastcapture;
+	u64 timeto_resume_to_os_totaltime;
+	u64 timein_s0i3_lastcapture;
+	u64 timein_s0i3_totaltime;
+	u64 timein_swdrips_lastcapture;
+	u64 timein_swdrips_totaltime;
+	u64 timecondition_notmet_lastcapture[SOC_SUBSYSTEM_IP_MAX];
+	u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX];
+} __packed;
+
 #ifdef CONFIG_DEBUG_FS
 static int smu_fw_info_show(struct seq_file *s, void *unused)
 {
+	struct amd_pmc_dev *dev = s->private;
+	struct smu_metrics table;
+	int idx;
+
+	if (dev->cpu_id == AMD_CPU_ID_PCO)
+		return -EINVAL;
+
+	memcpy_fromio(&table, dev->smu_virt_addr, sizeof(struct smu_metrics));
+
+	seq_puts(s, "\n=== SMU Statistics ===\n");
+	seq_printf(s, "Table Version: %d\n", table.table_version);
+	seq_printf(s, "Hint Count: %d\n", table.hint_count);
+	seq_printf(s, "S0i3 Cycle Count: %d\n", table.s0i3_cyclecount);
+	seq_printf(s, "Time (in us) to S0i3: %lld\n", table.timeentering_s0i3_lastcapture);
+	seq_printf(s, "Time (in us) in S0i3: %lld\n", table.timein_s0i3_lastcapture);
+
+	seq_puts(s, "\n=== Active time (in us) ===\n");
+	for (idx = 0 ; idx < SOC_SUBSYSTEM_IP_MAX ; idx++) {
+		if (soc15_ip_blk[idx].bit_mask & dev->active_ips)
+			seq_printf(s, "%-8s : %lld\n", soc15_ip_blk[idx].name,
+				   table.timecondition_notmet_lastcapture[idx]);
+	}
+
 	return 0;
 }
 DEFINE_SHOW_ATTRIBUTE(smu_fw_info);
@@ -113,6 +187,32 @@ static inline void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
 }
 #endif /* CONFIG_DEBUG_FS */
 
+static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev)
+{
+	u32 phys_addr_low, phys_addr_hi;
+	u64 smu_phys_addr;
+
+	if (dev->cpu_id == AMD_CPU_ID_PCO)
+		return -EINVAL;
+
+	/* Get Active devices list from SMU */
+	amd_pmc_send_cmd(dev, 0, &dev->active_ips, SMU_MSG_GET_SUP_CONSTRAINTS, 1);
+
+	/* Get dram address */
+	amd_pmc_send_cmd(dev, 0, &phys_addr_low, SMU_MSG_LOG_GETDRAM_ADDR_LO, 1);
+	amd_pmc_send_cmd(dev, 0, &phys_addr_hi, SMU_MSG_LOG_GETDRAM_ADDR_HI, 1);
+	smu_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low);
+
+	dev->smu_virt_addr = devm_ioremap(dev->dev, smu_phys_addr, sizeof(struct smu_metrics));
+	if (!dev->smu_virt_addr)
+		return -ENOMEM;
+
+	/* Start the logging */
+	amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0);
+
+	return 0;
+}
+
 static void amd_pmc_dump_registers(struct amd_pmc_dev *dev)
 {
 	u32 value;
@@ -127,10 +227,9 @@ static void amd_pmc_dump_registers(struct amd_pmc_dev *dev)
 	dev_dbg(dev->dev, "AMD_PMC_REGISTER_MESSAGE:%x\n", value);
 }
 
-static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
+static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret)
 {
 	int rc;
-	u8 msg;
 	u32 val;
 
 	mutex_lock(&dev->lock);
@@ -150,8 +249,8 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
 	amd_pmc_reg_write(dev, AMD_PMC_REGISTER_ARGUMENT, set);
 
 	/* Write message ID to message ID register */
-	msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO;
 	amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg);
+
 	/* Wait until we get a valid response */
 	rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE,
 				val, val != 0, PMC_MSG_DELAY_MIN_US,
@@ -163,6 +262,11 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
 
 	switch (val) {
 	case AMD_PMC_RESULT_OK:
+		if (ret) {
+			/* PMFW may take longer time to return back the data */
+			usleep_range(DELAY_MIN_US, 10 * DELAY_MAX_US);
+			*data = amd_pmc_reg_read(dev, AMD_PMC_REGISTER_ARGUMENT);
+		}
 		break;
 	case AMD_PMC_RESULT_CMD_REJECT_BUSY:
 		dev_err(dev->dev, "SMU not ready. err: 0x%x\n", val);
@@ -186,12 +290,29 @@ out_unlock:
 	return rc;
 }
 
+static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
+{
+	switch (dev->cpu_id) {
+	case AMD_CPU_ID_PCO:
+		return MSG_OS_HINT_PCO;
+	case AMD_CPU_ID_RN:
+		return MSG_OS_HINT_RN;
+	}
+	return -EINVAL;
+}
+
 static int __maybe_unused amd_pmc_suspend(struct device *dev)
 {
 	struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
 	int rc;
+	u8 msg;
 
-	rc = amd_pmc_send_cmd(pdev, 1);
+	/* Reset and Start SMU logging - to monitor the s0i3 stats */
+	amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0);
+	amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0);
+
+	msg = amd_pmc_get_os_hint(pdev);
+	rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0);
 	if (rc)
 		dev_err(pdev->dev, "suspend failed\n");
 
@@ -202,8 +323,13 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
 {
 	struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
 	int rc;
+	u8 msg;
 
-	rc = amd_pmc_send_cmd(pdev, 0);
+	/* Let SMU know that we are looking for stats */
+	amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_DUMP_DATA, 0);
+
+	msg = amd_pmc_get_os_hint(pdev);
+	rc = amd_pmc_send_cmd(pdev, 0, NULL, msg, 0);
 	if (rc)
 		dev_err(pdev->dev, "resume failed\n");
 
@@ -226,8 +352,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
 {
 	struct amd_pmc_dev *dev = &pmc;
 	struct pci_dev *rdev;
-	u32 base_addr_lo;
-	u32 base_addr_hi;
+	u32 base_addr_lo, base_addr_hi;
 	u64 base_addr;
 	int err;
 	u32 val;
@@ -279,6 +404,12 @@ static int amd_pmc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	mutex_init(&dev->lock);
+
+	/* Use SMU to get the s0i3 debug stats */
+	err = amd_pmc_setup_smu_logging(dev);
+	if (err)
+		dev_err(dev->dev, "SMU debugging info not supported on this platform\n");
+
 	platform_set_drvdata(pdev, dev);
 	amd_pmc_dbgfs_register(dev);
 	return 0;

From b9a4fa6978bef902409858737fa180fa7b9346ac Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Tue, 29 Jun 2021 14:18:01 +0530
Subject: [PATCH 027/502] platform/x86: amd-pmc: Add support for logging s0ix
 counters

Even the FCH SSC registers provides certain level of information
about the s0ix entry and exit times which comes handy when the SMU
fails to report the statistics via the mailbox communication.

This information is captured via a new debugfs file "s0ix_stats".
A non-zero entry in this counters would mean that the system entered
the s0ix state.

If s0ix entry time and exit time don't change during suspend to idle,
the silicon has not entered the deepest state.

Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210629084803.248498-6-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 45 +++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index 0ebb2732c46a..7f011c3f60f2 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -46,6 +46,15 @@
 #define AMD_PMC_RESULT_CMD_UNKNOWN           0xFE
 #define AMD_PMC_RESULT_FAILED                0xFF
 
+/* FCH SSC Registers */
+#define FCH_S0I3_ENTRY_TIME_L_OFFSET	0x30
+#define FCH_S0I3_ENTRY_TIME_H_OFFSET	0x34
+#define FCH_S0I3_EXIT_TIME_L_OFFSET	0x38
+#define FCH_S0I3_EXIT_TIME_H_OFFSET	0x3C
+#define FCH_SSC_MAPPING_SIZE		0x800
+#define FCH_BASE_PHY_ADDR_LOW		0xFED81100
+#define FCH_BASE_PHY_ADDR_HIGH		0x00000000
+
 /* SMU Message Definations */
 #define SMU_MSG_GETSMUVERSION		0x02
 #define SMU_MSG_LOG_GETDRAM_ADDR_HI	0x04
@@ -96,6 +105,7 @@ static const struct amd_pmc_bit_map soc15_ip_blk[] = {
 struct amd_pmc_dev {
 	void __iomem *regbase;
 	void __iomem *smu_virt_addr;
+	void __iomem *fch_virt_addr;
 	u32 base_addr;
 	u32 cpu_id;
 	u32 active_ips;
@@ -166,6 +176,29 @@ static int smu_fw_info_show(struct seq_file *s, void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(smu_fw_info);
 
+static int s0ix_stats_show(struct seq_file *s, void *unused)
+{
+	struct amd_pmc_dev *dev = s->private;
+	u64 entry_time, exit_time, residency;
+
+	entry_time = ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_H_OFFSET);
+	entry_time = entry_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_L_OFFSET);
+
+	exit_time = ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_H_OFFSET);
+	exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET);
+
+	/* It's in 48MHz. We need to convert it */
+	residency = (exit_time - entry_time) / 48;
+
+	seq_puts(s, "=== S0ix statistics ===\n");
+	seq_printf(s, "S0ix Entry Time: %lld\n", entry_time);
+	seq_printf(s, "S0ix Exit Time: %lld\n", exit_time);
+	seq_printf(s, "Residency Time: %lld\n", residency);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(s0ix_stats);
+
 static void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
 {
 	debugfs_remove_recursive(dev->dbgfs_dir);
@@ -176,6 +209,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
 	dev->dbgfs_dir = debugfs_create_dir("amd_pmc", NULL);
 	debugfs_create_file("smu_fw_info", 0644, dev->dbgfs_dir, dev,
 			    &smu_fw_info_fops);
+	debugfs_create_file("s0ix_stats", 0644, dev->dbgfs_dir, dev,
+			    &s0ix_stats_fops);
 }
 #else
 static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
@@ -353,7 +388,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
 	struct amd_pmc_dev *dev = &pmc;
 	struct pci_dev *rdev;
 	u32 base_addr_lo, base_addr_hi;
-	u64 base_addr;
+	u64 base_addr, fch_phys_addr;
 	int err;
 	u32 val;
 
@@ -405,6 +440,14 @@ static int amd_pmc_probe(struct platform_device *pdev)
 
 	mutex_init(&dev->lock);
 
+	/* Use FCH registers to get the S0ix stats */
+	base_addr_lo = FCH_BASE_PHY_ADDR_LOW;
+	base_addr_hi = FCH_BASE_PHY_ADDR_HIGH;
+	fch_phys_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
+	dev->fch_virt_addr = devm_ioremap(dev->dev, fch_phys_addr, FCH_SSC_MAPPING_SIZE);
+	if (!dev->fch_virt_addr)
+		return -ENOMEM;
+
 	/* Use SMU to get the s0i3 debug stats */
 	err = amd_pmc_setup_smu_logging(dev);
 	if (err)

From 9422584a601ae8e4af51e890a14a936b2b689628 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Tue, 29 Jun 2021 14:18:02 +0530
Subject: [PATCH 028/502] platform/x86: amd-pmc: Add support for ACPI ID
 AMDI0006

Some newer BIOSes have added another ACPI ID for the uPEP device.
SMU statistics behave identically on this device.

Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210629084803.248498-7-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index 7f011c3f60f2..c5054fa2aed9 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -469,6 +469,7 @@ static int amd_pmc_remove(struct platform_device *pdev)
 
 static const struct acpi_device_id amd_pmc_acpi_ids[] = {
 	{"AMDI0005", 0},
+	{"AMDI0006", 0},
 	{"AMD0004", 0},
 	{ }
 };

From 83cbaf14275a30f14cf558b09389a1664b173858 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Tue, 29 Jun 2021 14:18:03 +0530
Subject: [PATCH 029/502] platform/x86: amd-pmc: Add new acpi id for future PMC
 controllers

The upcoming PMC controller would have a newer acpi id, add that to
the supported acpid device list.

Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210629084803.248498-8-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index c5054fa2aed9..d2f9a62e1166 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -68,6 +68,7 @@
 #define AMD_CPU_ID_RN			0x1630
 #define AMD_CPU_ID_PCO			AMD_CPU_ID_RV
 #define AMD_CPU_ID_CZN			AMD_CPU_ID_RN
+#define AMD_CPU_ID_YC			0x14B5
 
 #define PMC_MSG_DELAY_MIN_US		100
 #define RESPONSE_REGISTER_LOOP_MAX	200
@@ -331,6 +332,7 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
 	case AMD_CPU_ID_PCO:
 		return MSG_OS_HINT_PCO;
 	case AMD_CPU_ID_RN:
+	case AMD_CPU_ID_YC:
 		return MSG_OS_HINT_RN;
 	}
 	return -EINVAL;
@@ -376,6 +378,7 @@ static const struct dev_pm_ops amd_pmc_pm_ops = {
 };
 
 static const struct pci_device_id pmc_pci_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PCO) },
@@ -470,6 +473,7 @@ static int amd_pmc_remove(struct platform_device *pdev)
 static const struct acpi_device_id amd_pmc_acpi_ids[] = {
 	{"AMDI0005", 0},
 	{"AMDI0006", 0},
+	{"AMDI0007", 0},
 	{"AMD0004", 0},
 	{ }
 };

From a973c983375c37301645d4fea056b1f4bff77bf7 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 7 Jul 2021 09:16:47 -0500
Subject: [PATCH 030/502] platform/x86: amd-pmc: Use return code on suspend

Right now the driver will still return success even if the OS_HINT
command failed to send to the SMU. In the rare event of a failure,
the suspend should really be aborted here so that relevant logs
can may be captured.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Link: https://lore.kernel.org/r/20210707141647.8871-1-mario.limonciello@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index d2f9a62e1166..680f94c7e075 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -353,7 +353,7 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev)
 	if (rc)
 		dev_err(pdev->dev, "suspend failed\n");
 
-	return 0;
+	return rc;
 }
 
 static int __maybe_unused amd_pmc_resume(struct device *dev)

From 23e9592b06b43cea4d6799843795beca13437907 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@canonical.com>
Date: Sat, 10 Jul 2021 13:08:10 -0600
Subject: [PATCH 031/502] platform/x86: wireless-hotkey: remove hardcoded "hp"
 from the error message

This driver is no longer specific to HP laptops so "hp" in the error
message is no longer applicable.

Signed-off-by: Alex Hung <alex.hung@canonical.com>
Link: https://lore.kernel.org/r/20210710190810.313104-1-alex.hung@canonical.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/wireless-hotkey.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c
index b010e4ca3383..11c60a273446 100644
--- a/drivers/platform/x86/wireless-hotkey.c
+++ b/drivers/platform/x86/wireless-hotkey.c
@@ -78,7 +78,7 @@ static int wl_add(struct acpi_device *device)
 
 	err = wireless_input_setup();
 	if (err)
-		pr_err("Failed to setup hp wireless hotkeys\n");
+		pr_err("Failed to setup wireless hotkeys\n");
 
 	return err;
 }

From 95edbbf78c3bdbd1daa921dd4a2e61c751e469ba Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 15 Jul 2021 15:43:27 +0800
Subject: [PATCH 032/502] platform/x86: amd-pmc: Fix missing unlock on error in
 amd_pmc_send_cmd()

Add the missing unlock before return from function amd_pmc_send_cmd()
in the error handling case.

Fixes: 95e1b60f8dc8 ("platform/x86: amd-pmc: Fix command completion code")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20210715074327.1966083-1-yangyingliang@huawei.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index 680f94c7e075..663a4ca0580d 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -275,7 +275,7 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg
 				PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
 	if (rc) {
 		dev_err(dev->dev, "failed to talk to SMU\n");
-		return rc;
+		goto out_unlock;
 	}
 
 	/* Write zero to response register */

From ac34de14ac30ba4484d68f8845a54b6b6c23db42 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 28 Jun 2021 23:09:55 +0200
Subject: [PATCH 033/502] Revert "soc: imx8m: change to use platform driver"

With the SoC matching changed to a platform driver the match data
is available only after other drivers, which may rely on it are
already probed. This breaks at least the CAAM driver on i.MX8M.
Revert the change until all those drivers have been audited and
changed to be able to eal with match data being available later
in the boot process.

Fixes: 7d981405d0fd ("soc: imx8m: change to use platform driver")
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Acked-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 drivers/soc/imx/soc-imx8m.c | 84 ++++++-------------------------------
 1 file changed, 12 insertions(+), 72 deletions(-)

diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c
index 071e14496e4b..cc57a384d74d 100644
--- a/drivers/soc/imx/soc-imx8m.c
+++ b/drivers/soc/imx/soc-imx8m.c
@@ -5,8 +5,6 @@
 
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/module.h>
-#include <linux/nvmem-consumer.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
 #include <linux/sys_soc.h>
@@ -31,7 +29,7 @@
 
 struct imx8_soc_data {
 	char *name;
-	u32 (*soc_revision)(struct device *dev);
+	u32 (*soc_revision)(void);
 };
 
 static u64 soc_uid;
@@ -52,7 +50,7 @@ static u32 imx8mq_soc_revision_from_atf(void)
 static inline u32 imx8mq_soc_revision_from_atf(void) { return 0; };
 #endif
 
-static u32 __init imx8mq_soc_revision(struct device *dev)
+static u32 __init imx8mq_soc_revision(void)
 {
 	struct device_node *np;
 	void __iomem *ocotp_base;
@@ -77,20 +75,9 @@ static u32 __init imx8mq_soc_revision(struct device *dev)
 			rev = REV_B1;
 	}
 
-	if (dev) {
-		int ret;
-
-		ret = nvmem_cell_read_u64(dev, "soc_unique_id", &soc_uid);
-		if (ret) {
-			iounmap(ocotp_base);
-			of_node_put(np);
-			return ret;
-		}
-	} else {
-		soc_uid = readl_relaxed(ocotp_base + OCOTP_UID_HIGH);
-		soc_uid <<= 32;
-		soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW);
-	}
+	soc_uid = readl_relaxed(ocotp_base + OCOTP_UID_HIGH);
+	soc_uid <<= 32;
+	soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW);
 
 	iounmap(ocotp_base);
 	of_node_put(np);
@@ -120,7 +107,7 @@ static void __init imx8mm_soc_uid(void)
 	of_node_put(np);
 }
 
-static u32 __init imx8mm_soc_revision(struct device *dev)
+static u32 __init imx8mm_soc_revision(void)
 {
 	struct device_node *np;
 	void __iomem *anatop_base;
@@ -138,15 +125,7 @@ static u32 __init imx8mm_soc_revision(struct device *dev)
 	iounmap(anatop_base);
 	of_node_put(np);
 
-	if (dev) {
-		int ret;
-
-		ret = nvmem_cell_read_u64(dev, "soc_unique_id", &soc_uid);
-		if (ret)
-			return ret;
-	} else {
-		imx8mm_soc_uid();
-	}
+	imx8mm_soc_uid();
 
 	return rev;
 }
@@ -171,7 +150,7 @@ static const struct imx8_soc_data imx8mp_soc_data = {
 	.soc_revision = imx8mm_soc_revision,
 };
 
-static __maybe_unused const struct of_device_id imx8_machine_match[] = {
+static __maybe_unused const struct of_device_id imx8_soc_match[] = {
 	{ .compatible = "fsl,imx8mq", .data = &imx8mq_soc_data, },
 	{ .compatible = "fsl,imx8mm", .data = &imx8mm_soc_data, },
 	{ .compatible = "fsl,imx8mn", .data = &imx8mn_soc_data, },
@@ -179,20 +158,12 @@ static __maybe_unused const struct of_device_id imx8_machine_match[] = {
 	{ }
 };
 
-static __maybe_unused const struct of_device_id imx8_soc_match[] = {
-	{ .compatible = "fsl,imx8mq-soc", .data = &imx8mq_soc_data, },
-	{ .compatible = "fsl,imx8mm-soc", .data = &imx8mm_soc_data, },
-	{ .compatible = "fsl,imx8mn-soc", .data = &imx8mn_soc_data, },
-	{ .compatible = "fsl,imx8mp-soc", .data = &imx8mp_soc_data, },
-	{ }
-};
-
 #define imx8_revision(soc_rev) \
 	soc_rev ? \
 	kasprintf(GFP_KERNEL, "%d.%d", (soc_rev >> 4) & 0xf,  soc_rev & 0xf) : \
 	"unknown"
 
-static int imx8_soc_info(struct platform_device *pdev)
+static int __init imx8_soc_init(void)
 {
 	struct soc_device_attribute *soc_dev_attr;
 	struct soc_device *soc_dev;
@@ -211,10 +182,7 @@ static int imx8_soc_info(struct platform_device *pdev)
 	if (ret)
 		goto free_soc;
 
-	if (pdev)
-		id = of_match_node(imx8_soc_match, pdev->dev.of_node);
-	else
-		id = of_match_node(imx8_machine_match, of_root);
+	id = of_match_node(imx8_soc_match, of_root);
 	if (!id) {
 		ret = -ENODEV;
 		goto free_soc;
@@ -223,16 +191,8 @@ static int imx8_soc_info(struct platform_device *pdev)
 	data = id->data;
 	if (data) {
 		soc_dev_attr->soc_id = data->name;
-		if (data->soc_revision) {
-			if (pdev) {
-				soc_rev = data->soc_revision(&pdev->dev);
-				ret = soc_rev;
-				if (ret < 0)
-					goto free_soc;
-			} else {
-				soc_rev = data->soc_revision(NULL);
-			}
-		}
+		if (data->soc_revision)
+			soc_rev = data->soc_revision();
 	}
 
 	soc_dev_attr->revision = imx8_revision(soc_rev);
@@ -270,24 +230,4 @@ free_soc:
 	kfree(soc_dev_attr);
 	return ret;
 }
-
-/* Retain device_initcall is for backward compatibility with DTS. */
-static int __init imx8_soc_init(void)
-{
-	if (of_find_matching_node_and_match(NULL, imx8_soc_match, NULL))
-		return 0;
-
-	return imx8_soc_info(NULL);
-}
 device_initcall(imx8_soc_init);
-
-static struct platform_driver imx8_soc_info_driver = {
-	.probe = imx8_soc_info,
-	.driver = {
-		.name = "imx8_soc_info",
-		.of_match_table = imx8_soc_match,
-	},
-};
-
-module_platform_driver(imx8_soc_info_driver);
-MODULE_LICENSE("GPL v2");

From b18c7da63fcb46e2f9a093cc18d7c219e13a887c Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Mon, 5 Jul 2021 11:41:54 -0500
Subject: [PATCH 034/502] RDMA/rxe: Fix memory leak in error path code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In rxe_mr_init_user() at the third error the driver fails to free the
memory at mr->map. This patch adds code to do that.  This error only
occurs if page_address() fails to return a non zero address which should
never happen for 64 bit architectures.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/20210705164153.17652-1-rpearsonhpe@gmail.com
Reported by: Haakon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 6aabcb4de235..be4bcb420fab 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -113,13 +113,14 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 	int			num_buf;
 	void			*vaddr;
 	int err;
+	int i;
 
 	umem = ib_umem_get(pd->ibpd.device, start, length, access);
 	if (IS_ERR(umem)) {
-		pr_warn("err %d from rxe_umem_get\n",
-			(int)PTR_ERR(umem));
+		pr_warn("%s: Unable to pin memory region err = %d\n",
+			__func__, (int)PTR_ERR(umem));
 		err = PTR_ERR(umem);
-		goto err1;
+		goto err_out;
 	}
 
 	mr->umem = umem;
@@ -129,9 +130,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
 	err = rxe_mr_alloc(mr, num_buf);
 	if (err) {
-		pr_warn("err %d from rxe_mr_alloc\n", err);
-		ib_umem_release(umem);
-		goto err1;
+		pr_warn("%s: Unable to allocate memory for map\n",
+				__func__);
+		goto err_release_umem;
 	}
 
 	mr->page_shift = PAGE_SHIFT;
@@ -151,10 +152,10 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
 			vaddr = page_address(sg_page_iter_page(&sg_iter));
 			if (!vaddr) {
-				pr_warn("null vaddr\n");
-				ib_umem_release(umem);
+				pr_warn("%s: Unable to get virtual address\n",
+						__func__);
 				err = -ENOMEM;
-				goto err1;
+				goto err_cleanup_map;
 			}
 
 			buf->addr = (uintptr_t)vaddr;
@@ -177,7 +178,13 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
 	return 0;
 
-err1:
+err_cleanup_map:
+	for (i = 0; i < mr->num_map; i++)
+		kfree(mr->map[i]);
+	kfree(mr->map);
+err_release_umem:
+	ib_umem_release(umem);
+err_out:
 	return err;
 }
 

From 0dc2d6ff40364a00cd66cae3ed327894dcd11c82 Mon Sep 17 00:00:00 2001
From: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Date: Thu, 8 Jul 2021 14:35:21 -0700
Subject: [PATCH 035/502] RDMA/irdma: Check vsi pointer before using it

Fix a coverity warning about NULL pointer dereference:
   Dereferencing "vsi", which is known to be "NULL".

Link: https://lore.kernel.org/r/20210708213521.438-1-tatyana.e.nikolova@intel.com
Reported-by: coverity-bot <keescook+coverity-bot@chromium.org>
Addresses-Coverity-ID: 1505164 ("Null pointer dereferences")
Fixes: 8498a30e1b94 ("RDMA/irdma: Register auxiliary driver and implement private channel OPs")
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/irdma/main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index ea59432351fb..51a41359e0b4 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -215,10 +215,10 @@ static void irdma_remove(struct auxiliary_device *aux_dev)
 	pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn));
 }
 
-static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf)
+static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf,
+				   struct ice_vsi *vsi)
 {
 	struct irdma_pci_f *rf = iwdev->rf;
-	struct ice_vsi *vsi = ice_get_main_vsi(pf);
 
 	rf->cdev = pf;
 	rf->gen_ops.register_qset = irdma_lan_register_qset;
@@ -253,12 +253,15 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
 							    struct iidc_auxiliary_dev,
 							    adev);
 	struct ice_pf *pf = iidc_adev->pf;
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
 	struct iidc_qos_params qos_info = {};
 	struct irdma_device *iwdev;
 	struct irdma_pci_f *rf;
 	struct irdma_l2params l2params = {};
 	int err;
 
+	if (!vsi)
+		return -EIO;
 	iwdev = ib_alloc_device(irdma_device, ibdev);
 	if (!iwdev)
 		return -ENOMEM;
@@ -268,7 +271,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
 		return -ENOMEM;
 	}
 
-	irdma_fill_device_info(iwdev, pf);
+	irdma_fill_device_info(iwdev, pf, vsi);
 	rf = iwdev->rf;
 
 	if (irdma_ctrl_init_hw(rf)) {

From a323da0b73b89b3ecabd661c56978a271e1911b6 Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@linux.dev>
Date: Tue, 13 Jul 2021 23:11:28 -0400
Subject: [PATCH 036/502] RDMA/irdma: change the returned type of
 irdma_sc_repost_aeq_entries to void

The function irdma_sc_repost_aeq_entries always returns zero. So
the returned type is changed to void.

Link: https://lore.kernel.org/r/20210714031130.1511109-2-yanjun.zhu@linux.dev
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/irdma/ctrl.c | 4 +---
 drivers/infiniband/hw/irdma/type.h | 3 +--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
index c3880a85e255..f1e5515256e0 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -4186,11 +4186,9 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
  * @dev: sc device struct
  * @count: allocate count
  */
-enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
 {
 	writel(count, dev->hw_regs[IRDMA_AEQALLOC]);
-
-	return 0;
 }
 
 /**
diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h
index 7387b83e826d..874bc25a938b 100644
--- a/drivers/infiniband/hw/irdma/type.h
+++ b/drivers/infiniband/hw/irdma/type.h
@@ -1222,8 +1222,7 @@ enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
 					 struct irdma_aeq_init_info *info);
 enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
 					      struct irdma_aeqe_info *info);
-enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev,
-						   u32 count);
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count);
 
 void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id,
 		      int abi_ver);

From 41f5fa9fa75cebd48b5ce9ec244ee25390ac3b89 Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@linux.dev>
Date: Tue, 13 Jul 2021 23:11:29 -0400
Subject: [PATCH 037/502] RDMA/irdma: Change the returned type of
 irdma_set_hw_rsrc to void

Since the function irdma_set_hw_rsrc always returns zero, change the
returned type to void and remove all the related source code.

Link: https://lore.kernel.org/r/20210714031130.1511109-3-yanjun.zhu@linux.dev
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/irdma/hw.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index 7afb8a6a0526..00de5ee9a260 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -1920,7 +1920,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf)
  * irdma_set_hw_rsrc - set hw memory resources.
  * @rf: RDMA PCI function
  */
-static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf)
+static void irdma_set_hw_rsrc(struct irdma_pci_f *rf)
 {
 	rf->allocated_qps = (void *)(rf->mem_rsrc +
 		   (sizeof(struct irdma_arp_entry) * rf->arp_table_size));
@@ -1937,8 +1937,6 @@ static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf)
 	spin_lock_init(&rf->arp_lock);
 	spin_lock_init(&rf->qptable_lock);
 	spin_lock_init(&rf->qh_list_lock);
-
-	return 0;
 }
 
 /**
@@ -2000,9 +1998,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
 
 	rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc;
 
-	ret = irdma_set_hw_rsrc(rf);
-	if (ret)
-		goto set_hw_rsrc_fail;
+	irdma_set_hw_rsrc(rf);
 
 	set_bit(0, rf->allocated_mrs);
 	set_bit(0, rf->allocated_qps);
@@ -2025,9 +2021,6 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
 
 	return 0;
 
-set_hw_rsrc_fail:
-	kfree(rf->mem_rsrc);
-	rf->mem_rsrc = NULL;
 mem_rsrc_kzalloc_fail:
 	kfree(rf->allocated_ws_nodes);
 	rf->allocated_ws_nodes = NULL;

From dc6afef7e14252c5ca5b8a8444946cb4b75b0aa0 Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@linux.dev>
Date: Tue, 13 Jul 2021 23:11:30 -0400
Subject: [PATCH 038/502] RDMA/irdma: Change returned type of
 irdma_setup_virt_qp to void

Since the returned value of the function irdma_setup_virt_qp is always 0,
remove the returned value check and change the returned type to void.

Link: https://lore.kernel.org/r/20210714031130.1511109-4-yanjun.zhu@linux.dev
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/irdma/verbs.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 9712f6902ba8..717147ed0519 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -557,7 +557,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
  * @iwqp: qp ptr
  * @init_info: initialize info to return
  */
-static int irdma_setup_virt_qp(struct irdma_device *iwdev,
+static void irdma_setup_virt_qp(struct irdma_device *iwdev,
 			       struct irdma_qp *iwqp,
 			       struct irdma_qp_init_info *init_info)
 {
@@ -574,8 +574,6 @@ static int irdma_setup_virt_qp(struct irdma_device *iwdev,
 		init_info->sq_pa = qpmr->sq_pbl.addr;
 		init_info->rq_pa = qpmr->rq_pbl.addr;
 	}
-
-	return 0;
 }
 
 /**
@@ -914,7 +912,7 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
 			}
 		}
 		init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
-		err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info);
+		irdma_setup_virt_qp(iwdev, iwqp, &init_info);
 	} else {
 		init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
 		err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);

From e48bf29cf9d6d60d810e2af71e54b71a324094e0 Mon Sep 17 00:00:00 2001
From: Ye Xiang <xiang.ye@intel.com>
Date: Sun, 13 Jun 2021 11:25:07 +0800
Subject: [PATCH 039/502] HID: intel-ish-hid: use async resume function

ISH IPC driver uses asynchronous workqueue to do resume now, but there is
a potential timing issue: when child devices resume before bus driver, it
will cause child devices resume failed and cannot be recovered until
reboot. The current implementation in this case do wait for IPC to resume
but fail to accommodate for a case when there is no ISH reboot and soft
resume is taking time. This issue is apparent on Tiger Lake platform with
5.11.13 kernel when doing suspend to idle then resume(s0ix) test. To
resolve this issue, we change ISHTP HID client to use asynchronous resume
callback too. In the asynchronous resume callback, it waits for the ISHTP
resume done event, and then notify ISHTP HID client link ready.

Signed-off-by: Ye Xiang <xiang.ye@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ishtp-hid-client.c | 15 +++++++++-
 drivers/hid/intel-ish-hid/ishtp-hid.h        |  1 +
 drivers/hid/intel-ish-hid/ishtp/bus.c        | 29 +++++++++++++++-----
 include/linux/intel-ish-client-if.h          |  2 ++
 4 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index 6b1fa971b33e..91bf4d01e91a 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -784,6 +784,17 @@ static void hid_ishtp_cl_reset_handler(struct work_struct *work)
 	}
 }
 
+static void hid_ishtp_cl_resume_handler(struct work_struct *work)
+{
+	struct ishtp_cl_data *client_data = container_of(work, struct ishtp_cl_data, resume_work);
+	struct ishtp_cl *hid_ishtp_cl = client_data->hid_ishtp_cl;
+
+	if (ishtp_wait_resume(ishtp_get_ishtp_device(hid_ishtp_cl))) {
+		client_data->suspended = false;
+		wake_up_interruptible(&client_data->ishtp_resume_wait);
+	}
+}
+
 ishtp_print_log ishtp_hid_print_trace;
 
 /**
@@ -822,6 +833,8 @@ static int hid_ishtp_cl_probe(struct ishtp_cl_device *cl_device)
 	init_waitqueue_head(&client_data->ishtp_resume_wait);
 
 	INIT_WORK(&client_data->work, hid_ishtp_cl_reset_handler);
+	INIT_WORK(&client_data->resume_work, hid_ishtp_cl_resume_handler);
+
 
 	ishtp_hid_print_trace = ishtp_trace_callback(cl_device);
 
@@ -921,7 +934,7 @@ static int hid_ishtp_cl_resume(struct device *device)
 
 	hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__,
 			hid_ishtp_cl);
-	client_data->suspended = false;
+	schedule_work(&client_data->resume_work);
 	return 0;
 }
 
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h
index f88443a7d935..6a5cc11aefd8 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid.h
+++ b/drivers/hid/intel-ish-hid/ishtp-hid.h
@@ -135,6 +135,7 @@ struct ishtp_cl_data {
 	int multi_packet_cnt;
 
 	struct work_struct work;
+	struct work_struct resume_work;
 	struct ishtp_cl_device *cl_device;
 };
 
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index f0802b047ed8..aa2c51624012 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -314,13 +314,6 @@ static int ishtp_cl_device_resume(struct device *dev)
 	if (!device)
 		return 0;
 
-	/*
-	 * When ISH needs hard reset, it is done asynchrnously, hence bus
-	 * resume will  be called before full ISH resume
-	 */
-	if (device->ishtp_dev->resume_flag)
-		return 0;
-
 	driver = to_ishtp_cl_driver(dev->driver);
 	if (driver && driver->driver.pm) {
 		if (driver->driver.pm->resume)
@@ -849,6 +842,28 @@ struct device *ishtp_device(struct ishtp_cl_device *device)
 }
 EXPORT_SYMBOL(ishtp_device);
 
+/**
+ * ishtp_wait_resume() - Wait for IPC resume
+ *
+ * Wait for IPC resume
+ *
+ * Return: resume complete or not
+ */
+bool ishtp_wait_resume(struct ishtp_device *dev)
+{
+	/* 50ms to get resume response */
+	#define WAIT_FOR_RESUME_ACK_MS		50
+
+	/* Waiting to get resume response */
+	if (dev->resume_flag)
+		wait_event_interruptible_timeout(dev->resume_wait,
+						 !dev->resume_flag,
+						 msecs_to_jiffies(WAIT_FOR_RESUME_ACK_MS));
+
+	return (!dev->resume_flag);
+}
+EXPORT_SYMBOL_GPL(ishtp_wait_resume);
+
 /**
  * ishtp_get_pci_device() - Return PCI device dev pointer
  * This interface is used to return PCI device pointer
diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h
index 25e2b4e80502..aee8ff4739b1 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -81,6 +81,8 @@ int ishtp_register_event_cb(struct ishtp_cl_device *device,
 
 /* Get the device * from ishtp device instance */
 struct device *ishtp_device(struct ishtp_cl_device *cl_device);
+/* wait for IPC resume */
+bool ishtp_wait_resume(struct ishtp_device *dev);
 /* Trace interface for clients */
 ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device);
 /* Get device pointer of PCI device for DMA acces */

From 3fdcf7cdfc229346d028242e73562704ad644dd0 Mon Sep 17 00:00:00 2001
From: "Luke D. Jones" <luke@ljones.dev>
Date: Mon, 5 Jul 2021 10:26:59 +1200
Subject: [PATCH 040/502] HID: asus: Remove check for same LED brightness on
 set

Remove the early return on LED brightness set so that any controller
application, daemon, or desktop may set the same brightness at any stage.

This is required because many ASUS ROG keyboards will default to max
brightness on laptop resume if the LEDs were set to off before sleep.

Signed-off-by: Luke D Jones <luke@ljones.dev>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-asus.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index fca8fc78a78a..fb807c8e989b 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -485,9 +485,6 @@ static void asus_kbd_backlight_set(struct led_classdev *led_cdev,
 {
 	struct asus_kbd_leds *led = container_of(led_cdev, struct asus_kbd_leds,
 						 cdev);
-	if (led->brightness == brightness)
-		return;
-
 	led->brightness = brightness;
 	schedule_work(&led->work);
 }

From 8aa6348634d1bc81801329e6ea98cd88ec07fb10 Mon Sep 17 00:00:00 2001
From: Dylan MacKenzie <ecstaticmorse@gmail.com>
Date: Tue, 13 Jul 2021 16:31:07 -0700
Subject: [PATCH 041/502] HID: amd_sfh: Use correct MMIO register for DMA
 address

amd_stop_sensor_v2 accidentally used a different MMIO register than
amd_start_sensor_v2 for the DMA address.

Fixes: f264481ad614dfd9 ("HID: amd_sfh: Extend driver capabilities for multi-generation support")
Signed-off-by: Dylan MacKenzie <ecstaticmorse@gmail.com>
Acked-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
index 96e2577fa37e..8d68796aa905 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
@@ -58,7 +58,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx)
 	cmd_base.cmd_v2.sensor_id = sensor_idx;
 	cmd_base.cmd_v2.length  = 16;
 
-	writeq(0x0, privdata->mmio + AMD_C2P_MSG2);
+	writeq(0x0, privdata->mmio + AMD_C2P_MSG1);
 	writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0);
 }
 

From 2acf15b94d5b8ea8392c4b6753a6ffac3135cd78 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Fri, 2 Jul 2021 12:07:43 +0800
Subject: [PATCH 042/502] reiserfs: add check for root_inode in
 reiserfs_fill_super

Our syzcaller report a NULL pointer dereference:

BUG: kernel NULL pointer dereference, address: 0000000000000000
PGD 116e95067 P4D 116e95067 PUD 1080b5067 PMD 0
Oops: 0010 [#1] SMP KASAN
CPU: 7 PID: 592 Comm: a.out Not tainted 5.13.0-next-20210629-dirty #67
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-p4
RIP: 0010:0x0
Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6.
RSP: 0018:ffff888114e779b8 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 1ffff110229cef39 RCX: ffffffffaa67e1aa
RDX: 0000000000000000 RSI: ffff88810a58ee00 RDI: ffff8881233180b0
RBP: ffffffffac38e9c0 R08: ffffffffaa67e17e R09: 0000000000000001
R10: ffffffffb91c5557 R11: fffffbfff7238aaa R12: ffff88810a58ee00
R13: ffff888114e77aa0 R14: 0000000000000000 R15: ffff8881233180b0
FS:  00007f946163c480(0000) GS:ffff88839f1c0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffffffffd6 CR3: 00000001099c1000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 __lookup_slow+0x116/0x2d0
 ? page_put_link+0x120/0x120
 ? __d_lookup+0xfc/0x320
 ? d_lookup+0x49/0x90
 lookup_one_len+0x13c/0x170
 ? __lookup_slow+0x2d0/0x2d0
 ? reiserfs_schedule_old_flush+0x31/0x130
 reiserfs_lookup_privroot+0x64/0x150
 reiserfs_fill_super+0x158c/0x1b90
 ? finish_unfinished+0xb10/0xb10
 ? bprintf+0xe0/0xe0
 ? __mutex_lock_slowpath+0x30/0x30
 ? __kasan_check_write+0x20/0x30
 ? up_write+0x51/0xb0
 ? set_blocksize+0x9f/0x1f0
 mount_bdev+0x27c/0x2d0
 ? finish_unfinished+0xb10/0xb10
 ? reiserfs_kill_sb+0x120/0x120
 get_super_block+0x19/0x30
 legacy_get_tree+0x76/0xf0
 vfs_get_tree+0x49/0x160
 ? capable+0x1d/0x30
 path_mount+0xacc/0x1380
 ? putname+0x97/0xd0
 ? finish_automount+0x450/0x450
 ? kmem_cache_free+0xf8/0x5a0
 ? putname+0x97/0xd0
 do_mount+0xe2/0x110
 ? path_mount+0x1380/0x1380
 ? copy_mount_options+0x69/0x140
 __x64_sys_mount+0xf0/0x190
 do_syscall_64+0x35/0x80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

This is because 'root_inode' is initialized with wrong mode, and
it's i_op is set to 'reiserfs_special_inode_operations'. Thus add
check for 'root_inode' to fix the problem.

Link: https://lore.kernel.org/r/20210702040743.1918552-1-yukuai3@huawei.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/reiserfs/super.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 3ffafc73acf0..58481f8d63d5 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2082,6 +2082,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 		unlock_new_inode(root_inode);
 	}
 
+	if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) ||
+	    !root_inode->i_size) {
+		SWARN(silent, s, "", "corrupt root inode, run fsck");
+		iput(root_inode);
+		errval = -EUCLEAN;
+		goto error;
+	}
+
 	s->s_root = d_make_root(root_inode);
 	if (!s->s_root)
 		goto error;

From 728d392f8a799f037812d0f2b254fb3b5e115fcf Mon Sep 17 00:00:00 2001
From: Javier Pello <javier.pello@urjc.es>
Date: Wed, 14 Jul 2021 18:54:48 +0200
Subject: [PATCH 043/502] fs/ext2: Avoid page_address on pages returned by
 ext2_get_page

Commit 782b76d7abdf02b12c46ed6f1e9bf715569027f7 ("fs/ext2: Replace
kmap() with kmap_local_page()") replaced the kmap/kunmap calls in
ext2_get_page/ext2_put_page with kmap_local_page/kunmap_local for
efficiency reasons. As a necessary side change, the commit also
made ext2_get_page (and ext2_find_entry and ext2_dotdot) return
the mapping address along with the page itself, as it is required
for kunmap_local, and converted uses of page_address on such pages
to use the newly returned address instead. However, uses of
page_address on such pages were missed in ext2_check_page and
ext2_delete_entry, which triggers oopses if kmap_local_page happens
to return an address from high memory. Fix this now by converting
the remaining uses of page_address to use the right address, as
returned by kmap_local_page.

Link: https://lore.kernel.org/r/20210714185448.8707ac239e9f12b3a7f5b9f9@urjc.es
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Javier Pello <javier.pello@urjc.es>
Fixes: 782b76d7abdf ("fs/ext2: Replace kmap() with kmap_local_page()")
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/dir.c   | 12 ++++++------
 fs/ext2/ext2.h  |  3 ++-
 fs/ext2/namei.c |  4 ++--
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 14292dba3a12..2c2f179b6977 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -106,12 +106,11 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
 	return err;
 }
 
-static bool ext2_check_page(struct page *page, int quiet)
+static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
 {
 	struct inode *dir = page->mapping->host;
 	struct super_block *sb = dir->i_sb;
 	unsigned chunk_size = ext2_chunk_size(dir);
-	char *kaddr = page_address(page);
 	u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
 	unsigned offs, rec_len;
 	unsigned limit = PAGE_SIZE;
@@ -205,7 +204,8 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n,
 	if (!IS_ERR(page)) {
 		*page_addr = kmap_local_page(page);
 		if (unlikely(!PageChecked(page))) {
-			if (PageError(page) || !ext2_check_page(page, quiet))
+			if (PageError(page) || !ext2_check_page(page, quiet,
+								*page_addr))
 				goto fail;
 		}
 	}
@@ -584,10 +584,10 @@ out_unlock:
  * ext2_delete_entry deletes a directory entry by merging it with the
  * previous entry. Page is up-to-date.
  */
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
+int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page,
+			char *kaddr)
 {
 	struct inode *inode = page->mapping->host;
-	char *kaddr = page_address(page);
 	unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
 	unsigned to = ((char *)dir - kaddr) +
 				ext2_rec_len_from_disk(dir->rec_len);
@@ -607,7 +607,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 		de = ext2_next_entry(de);
 	}
 	if (pde)
-		from = (char*)pde - (char*)page_address(page);
+		from = (char *)pde - kaddr;
 	pos = page_offset(page) + from;
 	lock_page(page);
 	err = ext2_prepare_chunk(page, pos, to - from);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index b0a694820cb7..e512630cb63e 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -740,7 +740,8 @@ extern int ext2_inode_by_name(struct inode *dir,
 extern int ext2_make_empty(struct inode *, struct inode *);
 extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *,
 						struct page **, void **res_page_addr);
-extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page,
+			     char *kaddr);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, void *,
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 1f69b81655b6..5f6b7560eb3f 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -293,7 +293,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry)
 		goto out;
 	}
 
-	err = ext2_delete_entry (de, page);
+	err = ext2_delete_entry (de, page, page_addr);
 	ext2_put_page(page, page_addr);
 	if (err)
 		goto out;
@@ -397,7 +397,7 @@ static int ext2_rename (struct user_namespace * mnt_userns,
 	old_inode->i_ctime = current_time(old_inode);
 	mark_inode_dirty(old_inode);
 
-	ext2_delete_entry(old_de, old_page);
+	ext2_delete_entry(old_de, old_page, old_page_addr);
 
 	if (dir_de) {
 		if (old_dir != new_dir)

From 13d257503c0930010ef9eed78b689cec417ab741 Mon Sep 17 00:00:00 2001
From: Shreyansh Chouhan <chouhan.shreyansh630@gmail.com>
Date: Fri, 9 Jul 2021 20:59:29 +0530
Subject: [PATCH 044/502] reiserfs: check directory items on read from disk

While verifying the leaf item that we read from the disk, reiserfs
doesn't check the directory items, this could cause a crash when we
read a directory item from the disk that has an invalid deh_location.

This patch adds a check to the directory items read from the disk that
does a bounds check on deh_location for the directory entries. Any
directory entry header with a directory entry offset greater than the
item length is considered invalid.

Link: https://lore.kernel.org/r/20210709152929.766363-1-chouhan.shreyansh630@gmail.com
Reported-by: syzbot+c31a48e6702ccb3d64c9@syzkaller.appspotmail.com
Signed-off-by: Shreyansh Chouhan <chouhan.shreyansh630@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/reiserfs/stree.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 476a7ff49482..ef42729216d1 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -387,6 +387,24 @@ void pathrelse(struct treepath *search_path)
 	search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
 }
 
+static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
+{
+	struct reiserfs_de_head *deh;
+	int i;
+
+	deh = B_I_DEH(bh, ih);
+	for (i = 0; i < ih_entry_count(ih); i++) {
+		if (deh_location(&deh[i]) > ih_item_len(ih)) {
+			reiserfs_warning(NULL, "reiserfs-5094",
+					 "directory entry location seems wrong %h",
+					 &deh[i]);
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
 static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
 {
 	struct block_head *blkh;
@@ -454,11 +472,14 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
 					 "(second one): %h", ih);
 			return 0;
 		}
-		if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) {
-			reiserfs_warning(NULL, "reiserfs-5093",
-					 "item entry count seems wrong %h",
-					 ih);
-			return 0;
+		if (is_direntry_le_ih(ih)) {
+			if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
+				reiserfs_warning(NULL, "reiserfs-5093",
+						 "item entry count seems wrong %h",
+						 ih);
+				return 0;
+			}
+			return has_valid_deh_location(bh, ih);
 		}
 		prev_location = ih_location(ih);
 	}

From 59089a189e3adde4cf85f2ce479738d1ae4c514d Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 29 Jun 2021 09:39:15 +0000
Subject: [PATCH 045/502] bpf: Remove superfluous aux sanitation on subprog
 rejection

Follow-up to fe9a5ca7e370 ("bpf: Do not mark insn as seen under speculative
path verification"). The sanitize_insn_aux_data() helper does not serve a
particular purpose in today's code. The original intention for the helper
was that if function-by-function verification fails, a given program would
be cleared from temporary insn_aux_data[], and then its verification would
be re-attempted in the context of the main program a second time.

However, a failure in do_check_subprogs() will skip do_check_main() and
propagate the error to the user instead, thus such situation can never occur.
Given its interaction is not compatible to the Spectre v1 mitigation (due to
comparing aux->seen with env->pass_cnt), just remove sanitize_insn_aux_data()
to avoid future bugs in this area.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 34 ----------------------------------
 1 file changed, 34 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9de3c9c3267c..8a7a28b4cfb9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12752,37 +12752,6 @@ static void free_states(struct bpf_verifier_env *env)
 	}
 }
 
-/* The verifier is using insn_aux_data[] to store temporary data during
- * verification and to store information for passes that run after the
- * verification like dead code sanitization. do_check_common() for subprogram N
- * may analyze many other subprograms. sanitize_insn_aux_data() clears all
- * temporary data after do_check_common() finds that subprogram N cannot be
- * verified independently. pass_cnt counts the number of times
- * do_check_common() was run and insn->aux->seen tells the pass number
- * insn_aux_data was touched. These variables are compared to clear temporary
- * data from failed pass. For testing and experiments do_check_common() can be
- * run multiple times even when prior attempt to verify is unsuccessful.
- *
- * Note that special handling is needed on !env->bypass_spec_v1 if this is
- * ever called outside of error path with subsequent program rejection.
- */
-static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
-{
-	struct bpf_insn *insn = env->prog->insnsi;
-	struct bpf_insn_aux_data *aux;
-	int i, class;
-
-	for (i = 0; i < env->prog->len; i++) {
-		class = BPF_CLASS(insn[i].code);
-		if (class != BPF_LDX && class != BPF_STX)
-			continue;
-		aux = &env->insn_aux_data[i];
-		if (aux->seen != env->pass_cnt)
-			continue;
-		memset(aux, 0, offsetof(typeof(*aux), orig_idx));
-	}
-}
-
 static int do_check_common(struct bpf_verifier_env *env, int subprog)
 {
 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
@@ -12859,9 +12828,6 @@ out:
 	if (!ret && pop_log)
 		bpf_vlog_reset(&env->log, 0);
 	free_states(env);
-	if (ret)
-		/* clean aux data in case subprog was rejected */
-		sanitize_insn_aux_data(env);
 	return ret;
 }
 

From e042aa532c84d18ff13291d00620502ce7a38dda Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 16 Jul 2021 09:18:21 +0000
Subject: [PATCH 046/502] bpf: Fix pointer arithmetic mask tightening under
 state pruning

In 7fedb63a8307 ("bpf: Tighten speculative pointer arithmetic mask") we
narrowed the offset mask for unprivileged pointer arithmetic in order to
mitigate a corner case where in the speculative domain it is possible to
advance, for example, the map value pointer by up to value_size-1 out-of-
bounds in order to leak kernel memory via side-channel to user space.

The verifier's state pruning for scalars leaves one corner case open
where in the first verification path R_x holds an unknown scalar with an
aux->alu_limit of e.g. 7, and in a second verification path that same
register R_x, here denoted as R_x', holds an unknown scalar which has
tighter bounds and would thus satisfy range_within(R_x, R_x') as well as
tnum_in(R_x, R_x') for state pruning, yielding an aux->alu_limit of 3:
Given the second path fits the register constraints for pruning, the final
generated mask from aux->alu_limit will remain at 7. While technically
not wrong for the non-speculative domain, it would however be possible
to craft similar cases where the mask would be too wide as in 7fedb63a8307.

One way to fix it is to detect the presence of unknown scalar map pointer
arithmetic and force a deeper search on unknown scalars to ensure that
we do not run into a masking mismatch.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  1 +
 kernel/bpf/verifier.c        | 27 +++++++++++++++++----------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index e774ecc1cd1f..7ba7e800d472 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -414,6 +414,7 @@ struct bpf_verifier_env {
 	u32 used_map_cnt;		/* number of used maps */
 	u32 used_btf_cnt;		/* number of used BTF objects */
 	u32 id_gen;			/* used to generate unique reg IDs */
+	bool explore_alu_limits;
 	bool allow_ptr_leaks;
 	bool allow_uninit_stack;
 	bool allow_ptr_to_map_access;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8a7a28b4cfb9..657062cb4d85 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6561,6 +6561,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
 		alu_state |= ptr_is_dst_reg ?
 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+
+		/* Limit pruning on unknown scalars to enable deep search for
+		 * potential masking differences from other program paths.
+		 */
+		if (!off_is_imm)
+			env->explore_alu_limits = true;
 	}
 
 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
@@ -9936,8 +9942,8 @@ next:
 }
 
 /* Returns true if (rold safe implies rcur safe) */
-static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
-		    struct bpf_id_pair *idmap)
+static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+		    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
 {
 	bool equal;
 
@@ -9963,6 +9969,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 		return false;
 	switch (rold->type) {
 	case SCALAR_VALUE:
+		if (env->explore_alu_limits)
+			return false;
 		if (rcur->type == SCALAR_VALUE) {
 			if (!rold->precise && !rcur->precise)
 				return true;
@@ -10053,9 +10061,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	return false;
 }
 
-static bool stacksafe(struct bpf_func_state *old,
-		      struct bpf_func_state *cur,
-		      struct bpf_id_pair *idmap)
+static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+		      struct bpf_func_state *cur, struct bpf_id_pair *idmap)
 {
 	int i, spi;
 
@@ -10100,9 +10107,8 @@ static bool stacksafe(struct bpf_func_state *old,
 			continue;
 		if (old->stack[spi].slot_type[0] != STACK_SPILL)
 			continue;
-		if (!regsafe(&old->stack[spi].spilled_ptr,
-			     &cur->stack[spi].spilled_ptr,
-			     idmap))
+		if (!regsafe(env, &old->stack[spi].spilled_ptr,
+			     &cur->stack[spi].spilled_ptr, idmap))
 			/* when explored and current stack slot are both storing
 			 * spilled registers, check that stored pointers types
 			 * are the same as well.
@@ -10159,10 +10165,11 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
 
 	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
 	for (i = 0; i < MAX_BPF_REG; i++)
-		if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch))
+		if (!regsafe(env, &old->regs[i], &cur->regs[i],
+			     env->idmap_scratch))
 			return false;
 
-	if (!stacksafe(old, cur, env->idmap_scratch))
+	if (!stacksafe(env, old, cur, env->idmap_scratch))
 		return false;
 
 	if (!refsafe(old, cur))

From a6c39de76d709f30982d4b80a9b9537e1d388858 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 16 Jul 2021 13:15:33 +0000
Subject: [PATCH 047/502] bpf, selftests: Add test cases for pointer alu from
 multiple paths

Add several test cases for checking update_alu_sanitation_state() under
multiple paths:

  # ./test_verifier
  [...]
  #1061/u map access: known scalar += value_ptr unknown vs const OK
  #1061/p map access: known scalar += value_ptr unknown vs const OK
  #1062/u map access: known scalar += value_ptr const vs unknown OK
  #1062/p map access: known scalar += value_ptr const vs unknown OK
  #1063/u map access: known scalar += value_ptr const vs const (ne) OK
  #1063/p map access: known scalar += value_ptr const vs const (ne) OK
  #1064/u map access: known scalar += value_ptr const vs const (eq) OK
  #1064/p map access: known scalar += value_ptr const vs const (eq) OK
  #1065/u map access: known scalar += value_ptr unknown vs unknown (eq) OK
  #1065/p map access: known scalar += value_ptr unknown vs unknown (eq) OK
  #1066/u map access: known scalar += value_ptr unknown vs unknown (lt) OK
  #1066/p map access: known scalar += value_ptr unknown vs unknown (lt) OK
  #1067/u map access: known scalar += value_ptr unknown vs unknown (gt) OK
  #1067/p map access: known scalar += value_ptr unknown vs unknown (gt) OK
  [...]
  Summary: 1762 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/verifier/value_ptr_arith.c  | 229 ++++++++++++++++++
 1 file changed, 229 insertions(+)

diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index a3e593ddfafc..2debba4e8a3a 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -1,3 +1,232 @@
+{
+	"map access: known scalar += value_ptr unknown vs const",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, len)),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+	BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+	BPF_MOV64_IMM(BPF_REG_1, 6),
+	BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+	BPF_MOV64_IMM(BPF_REG_1, 3),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_16b = { 5 },
+	.fixup_map_array_48b = { 8 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"map access: known scalar += value_ptr const vs unknown",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, len)),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+	BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+	BPF_MOV64_IMM(BPF_REG_1, 3),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+	BPF_MOV64_IMM(BPF_REG_1, 6),
+	BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_16b = { 5 },
+	.fixup_map_array_48b = { 8 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"map access: known scalar += value_ptr const vs const (ne)",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, len)),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+	BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+	BPF_MOV64_IMM(BPF_REG_1, 3),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+	BPF_MOV64_IMM(BPF_REG_1, 5),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_16b = { 5 },
+	.fixup_map_array_48b = { 8 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"map access: known scalar += value_ptr const vs const (eq)",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, len)),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+	BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+	BPF_MOV64_IMM(BPF_REG_1, 5),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+	BPF_MOV64_IMM(BPF_REG_1, 5),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_16b = { 5 },
+	.fixup_map_array_48b = { 8 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"map access: known scalar += value_ptr unknown vs unknown (eq)",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, len)),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+	BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+	BPF_MOV64_IMM(BPF_REG_1, 6),
+	BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+	BPF_MOV64_IMM(BPF_REG_1, 6),
+	BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_16b = { 5 },
+	.fixup_map_array_48b = { 8 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"map access: known scalar += value_ptr unknown vs unknown (lt)",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, len)),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+	BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+	BPF_MOV64_IMM(BPF_REG_1, 6),
+	BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+	BPF_MOV64_IMM(BPF_REG_1, 6),
+	BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_16b = { 5 },
+	.fixup_map_array_48b = { 8 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"map access: known scalar += value_ptr unknown vs unknown (gt)",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, len)),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+	BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+	BPF_MOV64_IMM(BPF_REG_1, 6),
+	BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+	BPF_MOV64_IMM(BPF_REG_1, 6),
+	BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_16b = { 5 },
+	.fixup_map_array_48b = { 8 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+	.result = ACCEPT,
+	.retval = 1,
+},
 {
 	"map access: known scalar += value_ptr from different maps",
 	.insns = {

From 9d7a6c95f62bc335b62aaf9d50590122bd03a796 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 5 Jul 2021 10:44:52 +0200
Subject: [PATCH 048/502] perf: Fix required permissions if sigtrap is
 requested

If perf_event_open() is called with another task as target and
perf_event_attr::sigtrap is set, and the target task's user does not
match the calling user, also require the CAP_KILL capability or
PTRACE_MODE_ATTACH permissions.

Otherwise, with the CAP_PERFMON capability alone it would be possible
for a user to send SIGTRAP signals via perf events to another user's
tasks. This could potentially result in those tasks being terminated if
they cannot handle SIGTRAP signals.

Note: The check complements the existing capability check, but is not
supposed to supersede the ptrace_may_access() check. At a high level we
now have:

	capable of CAP_PERFMON and (CAP_KILL if sigtrap)
		OR
	ptrace_may_access(...) // also checks for same thread-group and uid

Fixes: 97ba62b27867 ("perf: Add support for SIGTRAP on perf events")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org> # 5.13+
Link: https://lore.kernel.org/r/20210705084453.2151729-1-elver@google.com
---
 kernel/events/core.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 464917096e73..c13730b7ac01 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -12158,10 +12158,33 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (task) {
+		unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS;
+		bool is_capable;
+
 		err = down_read_interruptible(&task->signal->exec_update_lock);
 		if (err)
 			goto err_file;
 
+		is_capable = perfmon_capable();
+		if (attr.sigtrap) {
+			/*
+			 * perf_event_attr::sigtrap sends signals to the other
+			 * task. Require the current task to also have
+			 * CAP_KILL.
+			 */
+			rcu_read_lock();
+			is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL);
+			rcu_read_unlock();
+
+			/*
+			 * If the required capabilities aren't available, checks
+			 * for ptrace permissions: upgrade to ATTACH, since
+			 * sending signals can effectively change the target
+			 * task.
+			 */
+			ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS;
+		}
+
 		/*
 		 * Preserve ptrace permission check for backwards compatibility.
 		 *
@@ -12171,7 +12194,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		 * perf_event_exit_task() that could imply).
 		 */
 		err = -EACCES;
-		if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+		if (!is_capable && !ptrace_may_access(task, ptrace_mode))
 			goto err_cred;
 	}
 

From b068fc04de10fff8974f6ef32b861ad134d94ba4 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 5 Jul 2021 10:44:53 +0200
Subject: [PATCH 049/502] perf: Refactor permissions check into
 perf_check_permission()

Refactor the permission check in perf_event_open() into a helper
perf_check_permission(). This makes the permission check logic more
readable (because we no longer have a negated disjunction). Add a
comment mentioning the ptrace check also checks the uid.

No functional change intended.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Link: https://lore.kernel.org/r/20210705084453.2151729-2-elver@google.com
---
 kernel/events/core.c | 58 ++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index c13730b7ac01..1cb1f9b8392e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11917,6 +11917,37 @@ again:
 	return gctx;
 }
 
+static bool
+perf_check_permission(struct perf_event_attr *attr, struct task_struct *task)
+{
+	unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS;
+	bool is_capable = perfmon_capable();
+
+	if (attr->sigtrap) {
+		/*
+		 * perf_event_attr::sigtrap sends signals to the other task.
+		 * Require the current task to also have CAP_KILL.
+		 */
+		rcu_read_lock();
+		is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL);
+		rcu_read_unlock();
+
+		/*
+		 * If the required capabilities aren't available, checks for
+		 * ptrace permissions: upgrade to ATTACH, since sending signals
+		 * can effectively change the target task.
+		 */
+		ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS;
+	}
+
+	/*
+	 * Preserve ptrace permission check for backwards compatibility. The
+	 * ptrace check also includes checks that the current task and other
+	 * task have matching uids, and is therefore not done here explicitly.
+	 */
+	return is_capable || ptrace_may_access(task, ptrace_mode);
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -12158,43 +12189,18 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (task) {
-		unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS;
-		bool is_capable;
-
 		err = down_read_interruptible(&task->signal->exec_update_lock);
 		if (err)
 			goto err_file;
 
-		is_capable = perfmon_capable();
-		if (attr.sigtrap) {
-			/*
-			 * perf_event_attr::sigtrap sends signals to the other
-			 * task. Require the current task to also have
-			 * CAP_KILL.
-			 */
-			rcu_read_lock();
-			is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL);
-			rcu_read_unlock();
-
-			/*
-			 * If the required capabilities aren't available, checks
-			 * for ptrace permissions: upgrade to ATTACH, since
-			 * sending signals can effectively change the target
-			 * task.
-			 */
-			ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS;
-		}
-
 		/*
-		 * Preserve ptrace permission check for backwards compatibility.
-		 *
 		 * We must hold exec_update_lock across this and any potential
 		 * perf_install_in_context() call for this new event to
 		 * serialize against exec() altering our credentials (and the
 		 * perf_event_exit_task() that could imply).
 		 */
 		err = -EACCES;
-		if (!is_capable && !ptrace_may_access(task, ptrace_mode))
+		if (!perf_check_permission(&attr, task))
 			goto err_cred;
 	}
 

From ba02920c51debb9198e72b3a8726a7c5ae4ffb41 Mon Sep 17 00:00:00 2001
From: Vidya Sagar <vidyas@nvidia.com>
Date: Tue, 13 Jul 2021 17:05:46 +0530
Subject: [PATCH 050/502] arm64: tegra: Enable SMMU support for PCIe on
 Tegra194

As of commit c7289b1c8a4e ("arm64: tegra: Enable SMMU support on
Tegra194"), SMMU support is enabled system-wide on Tegra194. However,
there was a bit of overlap between the SMMU enablement and the PCIe
support addition, so the PCIe device tree nodes are missing the iommus
and interconnects properties. This in turn leads to SMMU faults for
these devices, since by default the ARM SMMU will fault.

Add the iommus and interconnects properties to all the PCIe device
tree nodes to restore their functionality.

Fixes: c7289b1c8a4e ("arm64: tegra: Enable SMMU support on Tegra194")
Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 arch/arm64/boot/dts/nvidia/tegra194.dtsi | 60 +++++++++++++++++++++---
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index 076d5efc4c3d..5ba7a4519b95 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -1840,7 +1840,11 @@
 
 		interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE1R &emc>,
 				<&mc TEGRA194_MEMORY_CLIENT_PCIE1W &emc>;
-		interconnect-names = "read", "write";
+		interconnect-names = "dma-mem", "write";
+		iommus = <&smmu TEGRA194_SID_PCIE1>;
+		iommu-map = <0x0 &smmu TEGRA194_SID_PCIE1 0x1000>;
+		iommu-map-mask = <0x0>;
+		dma-coherent;
 	};
 
 	pcie@14120000 {
@@ -1890,7 +1894,11 @@
 
 		interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE2AR &emc>,
 				<&mc TEGRA194_MEMORY_CLIENT_PCIE2AW &emc>;
-		interconnect-names = "read", "write";
+		interconnect-names = "dma-mem", "write";
+		iommus = <&smmu TEGRA194_SID_PCIE2>;
+		iommu-map = <0x0 &smmu TEGRA194_SID_PCIE2 0x1000>;
+		iommu-map-mask = <0x0>;
+		dma-coherent;
 	};
 
 	pcie@14140000 {
@@ -1940,7 +1948,11 @@
 
 		interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE3R &emc>,
 				<&mc TEGRA194_MEMORY_CLIENT_PCIE3W &emc>;
-		interconnect-names = "read", "write";
+		interconnect-names = "dma-mem", "write";
+		iommus = <&smmu TEGRA194_SID_PCIE3>;
+		iommu-map = <0x0 &smmu TEGRA194_SID_PCIE3 0x1000>;
+		iommu-map-mask = <0x0>;
+		dma-coherent;
 	};
 
 	pcie@14160000 {
@@ -1990,7 +2002,11 @@
 
 		interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>,
 				<&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>;
-		interconnect-names = "read", "write";
+		interconnect-names = "dma-mem", "write";
+		iommus = <&smmu TEGRA194_SID_PCIE4>;
+		iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>;
+		iommu-map-mask = <0x0>;
+		dma-coherent;
 	};
 
 	pcie@14180000 {
@@ -2040,7 +2056,11 @@
 
 		interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>,
 				<&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>;
-		interconnect-names = "read", "write";
+		interconnect-names = "dma-mem", "write";
+		iommus = <&smmu TEGRA194_SID_PCIE0>;
+		iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>;
+		iommu-map-mask = <0x0>;
+		dma-coherent;
 	};
 
 	pcie@141a0000 {
@@ -2094,7 +2114,11 @@
 
 		interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>,
 				<&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>;
-		interconnect-names = "read", "write";
+		interconnect-names = "dma-mem", "write";
+		iommus = <&smmu TEGRA194_SID_PCIE5>;
+		iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>;
+		iommu-map-mask = <0x0>;
+		dma-coherent;
 	};
 
 	pcie_ep@14160000 {
@@ -2127,6 +2151,14 @@
 		nvidia,aspm-cmrt-us = <60>;
 		nvidia,aspm-pwr-on-t-us = <20>;
 		nvidia,aspm-l0s-entrance-latency-us = <3>;
+
+		interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>,
+				<&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>;
+		interconnect-names = "dma-mem", "write";
+		iommus = <&smmu TEGRA194_SID_PCIE4>;
+		iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>;
+		iommu-map-mask = <0x0>;
+		dma-coherent;
 	};
 
 	pcie_ep@14180000 {
@@ -2159,6 +2191,14 @@
 		nvidia,aspm-cmrt-us = <60>;
 		nvidia,aspm-pwr-on-t-us = <20>;
 		nvidia,aspm-l0s-entrance-latency-us = <3>;
+
+		interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>,
+				<&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>;
+		interconnect-names = "dma-mem", "write";
+		iommus = <&smmu TEGRA194_SID_PCIE0>;
+		iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>;
+		iommu-map-mask = <0x0>;
+		dma-coherent;
 	};
 
 	pcie_ep@141a0000 {
@@ -2194,6 +2234,14 @@
 		nvidia,aspm-cmrt-us = <60>;
 		nvidia,aspm-pwr-on-t-us = <20>;
 		nvidia,aspm-l0s-entrance-latency-us = <3>;
+
+		interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>,
+				<&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>;
+		interconnect-names = "dma-mem", "write";
+		iommus = <&smmu TEGRA194_SID_PCIE5>;
+		iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>;
+		iommu-map-mask = <0x0>;
+		dma-coherent;
 	};
 
 	sram@40000000 {

From cfbe3650dd3ef2ea9a4420ca89d9a4df98af3fb6 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Wed, 14 Jul 2021 11:27:03 +0800
Subject: [PATCH 051/502] netfilter: nf_tables: fix audit memory leak in
 nf_tables_commit

In nf_tables_commit, if nf_tables_commit_audit_alloc fails, it does not
free the adp variable.

Fix this by adding nf_tables_commit_audit_free which frees
the linked list with the head node adl.

backtrace:
  kmalloc include/linux/slab.h:591 [inline]
  kzalloc include/linux/slab.h:721 [inline]
  nf_tables_commit_audit_alloc net/netfilter/nf_tables_api.c:8439 [inline]
  nf_tables_commit+0x16e/0x1760 net/netfilter/nf_tables_api.c:8508
  nfnetlink_rcv_batch+0x512/0xa80 net/netfilter/nfnetlink.c:562
  nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline]
  nfnetlink_rcv+0x1fa/0x220 net/netfilter/nfnetlink.c:652
  netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
  netlink_unicast+0x2c7/0x3e0 net/netlink/af_netlink.c:1340
  netlink_sendmsg+0x36b/0x6b0 net/netlink/af_netlink.c:1929
  sock_sendmsg_nosec net/socket.c:702 [inline]
  sock_sendmsg+0x56/0x80 net/socket.c:722

Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: kernel test robot <lkp@intel.com>
Fixes: c520292f29b8 ("audit: log nftables configuration change events once per table")
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index de182d1f7c4e..081437dd75b7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8445,6 +8445,16 @@ static int nf_tables_commit_audit_alloc(struct list_head *adl,
 	return 0;
 }
 
+static void nf_tables_commit_audit_free(struct list_head *adl)
+{
+	struct nft_audit_data *adp, *adn;
+
+	list_for_each_entry_safe(adp, adn, adl, list) {
+		list_del(&adp->list);
+		kfree(adp);
+	}
+}
+
 static void nf_tables_commit_audit_collect(struct list_head *adl,
 					   struct nft_table *table, u32 op)
 {
@@ -8509,6 +8519,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 		ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
 		if (ret) {
 			nf_tables_commit_chain_prepare_cancel(net);
+			nf_tables_commit_audit_free(&adl);
 			return ret;
 		}
 		if (trans->msg_type == NFT_MSG_NEWRULE ||
@@ -8518,6 +8529,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			ret = nf_tables_commit_chain_prepare(net, chain);
 			if (ret < 0) {
 				nf_tables_commit_chain_prepare_cancel(net);
+				nf_tables_commit_audit_free(&adl);
 				return ret;
 			}
 		}

From 7f5231b114da76bfd5d0fc685d5cf408d1bbfca7 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Fri, 16 Jul 2021 21:08:02 +0530
Subject: [PATCH 052/502] platform/x86: amd-pmc: Fix undefined reference to
 __udivdi3

It was reported that on i386 config

------
on i386:

ld: drivers/platform/x86/amd-pmc.o: in function `s0ix_stats_show':
amd-pmc.c:(.text+0x100): undefined reference to `__udivdi3'
-------

The reason for this is that 64-bit integer division is not supported
on 32-bit architecture. Use do_div macro to fix this.

Fixes: b9a4fa6978be ("platform/x86: amd-pmc: Add support for logging s0ix counters")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org> # and build-tested
Link: https://lore.kernel.org/r/20210716153802.2929670-1-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd-pmc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index 663a4ca0580d..3481479a2942 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -189,7 +189,8 @@ static int s0ix_stats_show(struct seq_file *s, void *unused)
 	exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET);
 
 	/* It's in 48MHz. We need to convert it */
-	residency = (exit_time - entry_time) / 48;
+	residency = exit_time - entry_time;
+	do_div(residency, 48);
 
 	seq_puts(s, "=== S0ix statistics ===\n");
 	seq_printf(s, "S0ix Entry Time: %lld\n", entry_time);

From e62fb1e3faae60f483a96c359c8d72bb04a7b728 Mon Sep 17 00:00:00 2001
From: Mark Pearson <markpearson@lenovo.com>
Date: Sat, 17 Jul 2021 16:36:05 +0200
Subject: [PATCH 053/502] platform/x86: think-lmi: Move pending_reboot_attr to
 the attributes sysfs dir

Move the pending_reboot node under attributes dir where it should live, as
documented in: Documentation/ABI/testing/sysfs-class-firmware-attributes.

Also move the create / remove code to be together with the other code
populating / cleaning the attributes sysfs dir. In the removal path this
is necessary so that the remove is done before the
kset_unregister(tlmi_priv.attribute_kset) call.

Signed-off-by: Mark Pearson <markpearson@lenovo.com>
Co-developed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210717143607.3580-1-hdegoede@redhat.com
---
 drivers/platform/x86/think-lmi.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 64dcec53a7a0..989a8221dcd8 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -672,6 +672,7 @@ static void tlmi_release_attr(void)
 			kobject_put(&tlmi_priv.setting[i]->kobj);
 		}
 	}
+	sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
 	kset_unregister(tlmi_priv.attribute_kset);
 
 	/* Authentication structures */
@@ -680,7 +681,6 @@ static void tlmi_release_attr(void)
 	sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group);
 	kobject_put(&tlmi_priv.pwd_power->kobj);
 	kset_unregister(tlmi_priv.authentication_kset);
-	sysfs_remove_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr);
 }
 
 static int tlmi_sysfs_init(void)
@@ -733,6 +733,10 @@ static int tlmi_sysfs_init(void)
 			goto fail_create_attr;
 	}
 
+	ret = sysfs_create_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
+	if (ret)
+		goto fail_create_attr;
+
 	/* Create authentication entries */
 	tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL,
 								&tlmi_priv.class_dev->kobj);
@@ -760,11 +764,6 @@ static int tlmi_sysfs_init(void)
 	if (ret)
 		goto fail_create_attr;
 
-	/* Create global sysfs files */
-	ret = sysfs_create_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr);
-	if (ret)
-		goto fail_create_attr;
-
 	return ret;
 
 fail_create_attr:

From 30e78435d3bf803cabdc2a1c2eb36e6983aa4596 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 17 Jul 2021 16:36:06 +0200
Subject: [PATCH 054/502] platform/x86: think-lmi: Split kobject_init() and
 kobject_add() calls

tlmi_sysfs_init() calls tlmi_release_attr() on errors which calls
kobject_put() for attributes created by tlmi_analyze(), but if we
bail early because of an error, then this means that some of the
kobjects will not have been initialized yet; and we should thus not
call kobject_put() on them.

Switch from using kobject_init_and_add() inside tlmi_sysfs_init() to
initializing all the created kobjects directly in tlmi_analyze() and
only adding them from tlmi_sysfs_init(). This way all kobjects will
always be initialized when tlmi_release_attr() gets called.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210717143607.3580-2-hdegoede@redhat.com
---
 drivers/platform/x86/think-lmi.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 989a8221dcd8..c22edcf26aaa 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -723,8 +723,8 @@ static int tlmi_sysfs_init(void)
 
 		/* Build attribute */
 		tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset;
-		ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype,
-				NULL, "%s", tlmi_priv.setting[i]->display_name);
+		ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL,
+				  "%s", tlmi_priv.setting[i]->display_name);
 		if (ret)
 			goto fail_create_attr;
 
@@ -745,8 +745,7 @@ static int tlmi_sysfs_init(void)
 		goto fail_create_attr;
 	}
 	tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset;
-	ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype,
-			NULL, "%s", "Admin");
+	ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin");
 	if (ret)
 		goto fail_create_attr;
 
@@ -755,8 +754,7 @@ static int tlmi_sysfs_init(void)
 		goto fail_create_attr;
 
 	tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset;
-	ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype,
-			NULL, "%s", "System");
+	ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "System");
 	if (ret)
 		goto fail_create_attr;
 
@@ -836,6 +834,7 @@ static int tlmi_analyze(void)
 				pr_info("Error retrieving possible values for %d : %s\n",
 						i, setting->display_name);
 		}
+		kobject_init(&setting->kobj, &tlmi_attr_setting_ktype);
 		tlmi_priv.setting[i] = setting;
 		tlmi_priv.settings_count++;
 		kfree(item);
@@ -862,6 +861,8 @@ static int tlmi_analyze(void)
 	if (pwdcfg.password_state & TLMI_PAP_PWD)
 		tlmi_priv.pwd_admin->valid = true;
 
+	kobject_init(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype);
+
 	tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL);
 	if (!tlmi_priv.pwd_power) {
 		ret = -ENOMEM;
@@ -877,6 +878,8 @@ static int tlmi_analyze(void)
 	if (pwdcfg.password_state & TLMI_POP_PWD)
 		tlmi_priv.pwd_power->valid = true;
 
+	kobject_init(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype);
+
 	return 0;
 
 fail_clear_attr:

From f7e506ec4a9966be8b2a87d3324302f0f5dd5a29 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 17 Jul 2021 16:36:07 +0200
Subject: [PATCH 055/502] platform/x86: think-lmi: Fix possible mem-leaks on
 tlmi_analyze() error-exit

Fix 2 possible memleaks on error-exits from tlmi_analyze():

1. If the kzalloc of pwd_power fails, then not only free the atributes,
   but also the allocated pwd_admin struct.

2. Freeing the attributes should also free the possible_values strings.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210717143607.3580-3-hdegoede@redhat.com
---
 drivers/platform/x86/think-lmi.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index c22edcf26aaa..6cfed4427fb0 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -866,7 +866,7 @@ static int tlmi_analyze(void)
 	tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL);
 	if (!tlmi_priv.pwd_power) {
 		ret = -ENOMEM;
-		goto fail_clear_attr;
+		goto fail_free_pwd_admin;
 	}
 	strscpy(tlmi_priv.pwd_power->kbdlang, "us", TLMI_LANG_MAXLEN);
 	tlmi_priv.pwd_power->encoding = TLMI_ENCODING_ASCII;
@@ -882,9 +882,15 @@ static int tlmi_analyze(void)
 
 	return 0;
 
+fail_free_pwd_admin:
+	kfree(tlmi_priv.pwd_admin);
 fail_clear_attr:
-	for (i = 0; i < TLMI_SETTINGS_COUNT; ++i)
-		kfree(tlmi_priv.setting[i]);
+	for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) {
+		if (tlmi_priv.setting[i]) {
+			kfree(tlmi_priv.setting[i]->possible_values);
+			kfree(tlmi_priv.setting[i]);
+		}
+	}
 	return ret;
 }
 

From ec7099fdea8025988710ee6fecfd4e4210c29ab5 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 2 Jul 2021 15:37:12 +0200
Subject: [PATCH 056/502] Revert "gpio: mpc8xxx: change the gpio interrupt
 flags."

This reverts commit 3d5bfbd9716318b1ca5c38488aa69f64d38a9aa5.

When booting with threadirqs, it causes a splat

  WARNING: CPU: 0 PID: 29 at kernel/irq/handle.c:159 __handle_irq_event_percpu+0x1ec/0x27c
  irq 66 handler irq_default_primary_handler+0x0/0x1c enabled interrupts

That splat later went away with commit 81e2073c175b ("genirq: Disable
interrupts for force threaded handlers"), which got backported to
-stable. However, when running an -rt kernel, the splat still
exists. Moreover, quoting Thomas Gleixner [1]

  But 3d5bfbd97163 ("gpio: mpc8xxx: change the gpio interrupt flags.")
  has nothing to do with that:

      "Delete the interrupt IRQF_NO_THREAD flags in order to gpio interrupts
       can be threaded to allow high-priority processes to preempt."

  This changelog is blatantly wrong. In mainline forced irq threads
  have always been invoked with softirqs disabled, which obviously
  makes them non-preemptible.

So the patch didn't even do what its commit log said.

[1] https://lore.kernel.org/lkml/871r8zey88.ffs@nanos.tec.linutronix.de/

Cc: stable@vger.kernel.org # v5.9+
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-mpc8xxx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 4b9157a69fca..50b321a1ab1b 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -405,7 +405,7 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 
 	ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn,
 			       mpc8xxx_gpio_irq_cascade,
-			       IRQF_SHARED, "gpio-cascade",
+			       IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade",
 			       mpc8xxx_gc);
 	if (ret) {
 		dev_err(&pdev->dev,

From d6371c76e20d7d3f61b05fd67b596af4d14a8886 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Mon, 19 Jul 2021 09:51:34 +0100
Subject: [PATCH 057/502] bpf: Fix OOB read when printing XDP link fdinfo

We got the following UBSAN report on one of our testing machines:

    ================================================================================
    UBSAN: array-index-out-of-bounds in kernel/bpf/syscall.c:2389:24
    index 6 is out of range for type 'char *[6]'
    CPU: 43 PID: 930921 Comm: systemd-coredum Tainted: G           O      5.10.48-cloudflare-kasan-2021.7.0 #1
    Hardware name: <snip>
    Call Trace:
     dump_stack+0x7d/0xa3
     ubsan_epilogue+0x5/0x40
     __ubsan_handle_out_of_bounds.cold+0x43/0x48
     ? seq_printf+0x17d/0x250
     bpf_link_show_fdinfo+0x329/0x380
     ? bpf_map_value_size+0xe0/0xe0
     ? put_files_struct+0x20/0x2d0
     ? __kasan_kmalloc.constprop.0+0xc2/0xd0
     seq_show+0x3f7/0x540
     seq_read_iter+0x3f8/0x1040
     seq_read+0x329/0x500
     ? seq_read_iter+0x1040/0x1040
     ? __fsnotify_parent+0x80/0x820
     ? __fsnotify_update_child_dentry_flags+0x380/0x380
     vfs_read+0x123/0x460
     ksys_read+0xed/0x1c0
     ? __x64_sys_pwrite64+0x1f0/0x1f0
     do_syscall_64+0x33/0x40
     entry_SYSCALL_64_after_hwframe+0x44/0xa9
    <snip>
    ================================================================================
    ================================================================================
    UBSAN: object-size-mismatch in kernel/bpf/syscall.c:2384:2

From the report, we can infer that some array access in bpf_link_show_fdinfo at index 6
is out of bounds. The obvious candidate is bpf_link_type_strs[BPF_LINK_TYPE_XDP] with
BPF_LINK_TYPE_XDP == 6. It turns out that BPF_LINK_TYPE_XDP is missing from bpf_types.h
and therefore doesn't have an entry in bpf_link_type_strs:

    pos:	0
    flags:	02000000
    mnt_id:	13
    link_type:	(null)
    link_id:	4
    prog_tag:	bcf7977d3b93787c
    prog_id:	4
    ifindex:	1

Fixes: aa8d3a716b59 ("bpf, xdp: Add bpf_link-based XDP attachment API")
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210719085134.43325-2-lmb@cloudflare.com
---
 include/linux/bpf_types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index a9db1eae6796..ae3ac3a2018c 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -134,4 +134,5 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
 BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
 #ifdef CONFIG_NET
 BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
+BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
 #endif

From 8ba89a3c7967808f33478a8573277cf6a7412c4c Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 14 Jul 2021 11:38:41 -0700
Subject: [PATCH 058/502] dmaengine: idxd: fix desc->vector that isn't being
 updated

Missing update for desc->vector when the wq vector gets updated. This
causes the desc->vector to always be at 0.

Fixes: da435aedb00a ("dmaengine: idxd: fix array index when int_handles are being used")
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/162628784374.353761.4736602409627820431.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/submit.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index e29887528077..21d7d09f73dd 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -25,11 +25,10 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
 	 * Descriptor completion vectors are 1...N for MSIX. We will round
 	 * robin through the N vectors.
 	 */
-	wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
+	wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
 	if (!idxd->int_handles) {
 		desc->hw->int_handle = wq->vec_ptr;
 	} else {
-		desc->vector = wq->vec_ptr;
 		/*
 		 * int_handles are only for descriptor completion. However for device
 		 * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even

From 7eb25da161befbc9a80e94e1bd90d6c06aa645cf Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 14 Jul 2021 14:57:19 -0700
Subject: [PATCH 059/502] dmaengine: idxd: fix sequence for pci driver remove()
 and shutdown()

->shutdown() call should only be responsible for quiescing the device.
Currently it is doing PCI device tear down. This causes issue when things
like MMIO mapping is removed while idxd_unregister_devices() will trigger
removal of idxd device sub-driver and still initiates MMIO writes to the
device. Another issue is with the unregistering of idxd 'struct device',
the memory context gets freed. So the teardown calls are accessing freed
memory and can cause kernel oops. Move all the teardown bits that doesn't
belong in shutdown to ->remove() call. Move unregistering of the idxd
conf_dev 'struct device' to after doing all the teardown to free all
the memory that's no longer needed.

Fixes: 47c16ac27d4c ("dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime")
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/162629983901.395844.17964803190905549615.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/init.c  | 26 +++++++++++++++++---------
 drivers/dma/idxd/sysfs.c |  2 --
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 4e32a4dcc3ab..c0f4c0422f32 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -760,32 +760,40 @@ static void idxd_shutdown(struct pci_dev *pdev)
 	for (i = 0; i < msixcnt; i++) {
 		irq_entry = &idxd->irq_entries[i];
 		synchronize_irq(irq_entry->vector);
-		free_irq(irq_entry->vector, irq_entry);
 		if (i == 0)
 			continue;
 		idxd_flush_pending_llist(irq_entry);
 		idxd_flush_work_list(irq_entry);
 	}
-
-	idxd_msix_perm_clear(idxd);
-	idxd_release_int_handles(idxd);
-	pci_free_irq_vectors(pdev);
-	pci_iounmap(pdev, idxd->reg_base);
-	pci_disable_device(pdev);
-	destroy_workqueue(idxd->wq);
+	flush_workqueue(idxd->wq);
 }
 
 static void idxd_remove(struct pci_dev *pdev)
 {
 	struct idxd_device *idxd = pci_get_drvdata(pdev);
+	struct idxd_irq_entry *irq_entry;
+	int msixcnt = pci_msix_vec_count(pdev);
+	int i;
 
 	dev_dbg(&pdev->dev, "%s called\n", __func__);
 	idxd_shutdown(pdev);
 	if (device_pasid_enabled(idxd))
 		idxd_disable_system_pasid(idxd);
 	idxd_unregister_devices(idxd);
-	perfmon_pmu_remove(idxd);
+
+	for (i = 0; i < msixcnt; i++) {
+		irq_entry = &idxd->irq_entries[i];
+		free_irq(irq_entry->vector, irq_entry);
+	}
+	idxd_msix_perm_clear(idxd);
+	idxd_release_int_handles(idxd);
+	pci_free_irq_vectors(pdev);
+	pci_iounmap(pdev, idxd->reg_base);
 	iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+	pci_disable_device(pdev);
+	destroy_workqueue(idxd->wq);
+	perfmon_pmu_remove(idxd);
+	device_unregister(&idxd->conf_dev);
 }
 
 static struct pci_driver idxd_pci_driver = {
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 0460d58e3941..bb4df63906a7 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -1744,8 +1744,6 @@ void idxd_unregister_devices(struct idxd_device *idxd)
 
 		device_unregister(&group->conf_dev);
 	}
-
-	device_unregister(&idxd->conf_dev);
 }
 
 int idxd_register_bus_type(void)

From 6b4b87f2c31ac1af4f244990a7cbfb50d3f3e33f Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 14 Jul 2021 11:50:06 -0700
Subject: [PATCH 060/502] dmaengine: idxd: fix submission race window

Konstantin observed that when descriptors are submitted, the descriptor is
added to the pending list after the submission. This creates a race window
with the slight possibility that the descriptor can complete before it
gets added to the pending list and this window would cause the completion
handler to miss processing the descriptor.

To address the issue, the addition of the descriptor to the pending list
must be done before it gets submitted to the hardware. However, submitting
to swq with ENQCMDS instruction can cause a failure with the condition of
either wq is full or wq is not "active".

With the descriptor allocation being the gate to the wq capacity, it is not
possible to hit a retry with ENQCMDS submission to the swq. The only
possible failure can happen is when wq is no longer "active" due to hw
error and therefore we are moving towards taking down the portal. Given
this is a rare condition and there's no longer concern over I/O
performance, the driver can walk the completion lists in order to retrieve
and abort the descriptor.

The error path will set the descriptor to aborted status. It will take the
work list lock to prevent further processing of worklist. It will do a
delete_all on the pending llist to retrieve all descriptors on the pending
llist. The delete_all action does not require a lock. It will walk through
the acquired llist to find the aborted descriptor while add all remaining
descriptors to the work list since it holds the lock. If it does not find
the aborted descriptor on the llist, it will walk through the work
list. And if it still does not find the descriptor, then it means the
interrupt handler has removed the desc from the llist but is pending on
the work list lock and will process it once the error path releases the
lock.

Fixes: eb15e7154fbf ("dmaengine: idxd: add interrupt handle request and release support")
Reported-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/162628855747.360485.10101925573082466530.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/idxd.h   | 14 +++++++
 drivers/dma/idxd/irq.c    | 27 +++++++++-----
 drivers/dma/idxd/submit.c | 78 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 101 insertions(+), 18 deletions(-)

diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 26482c7d4c3a..fc708be7ad9a 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -294,6 +294,14 @@ struct idxd_desc {
 	struct idxd_wq *wq;
 };
 
+/*
+ * This is software defined error for the completion status. We overload the error code
+ * that will never appear in completion status and only SWERR register.
+ */
+enum idxd_completion_status {
+	IDXD_COMP_DESC_ABORT = 0xff,
+};
+
 #define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev)
 #define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev)
 
@@ -482,4 +490,10 @@ static inline void perfmon_init(void) {}
 static inline void perfmon_exit(void) {}
 #endif
 
+static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason)
+{
+	idxd_dma_complete_txd(desc, reason);
+	idxd_free_desc(desc->wq, desc);
+}
+
 #endif
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index ae68e1e5487a..4e3a7198c0ca 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -245,12 +245,6 @@ static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr)
 	return false;
 }
 
-static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason)
-{
-	idxd_dma_complete_txd(desc, reason);
-	idxd_free_desc(desc->wq, desc);
-}
-
 static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
 				     enum irq_work_type wtype,
 				     int *processed, u64 data)
@@ -272,8 +266,16 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
 		reason = IDXD_COMPLETE_DEV_FAIL;
 
 	llist_for_each_entry_safe(desc, t, head, llnode) {
-		if (desc->completion->status) {
-			if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS)
+		u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;
+
+		if (status) {
+			if (unlikely(status == IDXD_COMP_DESC_ABORT)) {
+				complete_desc(desc, IDXD_COMPLETE_ABORT);
+				(*processed)++;
+				continue;
+			}
+
+			if (unlikely(status != DSA_COMP_SUCCESS))
 				match_fault(desc, data);
 			complete_desc(desc, reason);
 			(*processed)++;
@@ -329,7 +331,14 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
 	spin_unlock_irqrestore(&irq_entry->list_lock, flags);
 
 	list_for_each_entry(desc, &flist, list) {
-		if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS)
+		u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;
+
+		if (unlikely(status == IDXD_COMP_DESC_ABORT)) {
+			complete_desc(desc, IDXD_COMPLETE_ABORT);
+			continue;
+		}
+
+		if (unlikely(status != DSA_COMP_SUCCESS))
 			match_fault(desc, data);
 		complete_desc(desc, reason);
 	}
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index 21d7d09f73dd..36c9c1a89b7e 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -87,9 +87,64 @@ void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
 }
 
+static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
+					 struct idxd_desc *desc)
+{
+	struct idxd_desc *d, *n;
+
+	lockdep_assert_held(&ie->list_lock);
+	list_for_each_entry_safe(d, n, &ie->work_list, list) {
+		if (d == desc) {
+			list_del(&d->list);
+			return d;
+		}
+	}
+
+	/*
+	 * At this point, the desc needs to be aborted is held by the completion
+	 * handler where it has taken it off the pending list but has not added to the
+	 * work list. It will be cleaned up by the interrupt handler when it sees the
+	 * IDXD_COMP_DESC_ABORT for completion status.
+	 */
+	return NULL;
+}
+
+static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
+			     struct idxd_desc *desc)
+{
+	struct idxd_desc *d, *t, *found = NULL;
+	struct llist_node *head;
+	unsigned long flags;
+
+	desc->completion->status = IDXD_COMP_DESC_ABORT;
+	/*
+	 * Grab the list lock so it will block the irq thread handler. This allows the
+	 * abort code to locate the descriptor need to be aborted.
+	 */
+	spin_lock_irqsave(&ie->list_lock, flags);
+	head = llist_del_all(&ie->pending_llist);
+	if (head) {
+		llist_for_each_entry_safe(d, t, head, llnode) {
+			if (d == desc) {
+				found = desc;
+				continue;
+			}
+			list_add_tail(&desc->list, &ie->work_list);
+		}
+	}
+
+	if (!found)
+		found = list_abort_desc(wq, ie, desc);
+	spin_unlock_irqrestore(&ie->list_lock, flags);
+
+	if (found)
+		complete_desc(found, IDXD_COMPLETE_ABORT);
+}
+
 int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 {
 	struct idxd_device *idxd = wq->idxd;
+	struct idxd_irq_entry *ie = NULL;
 	void __iomem *portal;
 	int rc;
 
@@ -107,6 +162,16 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	 * even on UP because the recipient is a device.
 	 */
 	wmb();
+
+	/*
+	 * Pending the descriptor to the lockless list for the irq_entry
+	 * that we designated the descriptor to.
+	 */
+	if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
+		ie = &idxd->irq_entries[desc->vector];
+		llist_add(&desc->llnode, &ie->pending_llist);
+	}
+
 	if (wq_dedicated(wq)) {
 		iosubmit_cmds512(portal, desc->hw, 1);
 	} else {
@@ -117,18 +182,13 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 		 * device is not accepting descriptor at all.
 		 */
 		rc = enqcmds(portal, desc->hw);
-		if (rc < 0)
+		if (rc < 0) {
+			if (ie)
+				llist_abort_desc(wq, ie, desc);
 			return rc;
+		}
 	}
 
 	percpu_ref_put(&wq->wq_active);
-
-	/*
-	 * Pending the descriptor to the lockless list for the irq_entry
-	 * that we designated the descriptor to.
-	 */
-	if (desc->hw->flags & IDXD_OP_FLAG_RCI)
-		llist_add(&desc->llnode, &idxd->irq_entries[desc->vector].pending_llist);
-
 	return 0;
 }

From ec185dd3ab257dc2a60953fdf1b6622f524cc5b7 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.microsoft.com>
Date: Mon, 14 Jun 2021 17:33:10 -0500
Subject: [PATCH 061/502] optee: Fix memory leak when failing to register shm
 pages

Free the previously allocated pages when we encounter an error condition
while attempting to register the pages with the secure world.

Fixes: a249dd200d03 ("tee: optee: Fix dynamic shm pool allocations")
Fixes: 5a769f6ff439 ("optee: Fix multi page dynamic shm pool alloc")
Cc: stable@vger.kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/optee/shm_pool.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c
index d767eebf30bd..da06ce9b9313 100644
--- a/drivers/tee/optee/shm_pool.c
+++ b/drivers/tee/optee/shm_pool.c
@@ -32,8 +32,10 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm,
 		struct page **pages;
 
 		pages = kcalloc(nr_pages, sizeof(pages), GFP_KERNEL);
-		if (!pages)
-			return -ENOMEM;
+		if (!pages) {
+			rc = -ENOMEM;
+			goto err;
+		}
 
 		for (i = 0; i < nr_pages; i++) {
 			pages[i] = page;
@@ -44,8 +46,14 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm,
 		rc = optee_shm_register(shm->ctx, shm, pages, nr_pages,
 					(unsigned long)shm->kaddr);
 		kfree(pages);
+		if (rc)
+			goto err;
 	}
 
+	return 0;
+
+err:
+	__free_pages(page, order);
 	return rc;
 }
 

From adf752af454e91e123e85e3784972d166837af73 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.microsoft.com>
Date: Mon, 14 Jun 2021 17:33:11 -0500
Subject: [PATCH 062/502] optee: Refuse to load the driver under the kdump
 kernel

Fix a hung task issue, seen when booting the kdump kernel, that is
caused by all of the secure world threads being in a permanent suspended
state:

 INFO: task swapper/0:1 blocked for more than 120 seconds.
       Not tainted 5.4.83 #1
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 swapper/0       D    0     1      0 0x00000028
 Call trace:
  __switch_to+0xc8/0x118
  __schedule+0x2e0/0x700
  schedule+0x38/0xb8
  schedule_timeout+0x258/0x388
  wait_for_completion+0x16c/0x4b8
  optee_cq_wait_for_completion+0x28/0xa8
  optee_disable_shm_cache+0xb8/0xf8
  optee_probe+0x560/0x61c
  platform_drv_probe+0x58/0xa8
  really_probe+0xe0/0x338
  driver_probe_device+0x5c/0xf0
  device_driver_attach+0x74/0x80
  __driver_attach+0x64/0xe0
  bus_for_each_dev+0x84/0xd8
  driver_attach+0x30/0x40
  bus_add_driver+0x188/0x1e8
  driver_register+0x64/0x110
  __platform_driver_register+0x54/0x60
  optee_driver_init+0x20/0x28
  do_one_initcall+0x54/0x24c
  kernel_init_freeable+0x1e8/0x2c0
  kernel_init+0x18/0x118
  ret_from_fork+0x10/0x18

The invoke_fn hook returned OPTEE_SMC_RETURN_ETHREAD_LIMIT, indicating
that the secure world threads were all in a suspended state at the time
of the kernel crash. This intermittently prevented the kdump kernel from
booting, resulting in a failure to collect the kernel dump.

Make kernel dump collection more reliable on systems utilizing OP-TEE by
refusing to load the driver under the kdump kernel.

Cc: stable@vger.kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/optee/core.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
index ddb8f9ecf307..5288cd767d82 100644
--- a/drivers/tee/optee/core.c
+++ b/drivers/tee/optee/core.c
@@ -6,6 +6,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/arm-smccc.h>
+#include <linux/crash_dump.h>
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -612,6 +613,16 @@ static int optee_probe(struct platform_device *pdev)
 	u32 sec_caps;
 	int rc;
 
+	/*
+	 * The kernel may have crashed at the same time that all available
+	 * secure world threads were suspended and we cannot reschedule the
+	 * suspended threads without access to the crashed kernel's wait_queue.
+	 * Therefore, we cannot reliably initialize the OP-TEE driver in the
+	 * kdump kernel.
+	 */
+	if (is_kdump_kernel())
+		return -ENODEV;
+
 	invoke_fn = get_invoke_func(&pdev->dev);
 	if (IS_ERR(invoke_fn))
 		return PTR_ERR(invoke_fn);

From f25889f93184db8b07a543cc2bbbb9a8fcaf4333 Mon Sep 17 00:00:00 2001
From: Allen Pais <apais@linux.microsoft.com>
Date: Mon, 14 Jun 2021 17:33:12 -0500
Subject: [PATCH 063/502] optee: fix tee out of memory failure seen during
 kexec reboot

The following out of memory errors are seen on kexec reboot
from the optee core.

[    0.368428] tee_bnxt_fw optee-clnt0: tee_shm_alloc failed
[    0.368461] tee_bnxt_fw: probe of optee-clnt0 failed with error -22

tee_shm_release() is not invoked on dma shm buffer.

Implement .shutdown() method to handle the release of the buffers
correctly.

More info:
https://github.com/OP-TEE/optee_os/issues/3637

Cc: stable@vger.kernel.org
Signed-off-by: Allen Pais <apais@linux.microsoft.com>
Reviewed-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/optee/core.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
index 5288cd767d82..0987074d7ed0 100644
--- a/drivers/tee/optee/core.c
+++ b/drivers/tee/optee/core.c
@@ -573,6 +573,13 @@ static optee_invoke_fn *get_invoke_func(struct device *dev)
 	return ERR_PTR(-EINVAL);
 }
 
+/* optee_remove - Device Removal Routine
+ * @pdev: platform device information struct
+ *
+ * optee_remove is called by platform subsystem to alert the driver
+ * that it should release the device
+ */
+
 static int optee_remove(struct platform_device *pdev)
 {
 	struct optee *optee = platform_get_drvdata(pdev);
@@ -603,6 +610,18 @@ static int optee_remove(struct platform_device *pdev)
 	return 0;
 }
 
+/* optee_shutdown - Device Removal Routine
+ * @pdev: platform device information struct
+ *
+ * platform_shutdown is called by the platform subsystem to alert
+ * the driver that a shutdown, reboot, or kexec is happening and
+ * device must be disabled.
+ */
+static void optee_shutdown(struct platform_device *pdev)
+{
+	optee_disable_shm_cache(platform_get_drvdata(pdev));
+}
+
 static int optee_probe(struct platform_device *pdev)
 {
 	optee_invoke_fn *invoke_fn;
@@ -739,6 +758,7 @@ MODULE_DEVICE_TABLE(of, optee_dt_match);
 static struct platform_driver optee_driver = {
 	.probe  = optee_probe,
 	.remove = optee_remove,
+	.shutdown = optee_shutdown,
 	.driver = {
 		.name = "optee",
 		.of_match_table = optee_dt_match,

From b5c10dd04b7418793517e3286cde5c04759a86de Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.microsoft.com>
Date: Mon, 14 Jun 2021 17:33:13 -0500
Subject: [PATCH 064/502] optee: Clear stale cache entries during
 initialization

The shm cache could contain invalid addresses if
optee_disable_shm_cache() was not called from the .shutdown hook of the
previous kernel before a kexec. These addresses could be unmapped or
they could point to mapped but unintended locations in memory.

Clear the shared memory cache, while being careful to not translate the
addresses returned from OPTEE_SMC_DISABLE_SHM_CACHE, during driver
initialization. Once all pre-cache shm objects are removed, proceed with
enabling the cache so that we know that we can handle cached shm objects
with confidence later in the .shutdown hook.

Cc: stable@vger.kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/optee/call.c          | 36 ++++++++++++++++++++++++++++---
 drivers/tee/optee/core.c          |  9 ++++++++
 drivers/tee/optee/optee_private.h |  1 +
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c
index 6e6eb836e9b6..387e94768182 100644
--- a/drivers/tee/optee/call.c
+++ b/drivers/tee/optee/call.c
@@ -416,11 +416,13 @@ void optee_enable_shm_cache(struct optee *optee)
 }
 
 /**
- * optee_disable_shm_cache() - Disables caching of some shared memory allocation
- *			      in OP-TEE
+ * __optee_disable_shm_cache() - Disables caching of some shared memory
+ *                               allocation in OP-TEE
  * @optee:	main service struct
+ * @is_mapped:	true if the cached shared memory addresses were mapped by this
+ *		kernel, are safe to dereference, and should be freed
  */
-void optee_disable_shm_cache(struct optee *optee)
+static void __optee_disable_shm_cache(struct optee *optee, bool is_mapped)
 {
 	struct optee_call_waiter w;
 
@@ -439,6 +441,13 @@ void optee_disable_shm_cache(struct optee *optee)
 		if (res.result.status == OPTEE_SMC_RETURN_OK) {
 			struct tee_shm *shm;
 
+			/*
+			 * Shared memory references that were not mapped by
+			 * this kernel must be ignored to prevent a crash.
+			 */
+			if (!is_mapped)
+				continue;
+
 			shm = reg_pair_to_ptr(res.result.shm_upper32,
 					      res.result.shm_lower32);
 			tee_shm_free(shm);
@@ -449,6 +458,27 @@ void optee_disable_shm_cache(struct optee *optee)
 	optee_cq_wait_final(&optee->call_queue, &w);
 }
 
+/**
+ * optee_disable_shm_cache() - Disables caching of mapped shared memory
+ *                             allocations in OP-TEE
+ * @optee:	main service struct
+ */
+void optee_disable_shm_cache(struct optee *optee)
+{
+	return __optee_disable_shm_cache(optee, true);
+}
+
+/**
+ * optee_disable_unmapped_shm_cache() - Disables caching of shared memory
+ *                                      allocations in OP-TEE which are not
+ *                                      currently mapped
+ * @optee:	main service struct
+ */
+void optee_disable_unmapped_shm_cache(struct optee *optee)
+{
+	return __optee_disable_shm_cache(optee, false);
+}
+
 #define PAGELIST_ENTRIES_PER_PAGE				\
 	((OPTEE_MSG_NONCONTIG_PAGE_SIZE / sizeof(u64)) - 1)
 
diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
index 0987074d7ed0..651d49b53d3b 100644
--- a/drivers/tee/optee/core.c
+++ b/drivers/tee/optee/core.c
@@ -716,6 +716,15 @@ static int optee_probe(struct platform_device *pdev)
 	optee->memremaped_shm = memremaped_shm;
 	optee->pool = pool;
 
+	/*
+	 * Ensure that there are no pre-existing shm objects before enabling
+	 * the shm cache so that there's no chance of receiving an invalid
+	 * address during shutdown. This could occur, for example, if we're
+	 * kexec booting from an older kernel that did not properly cleanup the
+	 * shm cache.
+	 */
+	optee_disable_unmapped_shm_cache(optee);
+
 	optee_enable_shm_cache(optee);
 
 	if (optee->sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM)
diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
index e25b216a14ef..dbdd367be156 100644
--- a/drivers/tee/optee/optee_private.h
+++ b/drivers/tee/optee/optee_private.h
@@ -159,6 +159,7 @@ int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session);
 
 void optee_enable_shm_cache(struct optee *optee);
 void optee_disable_shm_cache(struct optee *optee);
+void optee_disable_unmapped_shm_cache(struct optee *optee);
 
 int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm,
 		       struct page **pages, size_t num_pages,

From dc7019b7d0e188d4093b34bd0747ed0d668c63bf Mon Sep 17 00:00:00 2001
From: Jens Wiklander <jens.wiklander@linaro.org>
Date: Mon, 14 Jun 2021 17:33:14 -0500
Subject: [PATCH 065/502] tee: add tee_shm_alloc_kernel_buf()

Adds a new function tee_shm_alloc_kernel_buf() to allocate shared memory
from a kernel driver. This function can later be made more lightweight
by unnecessary dma-buf export.

Cc: stable@vger.kernel.org
Reviewed-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/tee_shm.c   | 18 ++++++++++++++++++
 include/linux/tee_drv.h |  1 +
 2 files changed, 19 insertions(+)

diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index 00472f5ce22e..c65e44707cd6 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -193,6 +193,24 @@ err_dev_put:
 }
 EXPORT_SYMBOL_GPL(tee_shm_alloc);
 
+/**
+ * tee_shm_alloc_kernel_buf() - Allocate shared memory for kernel buffer
+ * @ctx:	Context that allocates the shared memory
+ * @size:	Requested size of shared memory
+ *
+ * The returned memory registered in secure world and is suitable to be
+ * passed as a memory buffer in parameter argument to
+ * tee_client_invoke_func(). The memory allocated is later freed with a
+ * call to tee_shm_free().
+ *
+ * @returns a pointer to 'struct tee_shm'
+ */
+struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size)
+{
+	return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+}
+EXPORT_SYMBOL_GPL(tee_shm_alloc_kernel_buf);
+
 struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
 				 size_t length, u32 flags)
 {
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 54269e47ac9a..8990f7628387 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -332,6 +332,7 @@ void *tee_get_drvdata(struct tee_device *teedev);
  * @returns a pointer to 'struct tee_shm'
  */
 struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags);
+struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size);
 
 /**
  * tee_shm_register() - Register shared memory buffer

From 376e4199e327a5cf29b8ec8fb0f64f3d8b429819 Mon Sep 17 00:00:00 2001
From: Sumit Garg <sumit.garg@linaro.org>
Date: Mon, 14 Jun 2021 17:33:15 -0500
Subject: [PATCH 066/502] tee: Correct inappropriate usage of TEE_SHM_DMA_BUF
 flag

Currently TEE_SHM_DMA_BUF flag has been inappropriately used to not
register shared memory allocated for private usage by underlying TEE
driver: OP-TEE in this case. So rather add a new flag as TEE_SHM_PRIV
that can be utilized by underlying TEE drivers for private allocation
and usage of shared memory.

With this corrected, allow tee_shm_alloc_kernel_buf() to allocate a
shared memory region without the backing of dma-buf.

Cc: stable@vger.kernel.org
Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
Co-developed-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/optee/call.c     | 2 +-
 drivers/tee/optee/core.c     | 3 ++-
 drivers/tee/optee/rpc.c      | 5 +++--
 drivers/tee/optee/shm_pool.c | 8 ++++++--
 drivers/tee/tee_shm.c        | 4 ++--
 include/linux/tee_drv.h      | 1 +
 6 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c
index 387e94768182..945f03da0223 100644
--- a/drivers/tee/optee/call.c
+++ b/drivers/tee/optee/call.c
@@ -184,7 +184,7 @@ static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params,
 	struct optee_msg_arg *ma;
 
 	shm = tee_shm_alloc(ctx, OPTEE_MSG_GET_ARG_SIZE(num_params),
-			    TEE_SHM_MAPPED);
+			    TEE_SHM_MAPPED | TEE_SHM_PRIV);
 	if (IS_ERR(shm))
 		return shm;
 
diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
index 651d49b53d3b..5ce13b099d7d 100644
--- a/drivers/tee/optee/core.c
+++ b/drivers/tee/optee/core.c
@@ -278,7 +278,8 @@ static void optee_release(struct tee_context *ctx)
 	if (!ctxdata)
 		return;
 
-	shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), TEE_SHM_MAPPED);
+	shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg),
+			    TEE_SHM_MAPPED | TEE_SHM_PRIV);
 	if (!IS_ERR(shm)) {
 		arg = tee_shm_get_va(shm, 0);
 		/*
diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c
index 1849180b0278..efbaff7ad7e5 100644
--- a/drivers/tee/optee/rpc.c
+++ b/drivers/tee/optee/rpc.c
@@ -314,7 +314,7 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
 		shm = cmd_alloc_suppl(ctx, sz);
 		break;
 	case OPTEE_RPC_SHM_TYPE_KERNEL:
-		shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED);
+		shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV);
 		break;
 	default:
 		arg->ret = TEEC_ERROR_BAD_PARAMETERS;
@@ -502,7 +502,8 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param,
 
 	switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) {
 	case OPTEE_SMC_RPC_FUNC_ALLOC:
-		shm = tee_shm_alloc(ctx, param->a1, TEE_SHM_MAPPED);
+		shm = tee_shm_alloc(ctx, param->a1,
+				    TEE_SHM_MAPPED | TEE_SHM_PRIV);
 		if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) {
 			reg_pair_from_64(&param->a1, &param->a2, pa);
 			reg_pair_from_64(&param->a4, &param->a5,
diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c
index da06ce9b9313..c41a9a501a6e 100644
--- a/drivers/tee/optee/shm_pool.c
+++ b/drivers/tee/optee/shm_pool.c
@@ -27,7 +27,11 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm,
 	shm->paddr = page_to_phys(page);
 	shm->size = PAGE_SIZE << order;
 
-	if (shm->flags & TEE_SHM_DMA_BUF) {
+	/*
+	 * Shared memory private to the OP-TEE driver doesn't need
+	 * to be registered with OP-TEE.
+	 */
+	if (!(shm->flags & TEE_SHM_PRIV)) {
 		unsigned int nr_pages = 1 << order, i;
 		struct page **pages;
 
@@ -60,7 +64,7 @@ err:
 static void pool_op_free(struct tee_shm_pool_mgr *poolm,
 			 struct tee_shm *shm)
 {
-	if (shm->flags & TEE_SHM_DMA_BUF)
+	if (!(shm->flags & TEE_SHM_PRIV))
 		optee_shm_unregister(shm->ctx, shm);
 
 	free_pages((unsigned long)shm->kaddr, get_order(shm->size));
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index c65e44707cd6..8a9384a64f3e 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -117,7 +117,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) {
+	if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF | TEE_SHM_PRIV))) {
 		dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags);
 		return ERR_PTR(-EINVAL);
 	}
@@ -207,7 +207,7 @@ EXPORT_SYMBOL_GPL(tee_shm_alloc);
  */
 struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size)
 {
-	return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED);
 }
 EXPORT_SYMBOL_GPL(tee_shm_alloc_kernel_buf);
 
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 8990f7628387..3ebfea0781f1 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -27,6 +27,7 @@
 #define TEE_SHM_USER_MAPPED	BIT(4)  /* Memory mapped in user space */
 #define TEE_SHM_POOL		BIT(5)  /* Memory allocated from pool */
 #define TEE_SHM_KERNEL_MAPPED	BIT(6)  /* Memory mapped in kernel space */
+#define TEE_SHM_PRIV		BIT(7)  /* Memory private to TEE driver */
 
 struct device;
 struct tee_device;

From dfb703ad2a8d366b829818a558337be779746575 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.microsoft.com>
Date: Mon, 14 Jun 2021 17:33:16 -0500
Subject: [PATCH 067/502] tpm_ftpm_tee: Free and unregister TEE shared memory
 during kexec

dma-buf backed shared memory cannot be reliably freed and unregistered
during a kexec operation even when tee_shm_free() is called on the shm
from a .shutdown hook. The problem occurs because dma_buf_put() calls
fput() which then uses task_work_add(), with the TWA_RESUME parameter,
to queue tee_shm_release() to be called before the current task returns
to user mode. However, the current task never returns to user mode
before the kexec completes so the memory is never freed nor
unregistered.

Use tee_shm_alloc_kernel_buf() to avoid dma-buf backed shared memory
allocation so that tee_shm_free() can directly call tee_shm_release().
This will ensure that the shm can be freed and unregistered during a
kexec operation.

Fixes: 09e574831b27 ("tpm/tpm_ftpm_tee: A driver for firmware TPM running inside TEE")
Fixes: 1760eb689ed6 ("tpm/tpm_ftpm_tee: add shutdown call back")
Cc: stable@vger.kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/char/tpm/tpm_ftpm_tee.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c
index 2ccdf8ac6994..6e3235565a4d 100644
--- a/drivers/char/tpm/tpm_ftpm_tee.c
+++ b/drivers/char/tpm/tpm_ftpm_tee.c
@@ -254,11 +254,11 @@ static int ftpm_tee_probe(struct device *dev)
 	pvt_data->session = sess_arg.session;
 
 	/* Allocate dynamic shared memory with fTPM TA */
-	pvt_data->shm = tee_shm_alloc(pvt_data->ctx,
-				      MAX_COMMAND_SIZE + MAX_RESPONSE_SIZE,
-				      TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	pvt_data->shm = tee_shm_alloc_kernel_buf(pvt_data->ctx,
+						 MAX_COMMAND_SIZE +
+						 MAX_RESPONSE_SIZE);
 	if (IS_ERR(pvt_data->shm)) {
-		dev_err(dev, "%s: tee_shm_alloc failed\n", __func__);
+		dev_err(dev, "%s: tee_shm_alloc_kernel_buf failed\n", __func__);
 		rc = -ENOMEM;
 		goto out_shm_alloc;
 	}

From 914ab19e471d8fb535ed50dff108b0a615f3c2d8 Mon Sep 17 00:00:00 2001
From: Allen Pais <apais@linux.microsoft.com>
Date: Mon, 14 Jun 2021 17:33:17 -0500
Subject: [PATCH 068/502] firmware: tee_bnxt: Release TEE shm, session, and
 context during kexec

Implement a .shutdown hook that will be called during a kexec operation
so that the TEE shared memory, session, and context that were set up
during .probe can be properly freed/closed.

Additionally, don't use dma-buf backed shared memory for the
fw_shm_pool. dma-buf backed shared memory cannot be reliably freed and
unregistered during a kexec operation even when tee_shm_free() is called
on the shm from a .shutdown hook. The problem occurs because
dma_buf_put() calls fput() which then uses task_work_add(), with the
TWA_RESUME parameter, to queue tee_shm_release() to be called before the
current task returns to user mode. However, the current task never
returns to user mode before the kexec completes so the memory is never
freed nor unregistered.

Use tee_shm_alloc_kernel_buf() to avoid dma-buf backed shared memory
allocation so that tee_shm_free() can directly call tee_shm_release().
This will ensure that the shm can be freed and unregistered during a
kexec operation.

Fixes: 246880958ac9 ("firmware: broadcom: add OP-TEE based BNXT f/w manager")
Cc: stable@vger.kernel.org
Signed-off-by: Allen Pais <apais@linux.microsoft.com>
Co-developed-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/firmware/broadcom/tee_bnxt_fw.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/broadcom/tee_bnxt_fw.c b/drivers/firmware/broadcom/tee_bnxt_fw.c
index ed10da5313e8..a5bf4c3f6dc7 100644
--- a/drivers/firmware/broadcom/tee_bnxt_fw.c
+++ b/drivers/firmware/broadcom/tee_bnxt_fw.c
@@ -212,10 +212,9 @@ static int tee_bnxt_fw_probe(struct device *dev)
 
 	pvt_data.dev = dev;
 
-	fw_shm_pool = tee_shm_alloc(pvt_data.ctx, MAX_SHM_MEM_SZ,
-				    TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	fw_shm_pool = tee_shm_alloc_kernel_buf(pvt_data.ctx, MAX_SHM_MEM_SZ);
 	if (IS_ERR(fw_shm_pool)) {
-		dev_err(pvt_data.dev, "tee_shm_alloc failed\n");
+		dev_err(pvt_data.dev, "tee_shm_alloc_kernel_buf failed\n");
 		err = PTR_ERR(fw_shm_pool);
 		goto out_sess;
 	}
@@ -242,6 +241,14 @@ static int tee_bnxt_fw_remove(struct device *dev)
 	return 0;
 }
 
+static void tee_bnxt_fw_shutdown(struct device *dev)
+{
+	tee_shm_free(pvt_data.fw_shm_pool);
+	tee_client_close_session(pvt_data.ctx, pvt_data.session_id);
+	tee_client_close_context(pvt_data.ctx);
+	pvt_data.ctx = NULL;
+}
+
 static const struct tee_client_device_id tee_bnxt_fw_id_table[] = {
 	{UUID_INIT(0x6272636D, 0x2019, 0x0716,
 		    0x42, 0x43, 0x4D, 0x5F, 0x53, 0x43, 0x48, 0x49)},
@@ -257,6 +264,7 @@ static struct tee_client_driver tee_bnxt_fw_driver = {
 		.bus		= &tee_bus_type,
 		.probe		= tee_bnxt_fw_probe,
 		.remove		= tee_bnxt_fw_remove,
+		.shutdown	= tee_bnxt_fw_shutdown,
 	},
 };
 

From 4e9505064f58d1252805952f8547a5b7dbc5c111 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Sat, 17 Jul 2021 16:02:21 +0100
Subject: [PATCH 069/502] net/xfrm/compat: Copy xfrm_spdattr_type_t atributes

The attribute-translator has to take in mind maxtype, that is
xfrm_link::nla_max. When it is set, attributes are not of xfrm_attr_type_t.
Currently, they can be only XFRMA_SPD_MAX (message XFRM_MSG_NEWSPDINFO),
their UABI is the same for 64/32-bit, so just copy them.

Thanks to YueHaibing for reporting this:
In xfrm_user_rcv_msg_compat() if maxtype is not zero and less than
XFRMA_MAX, nlmsg_parse_deprecated() do not initialize attrs array fully.
xfrm_xlate32() will access uninit 'attrs[i]' while iterating all attrs
array.

KASAN: probably user-memory-access in range [0x0000000041b58ab0-0x0000000041b58ab7]
CPU: 0 PID: 15799 Comm: syz-executor.2 Tainted: G        W         5.14.0-rc1-syzkaller #0
RIP: 0010:nla_type include/net/netlink.h:1130 [inline]
RIP: 0010:xfrm_xlate32_attr net/xfrm/xfrm_compat.c:410 [inline]
RIP: 0010:xfrm_xlate32 net/xfrm/xfrm_compat.c:532 [inline]
RIP: 0010:xfrm_user_rcv_msg_compat+0x5e5/0x1070 net/xfrm/xfrm_compat.c:577
[...]
Call Trace:
 xfrm_user_rcv_msg+0x556/0x8b0 net/xfrm/xfrm_user.c:2774
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504
 xfrm_netlink_rcv+0x6b/0x90 net/xfrm/xfrm_user.c:2824
 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340
 netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929
 sock_sendmsg_nosec net/socket.c:702 [inline]

Fixes: 5106f4a8acff ("xfrm/compat: Add 32=>64-bit messages translator")
Cc: <stable@kernel.org>
Reported-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_compat.c | 49 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 5 deletions(-)

diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index a20aec9d7393..2bf269390163 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -298,8 +298,16 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
 	len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]);
 
 	nla_for_each_attr(nla, attrs, len, remaining) {
-		int err = xfrm_xlate64_attr(dst, nla);
+		int err;
 
+		switch (type) {
+		case XFRM_MSG_NEWSPDINFO:
+			err = xfrm_nla_cpy(dst, nla, nla_len(nla));
+			break;
+		default:
+			err = xfrm_xlate64_attr(dst, nla);
+			break;
+		}
 		if (err)
 			return err;
 	}
@@ -341,7 +349,8 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src
 
 /* Calculates len of translated 64-bit message. */
 static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
-					    struct nlattr *attrs[XFRMA_MAX+1])
+					    struct nlattr *attrs[XFRMA_MAX + 1],
+					    int maxtype)
 {
 	size_t len = nlmsg_len(src);
 
@@ -358,10 +367,20 @@ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
 	case XFRM_MSG_POLEXPIRE:
 		len += 8;
 		break;
+	case XFRM_MSG_NEWSPDINFO:
+		/* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+		return len;
 	default:
 		break;
 	}
 
+	/* Unexpected for anything, but XFRM_MSG_NEWSPDINFO, please
+	 * correct both 64=>32-bit and 32=>64-bit translators to copy
+	 * new attributes.
+	 */
+	if (WARN_ON_ONCE(maxtype))
+		return len;
+
 	if (attrs[XFRMA_SA])
 		len += 4;
 	if (attrs[XFRMA_POLICY])
@@ -440,7 +459,8 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
 
 static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
 			struct nlattr *attrs[XFRMA_MAX+1],
-			size_t size, u8 type, struct netlink_ext_ack *extack)
+			size_t size, u8 type, int maxtype,
+			struct netlink_ext_ack *extack)
 {
 	size_t pos;
 	int i;
@@ -520,6 +540,25 @@ static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
 	}
 	pos = dst->nlmsg_len;
 
+	if (maxtype) {
+		/* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+		WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO);
+
+		for (i = 1; i <= maxtype; i++) {
+			int err;
+
+			if (!attrs[i])
+				continue;
+
+			/* just copy - no need for translation */
+			err = xfrm_attr_cpy32(dst, &pos, attrs[i], size,
+					nla_len(attrs[i]), nla_len(attrs[i]));
+			if (err)
+				return err;
+		}
+		return 0;
+	}
+
 	for (i = 1; i < XFRMA_MAX + 1; i++) {
 		int err;
 
@@ -564,7 +603,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
 	if (err < 0)
 		return ERR_PTR(err);
 
-	len = xfrm_user_rcv_calculate_len64(h32, attrs);
+	len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype);
 	/* The message doesn't need translation */
 	if (len == nlmsg_len(h32))
 		return NULL;
@@ -574,7 +613,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
 	if (!h64)
 		return ERR_PTR(-ENOMEM);
 
-	err = xfrm_xlate32(h64, h32, attrs, len, type, extack);
+	err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack);
 	if (err < 0) {
 		kvfree(h64);
 		return ERR_PTR(err);

From 70bfdf62e93a4d73cfbaf83a3ac708a483ef7a71 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Sat, 17 Jul 2021 16:02:22 +0100
Subject: [PATCH 070/502] selftests/net/ipsec: Add test for xfrm_spdattr_type_t

Set hthresh, dump it again and verify thresh.lbits && thresh.rbits.
They are passed as attributes of xfrm_spdattr_type_t, different from
other message attributes that use xfrm_attr_type_t.
Also, test attribute that is bigger than XFRMA_SPD_MAX, currently it
should be silently ignored.

Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-kselftest@vger.kernel.org
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 tools/testing/selftests/net/ipsec.c | 165 +++++++++++++++++++++++++++-
 1 file changed, 163 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index f23438d512c5..3d7dde2c321b 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -484,13 +484,16 @@ enum desc_type {
 	MONITOR_ACQUIRE,
 	EXPIRE_STATE,
 	EXPIRE_POLICY,
+	SPDINFO_ATTRS,
 };
 const char *desc_name[] = {
 	"create tunnel",
 	"alloc spi",
 	"monitor acquire",
 	"expire state",
-	"expire policy"
+	"expire policy",
+	"spdinfo attributes",
+	""
 };
 struct xfrm_desc {
 	enum desc_type	type;
@@ -1593,6 +1596,155 @@ out_close:
 	return ret;
 }
 
+static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq,
+		unsigned thresh4_l, unsigned thresh4_r,
+		unsigned thresh6_l, unsigned thresh6_r,
+		bool add_bad_attr)
+
+{
+	struct {
+		struct nlmsghdr		nh;
+		union {
+			uint32_t	unused;
+			int		error;
+		};
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+	struct xfrmu_spdhthresh thresh;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.unused));
+	req.nh.nlmsg_type	= XFRM_MSG_NEWSPDINFO;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= (*seq)++;
+
+	thresh.lbits = thresh4_l;
+	thresh.rbits = thresh4_r;
+	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh)))
+		return -1;
+
+	thresh.lbits = thresh6_l;
+	thresh.rbits = thresh6_r;
+	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh)))
+		return -1;
+
+	if (add_bad_attr) {
+		BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1);
+		if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) {
+			pr_err("adding attribute failed: no space");
+			return -1;
+		}
+	}
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+
+	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		return -1;
+	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+		return -1;
+	}
+
+	if (req.error) {
+		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq)
+{
+	struct {
+		struct nlmsghdr			nh;
+		union {
+			uint32_t	unused;
+			int		error;
+		};
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+
+	if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) {
+		pr_err("Can't set SPD HTHRESH");
+		return KSFT_FAIL;
+	}
+
+	memset(&req, 0, sizeof(req));
+
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.unused));
+	req.nh.nlmsg_type	= XFRM_MSG_GETSPDINFO;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST;
+	req.nh.nlmsg_seq	= (*seq)++;
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return KSFT_FAIL;
+	}
+
+	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		return KSFT_FAIL;
+	} else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) {
+		size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused));
+		struct rtattr *attr = (void *)req.attrbuf;
+		int got_thresh = 0;
+
+		for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+			if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) {
+				struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+				got_thresh++;
+				if (t->lbits != 32 || t->rbits != 31) {
+					pr_err("thresh differ: %u, %u",
+							t->lbits, t->rbits);
+					return KSFT_FAIL;
+				}
+			}
+			if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) {
+				struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+				got_thresh++;
+				if (t->lbits != 120 || t->rbits != 16) {
+					pr_err("thresh differ: %u, %u",
+							t->lbits, t->rbits);
+					return KSFT_FAIL;
+				}
+			}
+		}
+		if (got_thresh != 2) {
+			pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh);
+			return KSFT_FAIL;
+		}
+	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+		return KSFT_FAIL;
+	} else {
+		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+		return -1;
+	}
+
+	/* Restore the default */
+	if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) {
+		pr_err("Can't restore SPD HTHRESH");
+		return KSFT_FAIL;
+	}
+
+	/*
+	 * At this moment xfrm uses nlmsg_parse_deprecated(), which
+	 * implies NL_VALIDATE_LIBERAL - ignoring attributes with
+	 * (type > maxtype). nla_parse_depricated_strict() would enforce
+	 * it. Or even stricter nla_parse().
+	 * Right now it's not expected to fail, but to be ignored.
+	 */
+	if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true))
+		return KSFT_PASS;
+
+	return KSFT_PASS;
+}
+
 static int child_serv(int xfrm_sock, uint32_t *seq,
 		unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
 {
@@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
 		case EXPIRE_POLICY:
 			ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
 			break;
+		case SPDINFO_ATTRS:
+			ret = xfrm_spdinfo_attrs(xfrm_sock, &seq);
+			break;
 		default:
 			printk("Unknown desc type %d", desc.type);
 			exit(KSFT_FAIL);
@@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto)
  *   sizeof(xfrm_user_polexpire)  = 168  |  sizeof(xfrm_user_polexpire)  = 176
  *
  * Check the affected by the UABI difference structures.
+ * Also, check translation for xfrm_set_spdinfo: it has it's own attributes
+ * which needs to be correctly copied, but not translated.
  */
-const unsigned int compat_plan = 4;
+const unsigned int compat_plan = 5;
 static int write_compat_struct_tests(int test_desc_fd)
 {
 	struct xfrm_desc desc = {};
@@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd)
 	if (__write_desc(test_desc_fd, &desc))
 		return -1;
 
+	desc.type = SPDINFO_ATTRS;
+	if (__write_desc(test_desc_fd, &desc))
+		return -1;
+
 	return 0;
 }
 

From 990e4ad3ddcb72216caeddd6e62c5f45a21e8121 Mon Sep 17 00:00:00 2001
From: Xiangyang Zhang <xyz.sun.ok@gmail.com>
Date: Mon, 28 Jun 2021 23:22:39 +0800
Subject: [PATCH 071/502] staging: rtl8723bs: Fix a resource leak in sd_int_dpc

The "c2h_evt" variable is not freed when function call
"c2h_evt_read_88xx" failed

Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver")
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Xiangyang Zhang <xyz.sun.ok@gmail.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210628152239.5475-1-xyz.sun.ok@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/hal/sdio_ops.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/staging/rtl8723bs/hal/sdio_ops.c b/drivers/staging/rtl8723bs/hal/sdio_ops.c
index 2dd251ce177e..a545832a468e 100644
--- a/drivers/staging/rtl8723bs/hal/sdio_ops.c
+++ b/drivers/staging/rtl8723bs/hal/sdio_ops.c
@@ -909,6 +909,8 @@ void sd_int_dpc(struct adapter *adapter)
 				} else {
 					rtw_c2h_wk_cmd(adapter, (u8 *)c2h_evt);
 				}
+			} else {
+				kfree(c2h_evt);
 			}
 		} else {
 			/* Error handling for malloc fail */

From 61acabaae5ba58b3c32e6e90d24c2c0827fd27a8 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Jun 2021 18:37:33 +0300
Subject: [PATCH 072/502] serial: max310x: Unprepare and disable clock in error
 path

In one error case the clock may be left prepared and enabled.
Unprepare and disable clock in that case to balance state of
the hardware.

Fixes: d4d6f03c4fb3 ("serial: max310x: Try to get crystal clock rate from property")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210625153733.12911-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/max310x.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index 0c1e4df52215..ef11860cd69e 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1293,7 +1293,8 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty
 		freq = uartclk;
 	if (freq == 0) {
 		dev_err(dev, "Cannot get clock rate\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_clk;
 	}
 
 	if (xtal) {

From e5227c51090e165db4b48dcaa300605bfced7014 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Sat, 26 Jun 2021 06:11:05 +0200
Subject: [PATCH 073/502] serial: 8250: Mask out floating 16/32-bit bus bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make sure only actual 8 bits of the IIR register are used in determining
the port type in `autoconfig'.

The `serial_in' port accessor returns the `unsigned int' type, meaning
that with UPIO_AU, UPIO_MEM16, UPIO_MEM32, and UPIO_MEM32BE access types
more than 8 bits of data are returned, of which the high order bits will
often come from bus lines that are left floating in the data phase.  For
example with the MIPS Malta board's CBUS UART, where the registers are
aligned on 8-byte boundaries and which uses 32-bit accesses, data as
follows is returned:

YAMON> dump -32 0xbf000900 0x40

BF000900: 1F000942 1F000942 1F000900 1F000900  ...B...B........
BF000910: 1F000901 1F000901 1F000900 1F000900  ................
BF000920: 1F000900 1F000900 1F000960 1F000960  ...........`...`
BF000930: 1F000900 1F000900 1F0009FF 1F0009FF  ................

YAMON>

Evidently high-order 24 bits return values previously driven in the
address phase (the 3 highest order address bits used with the command
above are masked out in the simple virtual address mapping used here and
come out at zeros on the external bus), a common scenario with bus lines
left floating, due to bus capacitance.

Consequently when the value of IIR, mapped at 0x1f000910, is retrieved
in `autoconfig', it comes out at 0x1f0009c1 and when it is right-shifted
by 6 and then assigned to 8-bit `scratch' variable, the value calculated
is 0x27, not one of 0, 1, 2, 3 expected in port type determination.

Fix the issue then, by assigning the value returned from `serial_in' to
`scratch' first, which masks out 24 high-order bits retrieved, and only
then right-shift the resulting 8-bit data quantity, producing the value
of 3 in this case, as expected.  Fix the same issue in `serial_dl_read'.

The problem first appeared with Linux 2.6.9-rc3 which predates our repo
history, but the origin could be identified with the old MIPS/Linux repo
also at: <git://git.kernel.org/pub/scm/linux/kernel/git/ralf/linux.git>
as commit e0d2356c0777 ("Merge with Linux 2.6.9-rc3."), where code in
`serial_in' was updated with this case:

+	case UPIO_MEM32:
+		return readl(up->port.membase + offset);
+

which made it produce results outside the unsigned 8-bit range for the
first time, though obviously it is system dependent what actual values
appear in the high order bits retrieved and it may well have been zeros
in the relevant positions with the system the change originally was
intended for.  It is at that point that code in `autoconf' should have
been updated accordingly, but clearly it was overlooked.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org # v2.6.12+
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Link: https://lore.kernel.org/r/alpine.DEB.2.21.2106260516220.37803@angie.orcam.me.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 2164290cbd31..2e7000f79b03 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -311,7 +311,11 @@ static const struct serial8250_config uart_config[] = {
 /* Uart divisor latch read */
 static int default_serial_dl_read(struct uart_8250_port *up)
 {
-	return serial_in(up, UART_DLL) | serial_in(up, UART_DLM) << 8;
+	/* Assign these in pieces to truncate any bits above 7.  */
+	unsigned char dll = serial_in(up, UART_DLL);
+	unsigned char dlm = serial_in(up, UART_DLM);
+
+	return dll | dlm << 8;
 }
 
 /* Uart divisor latch write */
@@ -1297,9 +1301,11 @@ static void autoconfig(struct uart_8250_port *up)
 	serial_out(up, UART_LCR, 0);
 
 	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO);
-	scratch = serial_in(up, UART_IIR) >> 6;
 
-	switch (scratch) {
+	/* Assign this as it is to truncate any bits above 7.  */
+	scratch = serial_in(up, UART_IIR);
+
+	switch (scratch >> 6) {
 	case 0:
 		autoconfig_8250(up);
 		break;

From 9a936d6c3d3d6c33ecbadf72dccdb567b5cd3c72 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Sat, 26 Jun 2021 06:11:13 +0200
Subject: [PATCH 074/502] MIPS: Malta: Do not byte-swap accesses to the CBUS
 UART
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Correct big-endian accesses to the CBUS UART, a Malta on-board discrete
TI16C550C part wired directly to the system controller's device bus, and
do not use byte swapping with the 32-bit accesses to the device.

The CBUS is used for devices such as the boot flash memory needed early
on in system bootstrap even before PCI has been initialised.  Therefore
it uses the system controller's device bus, which follows the endianness
set with the CPU, which means no byte-swapping is ever required for data
accesses to CBUS, unlike with PCI.

The CBUS UART uses the UPIO_MEM32 access method, that is the `readl' and
`writel' MMIO accessors, which on the MIPS platform imply byte-swapping
with PCI systems.  Consequently the wrong byte lane is accessed with the
big-endian configuration and the UART is not correctly accessed.

As it happens the UPIO_MEM32BE access method makes use of the `ioread32'
and `iowrite32' MMIO accessors, which still use `readl' and `writel'
respectively, however they byte-swap data passed, effectively cancelling
swapping done with the accessors themselves and making it suitable for
the CBUS UART.

Make the CBUS UART switch between UPIO_MEM32 and UPIO_MEM32BE then,
based on the endianness selected.  With this change in place the device
is correctly recognised with big-endian Malta at boot, along with the
Super I/O devices behind PCI:

Serial: 8250/16550 driver, 5 ports, IRQ sharing enabled
printk: console [ttyS0] disabled
serial8250.0: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A
printk: console [ttyS0] enabled
printk: bootconsole [uart8250] disabled
serial8250.0: ttyS1 at I/O 0x2f8 (irq = 3, base_baud = 115200) is a 16550A
serial8250.0: ttyS2 at MMIO 0x1f000900 (irq = 20, base_baud = 230400) is a 16550A

Fixes: e7c4782f92fc ("[MIPS] Put an end to <asm/serial.h>'s long and annyoing existence")
Cc: stable@vger.kernel.org # v2.6.23+
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Link: https://lore.kernel.org/r/alpine.DEB.2.21.2106260524430.37803@angie.orcam.me.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/mti-malta/malta-platform.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c
index ee7471984fe7..4ffbcc58c6f6 100644
--- a/arch/mips/mti-malta/malta-platform.c
+++ b/arch/mips/mti-malta/malta-platform.c
@@ -48,7 +48,8 @@ static struct plat_serial8250_port uart8250_data[] = {
 		.mapbase	= 0x1f000900,	/* The CBUS UART */
 		.irq		= MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB2,
 		.uartclk	= 3686400,	/* Twice the usual clk! */
-		.iotype		= UPIO_MEM32,
+		.iotype		= IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) ?
+				  UPIO_MEM32BE : UPIO_MEM32,
 		.flags		= CBUS_UART_FLAGS,
 		.regshift	= 3,
 	},

From cc9ca4d95846cbbece48d9cd385550f8fba6a3c1 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Wed, 30 Jun 2021 13:56:43 +0100
Subject: [PATCH 075/502] serial: tegra: Only print FIFO error message when an
 error occurs

The Tegra serial driver always prints an error message when enabling the
FIFO for devices that have support for checking the FIFO enable status.
Fix this by displaying the error message, only when an error occurs.

Finally, update the error message to make it clear that enabling the
FIFO failed and display the error code.

Fixes: 222dcdff3405 ("serial: tegra: check for FIFO mode enabled status")
Cc: <stable@vger.kernel.org>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Link: https://lore.kernel.org/r/20210630125643.264264-1-jonathanh@nvidia.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/serial-tegra.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c
index 222032792d6c..eba5b9ecba34 100644
--- a/drivers/tty/serial/serial-tegra.c
+++ b/drivers/tty/serial/serial-tegra.c
@@ -1045,9 +1045,11 @@ static int tegra_uart_hw_init(struct tegra_uart_port *tup)
 
 	if (tup->cdata->fifo_mode_enable_status) {
 		ret = tegra_uart_wait_fifo_mode_enabled(tup);
-		dev_err(tup->uport.dev, "FIFO mode not enabled\n");
-		if (ret < 0)
+		if (ret < 0) {
+			dev_err(tup->uport.dev,
+				"Failed to enable FIFO mode: %d\n", ret);
 			return ret;
+		}
 	} else {
 		/*
 		 * For all tegra devices (up to t210), there is a hardware

From 853a9ae29e978d37f5dfa72622a68c9ae3d7fa89 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 14 Jul 2021 10:04:27 +0200
Subject: [PATCH 076/502] serial: 8250: fix handle_irq locking

The 8250 handle_irq callback is not just called from the interrupt
handler but also from a timer callback when polling (e.g. for ports
without an interrupt line). Consequently the callback must explicitly
disable interrupts to avoid a potential deadlock with another interrupt
in polled mode.

Add back an irqrestore-version of the sysrq port-unlock helper and use
it in the 8250 callbacks that need it.

Fixes: 75f4e830fa9c ("serial: do not restore interrupt state in sysrq helper")
Cc: stable@vger.kernel.org	# 5.13
Cc: Joel Stanley <joel@jms.id.au>
Cc: Andrew Jeffery <andrew@aj.id.au>
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://lore.kernel.org/r/20210714080427.28164-1-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_aspeed_vuart.c |  5 +++--
 drivers/tty/serial/8250/8250_fsl.c          |  5 +++--
 drivers/tty/serial/8250/8250_port.c         |  5 +++--
 include/linux/serial_core.h                 | 24 +++++++++++++++++++++
 4 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c
index 4caab8714e2c..2350fb3bb5e4 100644
--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c
+++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c
@@ -329,6 +329,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 	unsigned int iir, lsr;
+	unsigned long flags;
 	unsigned int space, count;
 
 	iir = serial_port_in(port, UART_IIR);
@@ -336,7 +337,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port)
 	if (iir & UART_IIR_NO_INT)
 		return 0;
 
-	spin_lock(&port->lock);
+	spin_lock_irqsave(&port->lock, flags);
 
 	lsr = serial_port_in(port, UART_LSR);
 
@@ -370,7 +371,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port)
 	if (lsr & UART_LSR_THRE)
 		serial8250_tx_chars(up);
 
-	uart_unlock_and_check_sysrq(port);
+	uart_unlock_and_check_sysrq_irqrestore(port, flags);
 
 	return 1;
 }
diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
index 4e75d2e4f87c..fc65a2293ce9 100644
--- a/drivers/tty/serial/8250/8250_fsl.c
+++ b/drivers/tty/serial/8250/8250_fsl.c
@@ -30,10 +30,11 @@ struct fsl8250_data {
 int fsl8250_handle_irq(struct uart_port *port)
 {
 	unsigned char lsr, orig_lsr;
+	unsigned long flags;
 	unsigned int iir;
 	struct uart_8250_port *up = up_to_u8250p(port);
 
-	spin_lock(&up->port.lock);
+	spin_lock_irqsave(&up->port.lock, flags);
 
 	iir = port->serial_in(port, UART_IIR);
 	if (iir & UART_IIR_NO_INT) {
@@ -82,7 +83,7 @@ int fsl8250_handle_irq(struct uart_port *port)
 
 	up->lsr_saved_flags = orig_lsr;
 
-	uart_unlock_and_check_sysrq(&up->port);
+	uart_unlock_and_check_sysrq_irqrestore(&up->port, flags);
 
 	return 1;
 }
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 2e7000f79b03..1da29a219842 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1899,11 +1899,12 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir)
 	unsigned char status;
 	struct uart_8250_port *up = up_to_u8250p(port);
 	bool skip_rx = false;
+	unsigned long flags;
 
 	if (iir & UART_IIR_NO_INT)
 		return 0;
 
-	spin_lock(&port->lock);
+	spin_lock_irqsave(&port->lock, flags);
 
 	status = serial_port_in(port, UART_LSR);
 
@@ -1929,7 +1930,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir)
 		(up->ier & UART_IER_THRI))
 		serial8250_tx_chars(up);
 
-	uart_unlock_and_check_sysrq(port);
+	uart_unlock_and_check_sysrq_irqrestore(port, flags);
 
 	return 1;
 }
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 52d7fb92a69d..c58cc142d23f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -518,6 +518,25 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port)
 	if (sysrq_ch)
 		handle_sysrq(sysrq_ch);
 }
+
+static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port,
+		unsigned long flags)
+{
+	int sysrq_ch;
+
+	if (!port->has_sysrq) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		return;
+	}
+
+	sysrq_ch = port->sysrq_ch;
+	port->sysrq_ch = 0;
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (sysrq_ch)
+		handle_sysrq(sysrq_ch);
+}
 #else	/* CONFIG_MAGIC_SYSRQ_SERIAL */
 static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch)
 {
@@ -531,6 +550,11 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port)
 {
 	spin_unlock(&port->lock);
 }
+static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port,
+		unsigned long flags)
+{
+	spin_unlock_irqrestore(&port->lock, flags);
+}
 #endif	/* CONFIG_MAGIC_SYSRQ_SERIAL */
 
 /*

From 7f0909db761535aefafa77031062603a71557267 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 13 Jul 2021 13:17:39 +0300
Subject: [PATCH 077/502] serial: 8250_pci: Enumerate Elkhart Lake UARTs via
 dedicated driver

Elkhart Lake UARTs are PCI enumerated Synopsys DesignWare v4.0+ UART
integrated with Intel iDMA 32-bit DMA controller. There is a specific
driver to handle them, i.e. 8250_lpss. Hence, disable 8250_pci
enumeration for these UARTs.

Fixes: 1b91d97c66ef ("serial: 8250_lpss: Add ->setup() for Elkhart Lake ports")
Fixes: 4f912b898dc2 ("serial: 8250_lpss: Enable HS UART on Elkhart Lake")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210713101739.36962-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 75827b608fdb..02985cf90ef2 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -3836,6 +3836,12 @@ static const struct pci_device_id blacklist[] = {
 	{ PCI_VDEVICE(INTEL, 0x0f0c), },
 	{ PCI_VDEVICE(INTEL, 0x228a), },
 	{ PCI_VDEVICE(INTEL, 0x228c), },
+	{ PCI_VDEVICE(INTEL, 0x4b96), },
+	{ PCI_VDEVICE(INTEL, 0x4b97), },
+	{ PCI_VDEVICE(INTEL, 0x4b98), },
+	{ PCI_VDEVICE(INTEL, 0x4b99), },
+	{ PCI_VDEVICE(INTEL, 0x4b9a), },
+	{ PCI_VDEVICE(INTEL, 0x4b9b), },
 	{ PCI_VDEVICE(INTEL, 0x9ce3), },
 	{ PCI_VDEVICE(INTEL, 0x9ce4), },
 

From cb7abd1db6e5f99a05f1a00b65be29029a6a152a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 21 Jul 2021 17:35:44 +0200
Subject: [PATCH 078/502] staging: rtl8723bs: select CONFIG_CRYPTO_LIB_ARC4

The other rtlwifi drivers already have this, but r8723bs
was converted to the generic implementation without adding
the select:

ERROR: modpost: "arc4_crypt" [drivers/staging/rtl8723bs/r8723bs.ko] undefined!
ERROR: modpost: "arc4_setkey" [drivers/staging/rtl8723bs/r8723bs.ko] undefined!

Fixes: 1b11e893eda0 ("staging: rtl8723bs: replace private arc4 encryption with in-kernel one")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20210721153550.3624490-1-arnd@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/rtl8723bs/Kconfig b/drivers/staging/rtl8723bs/Kconfig
index a88467334dac..7eae820eae3b 100644
--- a/drivers/staging/rtl8723bs/Kconfig
+++ b/drivers/staging/rtl8723bs/Kconfig
@@ -5,6 +5,7 @@ config RTL8723BS
 	depends on m
 	select WIRELESS_EXT
 	select WEXT_PRIV
+	select CRYPTO_LIB_ARC4
 	help
 	This option enables support for RTL8723BS SDIO drivers, such as
 	the wifi found on the 1st gen Intel Compute Stick, the CHIP

From 1e7107c5ef44431bc1ebbd4c353f1d7c22e5f2ec Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 16 Jun 2021 08:51:57 -0400
Subject: [PATCH 079/502] cgroup1: fix leaked context root causing sporadic
 NULL deref in LTP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Richard reported sporadic (roughly one in 10 or so) null dereferences and
other strange behaviour for a set of automated LTP tests.  Things like:

   BUG: kernel NULL pointer dereference, address: 0000000000000008
   #PF: supervisor read access in kernel mode
   #PF: error_code(0x0000) - not-present page
   PGD 0 P4D 0
   Oops: 0000 [#1] PREEMPT SMP PTI
   CPU: 0 PID: 1516 Comm: umount Not tainted 5.10.0-yocto-standard #1
   Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014
   RIP: 0010:kernfs_sop_show_path+0x1b/0x60

...or these others:

   RIP: 0010:do_mkdirat+0x6a/0xf0
   RIP: 0010:d_alloc_parallel+0x98/0x510
   RIP: 0010:do_readlinkat+0x86/0x120

There were other less common instances of some kind of a general scribble
but the common theme was mount and cgroup and a dubious dentry triggering
the NULL dereference.  I was only able to reproduce it under qemu by
replicating Richard's setup as closely as possible - I never did get it
to happen on bare metal, even while keeping everything else the same.

In commit 71d883c37e8d ("cgroup_do_mount(): massage calling conventions")
we see this as a part of the overall change:

   --------------
           struct cgroup_subsys *ss;
   -       struct dentry *dentry;

   [...]

   -       dentry = cgroup_do_mount(&cgroup_fs_type, fc->sb_flags, root,
   -                                CGROUP_SUPER_MAGIC, ns);

   [...]

   -       if (percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
   -               struct super_block *sb = dentry->d_sb;
   -               dput(dentry);
   +       ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns);
   +       if (!ret && percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
   +               struct super_block *sb = fc->root->d_sb;
   +               dput(fc->root);
                   deactivate_locked_super(sb);
                   msleep(10);
                   return restart_syscall();
           }
   --------------

In changing from the local "*dentry" variable to using fc->root, we now
export/leave that dentry pointer in the file context after doing the dput()
in the unlikely "is_dying" case.   With LTP doing a crazy amount of back to
back mount/unmount [testcases/bin/cgroup_regression_5_1.sh] the unlikely
becomes slightly likely and then bad things happen.

A fix would be to not leave the stale reference in fc->root as follows:

   --------------
                  dput(fc->root);
  +               fc->root = NULL;
                  deactivate_locked_super(sb);
   --------------

...but then we are just open-coding a duplicate of fc_drop_locked() so we
simply use that instead.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: stable@vger.kernel.org      # v5.1+
Reported-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Fixes: 71d883c37e8d ("cgroup_do_mount(): massage calling conventions")
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/internal.h              | 1 -
 include/linux/fs_context.h | 1 +
 kernel/cgroup/cgroup-v1.c  | 4 +---
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index 3ce8edbaa3ca..82e8eb32ff3d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -61,7 +61,6 @@ extern void __init chrdev_init(void);
  */
 extern const struct fs_context_operations legacy_fs_context_ops;
 extern int parse_monolithic_mount_data(struct fs_context *, void *);
-extern void fc_drop_locked(struct fs_context *);
 extern void vfs_clean_context(struct fs_context *fc);
 extern int finish_clean_context(struct fs_context *fc);
 
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index e2bc16300c82..6b54982fc5f3 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -141,6 +141,7 @@ extern int vfs_get_tree(struct fs_context *fc);
 extern void put_fs_context(struct fs_context *fc);
 extern int vfs_parse_fs_param_source(struct fs_context *fc,
 				     struct fs_parameter *param);
+extern void fc_drop_locked(struct fs_context *fc);
 
 /*
  * sget() wrappers to be called from the ->get_tree() op.
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 8d6bf56ed77a..de2c432dee20 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1221,9 +1221,7 @@ int cgroup1_get_tree(struct fs_context *fc)
 		ret = cgroup_do_get_tree(fc);
 
 	if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
-		struct super_block *sb = fc->root->d_sb;
-		dput(fc->root);
-		deactivate_locked_super(sb);
+		fc_drop_locked(fc);
 		ret = 1;
 	}
 

From b42b0bddcbc87b4c66f6497f66fc72d52b712aa7 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 14 Jul 2021 17:19:33 +0800
Subject: [PATCH 080/502] workqueue: fix UAF in pwq_unbound_release_workfn()

I got a UAF report when doing fuzz test:

[  152.880091][ T8030] ==================================================================
[  152.881240][ T8030] BUG: KASAN: use-after-free in pwq_unbound_release_workfn+0x50/0x190
[  152.882442][ T8030] Read of size 4 at addr ffff88810d31bd00 by task kworker/3:2/8030
[  152.883578][ T8030]
[  152.883932][ T8030] CPU: 3 PID: 8030 Comm: kworker/3:2 Not tainted 5.13.0+ #249
[  152.885014][ T8030] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
[  152.886442][ T8030] Workqueue: events pwq_unbound_release_workfn
[  152.887358][ T8030] Call Trace:
[  152.887837][ T8030]  dump_stack_lvl+0x75/0x9b
[  152.888525][ T8030]  ? pwq_unbound_release_workfn+0x50/0x190
[  152.889371][ T8030]  print_address_description.constprop.10+0x48/0x70
[  152.890326][ T8030]  ? pwq_unbound_release_workfn+0x50/0x190
[  152.891163][ T8030]  ? pwq_unbound_release_workfn+0x50/0x190
[  152.891999][ T8030]  kasan_report.cold.15+0x82/0xdb
[  152.892740][ T8030]  ? pwq_unbound_release_workfn+0x50/0x190
[  152.893594][ T8030]  __asan_load4+0x69/0x90
[  152.894243][ T8030]  pwq_unbound_release_workfn+0x50/0x190
[  152.895057][ T8030]  process_one_work+0x47b/0x890
[  152.895778][ T8030]  worker_thread+0x5c/0x790
[  152.896439][ T8030]  ? process_one_work+0x890/0x890
[  152.897163][ T8030]  kthread+0x223/0x250
[  152.897747][ T8030]  ? set_kthread_struct+0xb0/0xb0
[  152.898471][ T8030]  ret_from_fork+0x1f/0x30
[  152.899114][ T8030]
[  152.899446][ T8030] Allocated by task 8884:
[  152.900084][ T8030]  kasan_save_stack+0x21/0x50
[  152.900769][ T8030]  __kasan_kmalloc+0x88/0xb0
[  152.901416][ T8030]  __kmalloc+0x29c/0x460
[  152.902014][ T8030]  alloc_workqueue+0x111/0x8e0
[  152.902690][ T8030]  __btrfs_alloc_workqueue+0x11e/0x2a0
[  152.903459][ T8030]  btrfs_alloc_workqueue+0x6d/0x1d0
[  152.904198][ T8030]  scrub_workers_get+0x1e8/0x490
[  152.904929][ T8030]  btrfs_scrub_dev+0x1b9/0x9c0
[  152.905599][ T8030]  btrfs_ioctl+0x122c/0x4e50
[  152.906247][ T8030]  __x64_sys_ioctl+0x137/0x190
[  152.906916][ T8030]  do_syscall_64+0x34/0xb0
[  152.907535][ T8030]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  152.908365][ T8030]
[  152.908688][ T8030] Freed by task 8884:
[  152.909243][ T8030]  kasan_save_stack+0x21/0x50
[  152.909893][ T8030]  kasan_set_track+0x20/0x30
[  152.910541][ T8030]  kasan_set_free_info+0x24/0x40
[  152.911265][ T8030]  __kasan_slab_free+0xf7/0x140
[  152.911964][ T8030]  kfree+0x9e/0x3d0
[  152.912501][ T8030]  alloc_workqueue+0x7d7/0x8e0
[  152.913182][ T8030]  __btrfs_alloc_workqueue+0x11e/0x2a0
[  152.913949][ T8030]  btrfs_alloc_workqueue+0x6d/0x1d0
[  152.914703][ T8030]  scrub_workers_get+0x1e8/0x490
[  152.915402][ T8030]  btrfs_scrub_dev+0x1b9/0x9c0
[  152.916077][ T8030]  btrfs_ioctl+0x122c/0x4e50
[  152.916729][ T8030]  __x64_sys_ioctl+0x137/0x190
[  152.917414][ T8030]  do_syscall_64+0x34/0xb0
[  152.918034][ T8030]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  152.918872][ T8030]
[  152.919203][ T8030] The buggy address belongs to the object at ffff88810d31bc00
[  152.919203][ T8030]  which belongs to the cache kmalloc-512 of size 512
[  152.921155][ T8030] The buggy address is located 256 bytes inside of
[  152.921155][ T8030]  512-byte region [ffff88810d31bc00, ffff88810d31be00)
[  152.922993][ T8030] The buggy address belongs to the page:
[  152.923800][ T8030] page:ffffea000434c600 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10d318
[  152.925249][ T8030] head:ffffea000434c600 order:2 compound_mapcount:0 compound_pincount:0
[  152.926399][ T8030] flags: 0x57ff00000010200(slab|head|node=1|zone=2|lastcpupid=0x7ff)
[  152.927515][ T8030] raw: 057ff00000010200 dead000000000100 dead000000000122 ffff888009c42c80
[  152.928716][ T8030] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
[  152.929890][ T8030] page dumped because: kasan: bad access detected
[  152.930759][ T8030]
[  152.931076][ T8030] Memory state around the buggy address:
[  152.931851][ T8030]  ffff88810d31bc00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  152.932967][ T8030]  ffff88810d31bc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  152.934068][ T8030] >ffff88810d31bd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  152.935189][ T8030]                    ^
[  152.935763][ T8030]  ffff88810d31bd80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  152.936847][ T8030]  ffff88810d31be00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  152.937940][ T8030] ==================================================================

If apply_wqattrs_prepare() fails in alloc_workqueue(), it will call put_pwq()
which invoke a work queue to call pwq_unbound_release_workfn() and use the 'wq'.
The 'wq' allocated in alloc_workqueue() will be freed in error path when
apply_wqattrs_prepare() fails. So it will lead a UAF.

CPU0                                          CPU1
alloc_workqueue()
alloc_and_link_pwqs()
apply_wqattrs_prepare() fails
apply_wqattrs_cleanup()
schedule_work(&pwq->unbound_release_work)
kfree(wq)
                                              worker_thread()
                                              pwq_unbound_release_workfn() <- trigger uaf here

If apply_wqattrs_prepare() fails, the new pwq are not linked, it doesn't
hold any reference to the 'wq', 'wq' is invalid to access in the worker,
so add check pwq if linked to fix this.

Fixes: 2d5f0764b526 ("workqueue: split apply_workqueue_attrs() into 3 stages")
Cc: stable@vger.kernel.org # v4.2+
Reported-by: Hulk Robot <hulkci@huawei.com>
Suggested-by: Lai Jiangshan <jiangshanlai@gmail.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com>
Tested-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 50142fc08902..f148eacda55a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3676,15 +3676,21 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
 						  unbound_release_work);
 	struct workqueue_struct *wq = pwq->wq;
 	struct worker_pool *pool = pwq->pool;
-	bool is_last;
+	bool is_last = false;
 
-	if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
-		return;
+	/*
+	 * when @pwq is not linked, it doesn't hold any reference to the
+	 * @wq, and @wq is invalid to access.
+	 */
+	if (!list_empty(&pwq->pwqs_node)) {
+		if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+			return;
 
-	mutex_lock(&wq->mutex);
-	list_del_rcu(&pwq->pwqs_node);
-	is_last = list_empty(&wq->pwqs);
-	mutex_unlock(&wq->mutex);
+		mutex_lock(&wq->mutex);
+		list_del_rcu(&pwq->pwqs_node);
+		is_last = list_empty(&wq->pwqs);
+		mutex_unlock(&wq->mutex);
+	}
 
 	mutex_lock(&wq_pool_mutex);
 	put_unbound_pool(pool);

From 456a9dace42ecfcec7ce6e17c18d1985d628dcd0 Mon Sep 17 00:00:00 2001
From: Mike Tipton <mdtipton@codeaurora.org>
Date: Wed, 21 Jul 2021 10:54:29 -0700
Subject: [PATCH 081/502] interconnect: Zero initial BW after sync-state

The initial BW values may be used by providers to enforce floors. Zero
these values after sync-state so that providers know when to stop
enforcing them.

Fixes: b1d681d8d324 ("interconnect: Add sync state support")
Signed-off-by: Mike Tipton <mdtipton@codeaurora.org>
Link: https://lore.kernel.org/r/20210721175432.2119-2-mdtipton@codeaurora.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 8a1e70e00876..945121e18b5c 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -1106,6 +1106,8 @@ void icc_sync_state(struct device *dev)
 		dev_dbg(p->dev, "interconnect provider is in synced state\n");
 		list_for_each_entry(n, &p->nodes, node_list) {
 			if (n->init_avg || n->init_peak) {
+				n->init_avg = 0;
+				n->init_peak = 0;
 				aggregate_requests(n);
 				p->set(n, n);
 			}

From 73606ba9242f8e32023699b500b7922b4cf2993c Mon Sep 17 00:00:00 2001
From: Mike Tipton <mdtipton@codeaurora.org>
Date: Wed, 21 Jul 2021 10:54:30 -0700
Subject: [PATCH 082/502] interconnect: Always call pre_aggregate before
 aggregate

The pre_aggregate callback isn't called in all cases before calling
aggregate. Add the missing calls so providers can rely on consistent
framework behavior.

Fixes: d3703b3e255f ("interconnect: Aggregate before setting initial bandwidth")
Signed-off-by: Mike Tipton <mdtipton@codeaurora.org>
Link: https://lore.kernel.org/r/20210721175432.2119-3-mdtipton@codeaurora.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 945121e18b5c..1b2c564eaa99 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -973,9 +973,14 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider)
 	}
 	node->avg_bw = node->init_avg;
 	node->peak_bw = node->init_peak;
+
+	if (provider->pre_aggregate)
+		provider->pre_aggregate(node);
+
 	if (provider->aggregate)
 		provider->aggregate(node, 0, node->init_avg, node->init_peak,
 				    &node->avg_bw, &node->peak_bw);
+
 	provider->set(node, node);
 	node->avg_bw = 0;
 	node->peak_bw = 0;

From 69de4421bb4c103ef42a32bafc596e23918c106f Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Wed, 21 Jul 2021 10:23:57 -0500
Subject: [PATCH 083/502] drm/ttm: Initialize debugfs from ttm_global_init()

We create a bunch of debugfs entries as a side-effect of
ttm_global_init() and then never clean them up.  This isn't usually a
problem because we free the whole debugfs directory on module unload.
However, if the global reference count ever goes to zero and then
ttm_global_init() is called again, we'll re-create those debugfs entries
and debugfs will complain in dmesg that we're creating entries that
already exist.  This patch fixes this problem by changing the lifetime
of the whole TTM debugfs directory to match that of the TTM global
state.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-6-jason@jlekstrand.net
---
 drivers/gpu/drm/ttm/ttm_device.c | 12 ++++++++++++
 drivers/gpu/drm/ttm/ttm_module.c | 16 ----------------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 519deea8e39b..74e3b460132b 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -44,6 +44,8 @@ static unsigned ttm_glob_use_count;
 struct ttm_global ttm_glob;
 EXPORT_SYMBOL(ttm_glob);
 
+struct dentry *ttm_debugfs_root;
+
 static void ttm_global_release(void)
 {
 	struct ttm_global *glob = &ttm_glob;
@@ -53,6 +55,7 @@ static void ttm_global_release(void)
 		goto out;
 
 	ttm_pool_mgr_fini();
+	debugfs_remove(ttm_debugfs_root);
 
 	__free_page(glob->dummy_read_page);
 	memset(glob, 0, sizeof(*glob));
@@ -73,6 +76,13 @@ static int ttm_global_init(void)
 
 	si_meminfo(&si);
 
+	ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
+	if (IS_ERR(ttm_debugfs_root)) {
+		ret = PTR_ERR(ttm_debugfs_root);
+		ttm_debugfs_root = NULL;
+		goto out;
+	}
+
 	/* Limit the number of pages in the pool to about 50% of the total
 	 * system memory.
 	 */
@@ -100,6 +110,8 @@ static int ttm_global_init(void)
 	debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
 				&glob->bo_count);
 out:
+	if (ret && ttm_debugfs_root)
+		debugfs_remove(ttm_debugfs_root);
 	if (ret)
 		--ttm_glob_use_count;
 	mutex_unlock(&ttm_global_mutex);
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index 997c458f68a9..7fcdef278c74 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -72,22 +72,6 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp)
 	return tmp;
 }
 
-struct dentry *ttm_debugfs_root;
-
-static int __init ttm_init(void)
-{
-	ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
-	return 0;
-}
-
-static void __exit ttm_exit(void)
-{
-	debugfs_remove(ttm_debugfs_root);
-}
-
-module_init(ttm_init);
-module_exit(ttm_exit);
-
 MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse");
 MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)");
 MODULE_LICENSE("GPL and additional rights");

From 1d5ccab95f06675a269f4cb223a1e3f6d1ebef42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 21 Jul 2021 11:53:21 +0200
Subject: [PATCH 084/502] spi: spi-mux: Add module info needed for autoloading
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the spi device table udev can autoload the spi-mux module in
the presence of an spi-mux device.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20210721095321.2165453-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mux.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c
index 37dfc6e82804..9708b7827ff7 100644
--- a/drivers/spi/spi-mux.c
+++ b/drivers/spi/spi-mux.c
@@ -167,10 +167,17 @@ err_put_ctlr:
 	return ret;
 }
 
+static const struct spi_device_id spi_mux_id[] = {
+	{ "spi-mux" },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, spi_mux_id);
+
 static const struct of_device_id spi_mux_of_match[] = {
 	{ .compatible = "spi-mux" },
 	{ }
 };
+MODULE_DEVICE_TABLE(of, spi_mux_of_match);
 
 static struct spi_driver spi_mux_driver = {
 	.probe  = spi_mux_probe,
@@ -178,6 +185,7 @@ static struct spi_driver spi_mux_driver = {
 		.name   = "spi-mux",
 		.of_match_table = spi_mux_of_match,
 	},
+	.id_table = spi_mux_id,
 };
 
 module_spi_driver(spi_mux_driver);

From 8311ee2164c5cd1b63a601ea366f540eae89f10e Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Tue, 20 Jul 2021 18:01:16 +0800
Subject: [PATCH 085/502] spi: meson-spicc: fix memory leak in
 meson_spicc_remove

In meson_spicc_probe, the error handling code needs to clean up master
by calling spi_master_put, but the remove function does not have this
function call. This will lead to memory leak of spicc->master.

Reported-by: Dongliang Mu <mudongliangabcd@gmail.com>
Fixes: 454fa271bc4e("spi: Add Meson SPICC driver")
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Link: https://lore.kernel.org/r/20210720100116.1438974-1-mudongliangabcd@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-meson-spicc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index b2c4621db34d..c208efeadd18 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -785,6 +785,8 @@ static int meson_spicc_remove(struct platform_device *pdev)
 	clk_disable_unprepare(spicc->core);
 	clk_disable_unprepare(spicc->pclk);
 
+	spi_master_put(spicc->master);
+
 	return 0;
 }
 

From e09f2ab8eecc6dcbd7013a1303cbe56b00dc9fb0 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Thu, 22 Jul 2021 15:48:45 +0200
Subject: [PATCH 086/502] spi: update modalias_show after
 of_device_uevent_modalias support

Commit 3ce6c9e2617e ("spi: add of_device_uevent_modalias support") is
incomplete, as it didn't update the modalias_show function to generate the
of: modalias string if available.

Fixes: 3ce6c9e2617e ("spi: add of_device_uevent_modalias support")
Signed-off-by: Andreas Schwab <schwab@suse.de>
Link: https://lore.kernel.org/r/mvmwnpi4fya.fsf@suse.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 35928d0843d9..397dd2959bfd 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -58,6 +58,10 @@ modalias_show(struct device *dev, struct device_attribute *a, char *buf)
 	const struct spi_device	*spi = to_spi_device(dev);
 	int len;
 
+	len = of_device_modalias(dev, buf, PAGE_SIZE);
+	if (len != -ENODEV)
+		return len;
+
 	len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1);
 	if (len != -ENODEV)
 		return len;

From 29f6a20c21b5bdc7eb623a712bbf7b99612ee746 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Fri, 2 Jul 2021 21:49:14 +0200
Subject: [PATCH 087/502] arm64: dts: ls1028: sl28: fix networking for variant
 2

The PHY configuration for the variant 2 is still missing the flag for
in-band signalling between PHY and MAC. Both sides - MAC and PHY - have
to match the setting. For now, Linux only supports setting the MAC side
and thus it has to match the setting the bootloader is configuring.
Enable in-band signalling to make ethernet work.

Fixes: ab43f0307449 ("arm64: dts: ls1028a: sl28: add support for variant 2")
Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts
index dd764b720fb0..f6a79c8080d1 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts
@@ -54,6 +54,7 @@
 
 &mscc_felix_port0 {
 	label = "swp0";
+	managed = "in-band-status";
 	phy-handle = <&phy0>;
 	phy-mode = "sgmii";
 	status = "okay";
@@ -61,6 +62,7 @@
 
 &mscc_felix_port1 {
 	label = "swp1";
+	managed = "in-band-status";
 	phy-handle = <&phy1>;
 	phy-mode = "sgmii";
 	status = "okay";

From 828db68f4ff1ab6982a36a56522b585160dc8c8e Mon Sep 17 00:00:00 2001
From: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
Date: Tue, 13 Jul 2021 23:21:07 +0300
Subject: [PATCH 088/502] ARM: dts: colibri-imx6ull: limit SDIO clock to 25MHz

NXP and AzureWave don't recommend using SDIO bus mode 3.3V@50MHz due
to noise affecting the wireless throughput. Colibri iMX6ULL uses only
3.3V signaling for Wi-Fi module AW-CM276NF.

Limit the SDIO Clock on Colibri iMX6ULL to 25MHz.

Fixes: c2e4987e0e02 ("ARM: dts: imx6ull: add Toradex Colibri iMX6ULL support")
Signed-off-by: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi
index a0545431b3dc..9f1e38282bee 100644
--- a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi
+++ b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi
@@ -43,6 +43,7 @@
 	assigned-clock-rates = <0>, <198000000>;
 	cap-power-off-card;
 	keep-power-in-suspend;
+	max-frequency = <25000000>;
 	mmc-pwrseq = <&wifi_pwrseq>;
 	no-1-8-v;
 	non-removable;

From e39cdacf2f664b09029e7c1eb354c91a20c367af Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Tue, 22 Jun 2021 07:11:31 +0000
Subject: [PATCH 089/502] pcmcia: i82092: fix a null pointer dereference bug

During the driver loading process, the 'dev' field was not assigned, but
the 'dev' field was referenced in the subsequent 'i82092aa_set_mem_map'
function.

Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
CC: <stable@vger.kernel.org>
[linux@dominikbrodowski.net: shorten commit message, add Cc to stable]
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 drivers/pcmcia/i82092.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c
index 85887d885b5f..192c9049d654 100644
--- a/drivers/pcmcia/i82092.c
+++ b/drivers/pcmcia/i82092.c
@@ -112,6 +112,7 @@ static int i82092aa_pci_probe(struct pci_dev *dev,
 	for (i = 0; i < socket_count; i++) {
 		sockets[i].card_state = 1; /* 1 = present but empty */
 		sockets[i].io_base = pci_resource_start(dev, 0);
+		sockets[i].dev = dev;
 		sockets[i].socket.features |= SS_CAP_PCCARD;
 		sockets[i].socket.map_size = 0x1000;
 		sockets[i].socket.irq_mask = 0;

From 20fb73911fec01f06592de1cdbca00b66602ebd7 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 15 Jul 2021 14:23:21 +0100
Subject: [PATCH 090/502] ARM: imx: fix missing 3rd argument in macro
 imx_mmdc_perf_init

The function imx_mmdc_perf_init recently had a 3rd argument added to
it but the equivalent macro was not updated and is still the older
2 argument version. Fix this by adding in the missing 3rd argumement
mmdc_ipg_clk.

Fixes: f07ec8536580 ("ARM: imx: add missing clk_disable_unprepare()")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/mmdc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index 4a6f1359e1e9..af12668d0bf5 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -534,7 +534,7 @@ pmu_free:
 
 #else
 #define imx_mmdc_remove NULL
-#define imx_mmdc_perf_init(pdev, mmdc_base) 0
+#define imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk) 0
 #endif
 
 static int imx_mmdc_probe(struct platform_device *pdev)

From 3d9e30a52047f2d464efdfd1d561ae1f707a0286 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 18 Jul 2021 23:43:02 +0200
Subject: [PATCH 091/502] ARM: dts: imx: Swap M53Menlo
 pinctrl_power_button/pinctrl_power_out pins

The pinctrl_power_button/pinctrl_power_out each define single GPIO
pinmux, except it is exactly the other one than the matching gpio-keys
and gpio-poweroff DT nodes use for that functionality. Swap the two
GPIOs to correct this error.

Fixes: 50d29fdb765d ("ARM: dts: imx53: Add power GPIOs on M53Menlo")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Fabio Estevam <festevam@gmail.com>
Cc: NXP Linux Team <linux-imx@nxp.com>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx53-m53menlo.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts
index f98691ae4415..d3082b9774e4 100644
--- a/arch/arm/boot/dts/imx53-m53menlo.dts
+++ b/arch/arm/boot/dts/imx53-m53menlo.dts
@@ -388,13 +388,13 @@
 
 		pinctrl_power_button: powerbutgrp {
 			fsl,pins = <
-				MX53_PAD_SD2_DATA2__GPIO1_13		0x1e4
+				MX53_PAD_SD2_DATA0__GPIO1_15		0x1e4
 			>;
 		};
 
 		pinctrl_power_out: poweroutgrp {
 			fsl,pins = <
-				MX53_PAD_SD2_DATA0__GPIO1_15		0x1e4
+				MX53_PAD_SD2_DATA2__GPIO1_13		0x1e4
 			>;
 		};
 

From ec61cd49bf566401306cfc4855bda8c08bbaa46c Mon Sep 17 00:00:00 2001
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Mon, 28 Jun 2021 14:37:13 +0200
Subject: [PATCH 092/502] mac80211: Do not strip skb headroom on monitor frames

When a monitor interface is present together with other interfaces, a
received skb is copied and received on the monitor netdev. Before, the
copied skb was allocated with exactly the amount of space needed for
the radiotap header, resulting in an skb without any headroom at all
being received on the monitor netdev. With the introduction of eBPF
and XDP in the kernel, skbs may be processed by custom eBPF programs.
However, since the skb cannot be reallocated in the eBPF program, no
more data or headers can be pushed. The old code made sure the final
headroom was zero regardless of the value of NET_SKB_PAD, so increasing
that constant would have no effect.

Now we allocate monitor skb copies with a headroom of NET_SKB_PAD bytes
before the radiotap header. Monitor interfaces now behave in the same
way as other netdev interfaces that honor the NET_SKB_PAD constant.

Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Link: https://lore.kernel.org/r/20210628123713.2070753-1-johan.almbladh@anyfinetworks.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 771921c057e8..2563473b5cf1 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -730,7 +730,8 @@ ieee80211_make_monitor_skb(struct ieee80211_local *local,
 		 * Need to make a copy and possibly remove radiotap header
 		 * and FCS from the original.
 		 */
-		skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC);
+		skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD,
+				      0, GFP_ATOMIC);
 
 		if (!skb)
 			return NULL;

From 1a7915501ca94a1f10288defe333cd5ade210b63 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 29 Jun 2021 13:28:53 +0200
Subject: [PATCH 093/502] mac80211: fix starting aggregation sessions on mesh
 interfaces

The logic for starting aggregation sessions was recently moved from minstrel_ht
to mac80211, into the subif tx handler just after the sta lookup.
Unfortunately this didn't work for mesh interfaces, since the sta lookup is
deferred until a much later point in time on those.
Fix this by also calling the aggregation check right after the deferred sta
lookup.

Fixes: 08a46c642001 ("mac80211: move A-MPDU session check from minstrel_ht to mac80211")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20210629112853.29785-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 57 ++++++++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e96981144358..8509778ff31f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1147,6 +1147,29 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
 	return queued;
 }
 
+static void
+ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
+		     struct sta_info *sta,
+		     struct sk_buff *skb)
+{
+	struct rate_control_ref *ref = sdata->local->rate_ctrl;
+	u16 tid;
+
+	if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
+		return;
+
+	if (!sta || !sta->sta.ht_cap.ht_supported ||
+	    !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
+	    skb->protocol == sdata->control_port_protocol)
+		return;
+
+	tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+	if (likely(sta->ampdu_mlme.tid_tx[tid]))
+		return;
+
+	ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
+}
+
 /*
  * initialises @tx
  * pass %NULL for the station if unknown, a valid pointer if known
@@ -1160,6 +1183,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	bool aggr_check = false;
 	int tid;
 
 	memset(tx, 0, sizeof(*tx));
@@ -1188,8 +1212,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 		} else if (tx->sdata->control_port_protocol == tx->skb->protocol) {
 			tx->sta = sta_info_get_bss(sdata, hdr->addr1);
 		}
-		if (!tx->sta && !is_multicast_ether_addr(hdr->addr1))
+		if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) {
 			tx->sta = sta_info_get(sdata, hdr->addr1);
+			aggr_check = true;
+		}
 	}
 
 	if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
@@ -1199,8 +1225,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 		struct tid_ampdu_tx *tid_tx;
 
 		tid = ieee80211_get_tid(hdr);
-
 		tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+		if (!tid_tx && aggr_check) {
+			ieee80211_aggr_check(sdata, tx->sta, skb);
+			tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+		}
+
 		if (tid_tx) {
 			bool queued;
 
@@ -4120,29 +4150,6 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 
-static void
-ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
-		     struct sta_info *sta,
-		     struct sk_buff *skb)
-{
-	struct rate_control_ref *ref = sdata->local->rate_ctrl;
-	u16 tid;
-
-	if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
-		return;
-
-	if (!sta || !sta->sta.ht_cap.ht_supported ||
-	    !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
-	    skb->protocol == sdata->control_port_protocol)
-		return;
-
-	tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
-	if (likely(sta->ampdu_mlme.tid_tx[tid]))
-		return;
-
-	ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
-}
-
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
 				  u32 info_flags,

From a5d3cbdb09ff1f52cbe040932e06c8b9915c6dad Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 2 Jul 2021 07:01:11 +0200
Subject: [PATCH 094/502] mac80211: fix enabling 4-address mode on a sta vif
 after assoc

Notify the driver about the 4-address mode change and also send a nulldata
packet to the AP to notify it about the change

Fixes: 1ff4e8f2dec8 ("mac80211: notify the driver when a sta uses 4-address mode")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20210702050111.47546-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c         | 19 +++++++++++++++++++
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/mlme.c        |  4 ++--
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 84cc7733ea66..4e6f11e63df3 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -152,6 +152,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
 				  struct vif_params *params)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct sta_info *sta;
 	int ret;
 
 	ret = ieee80211_if_change_type(sdata, type);
@@ -162,7 +164,24 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
 		RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
 		ieee80211_check_fast_rx_iface(sdata);
 	} else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) {
+		struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+		if (params->use_4addr == ifmgd->use_4addr)
+			return 0;
+
 		sdata->u.mgd.use_4addr = params->use_4addr;
+		if (!ifmgd->associated)
+			return 0;
+
+		mutex_lock(&local->sta_mtx);
+		sta = sta_info_get(sdata, ifmgd->bssid);
+		if (sta)
+			drv_sta_set_4addr(local, sdata, &sta->sta,
+					  params->use_4addr);
+		mutex_unlock(&local->sta_mtx);
+
+		if (params->use_4addr)
+			ieee80211_send_4addr_nullfunc(local, sdata);
 	}
 
 	if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 22549b95d1aa..30ce6d2ec7ce 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2201,6 +2201,8 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t);
 void ieee80211_send_nullfunc(struct ieee80211_local *local,
 			     struct ieee80211_sub_if_data *sdata,
 			     bool powersave);
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+				   struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_hdr *hdr, bool ack, u16 tx_time);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a00f11a33699..c0ea3b1aa9e1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1095,8 +1095,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 	ieee80211_tx_skb(sdata, skb);
 }
 
-static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
-					  struct ieee80211_sub_if_data *sdata)
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+				   struct ieee80211_sub_if_data *sdata)
 {
 	struct sk_buff *skb;
 	struct ieee80211_hdr *nullfunc;

From 17109e9783799be2a063b2bd861a508194b0a487 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Tue, 6 Jul 2021 17:44:23 +0200
Subject: [PATCH 095/502] virt_wifi: fix error on connect

When connecting without first doing a scan, the BSS list is empty
and __cfg80211_connect_result() generates this warning:

$ iw dev wlan0 connect -w VirtWifi
[   15.371989] ------------[ cut here ]------------
[   15.372179] WARNING: CPU: 0 PID: 92 at net/wireless/sme.c:756 __cfg80211_connect_result+0x402/0x440
[   15.372383] CPU: 0 PID: 92 Comm: kworker/u2:2 Not tainted 5.13.0-kvm #444
[   15.372512] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-3.fc34 04/01/2014
[   15.372597] Workqueue: cfg80211 cfg80211_event_work
[   15.372756] RIP: 0010:__cfg80211_connect_result+0x402/0x440
[   15.372818] Code: 48 2b 04 25 28 00 00 00 75 59 48 8b 3b 48 8b 76 10 48 8d 65 e0 5b 41 5c 41 5d 41 5e 5d 49 8d 65 f0 41 5d e9 d0 d4 fd ff 0f 0b <0f> 0b e9 f6 fd ff ff e8 f2 4a b4 ff e9 ec fd ff ff 0f 0b e9 19 fd
[   15.372966] RSP: 0018:ffffc900005cbdc0 EFLAGS: 00010246
[   15.373022] RAX: 0000000000000000 RBX: ffff8880028e2400 RCX: ffff8880028e2472
[   15.373088] RDX: 0000000000000002 RSI: 00000000fffffe01 RDI: ffffffff815335ba
[   15.373149] RBP: ffffc900005cbe00 R08: 0000000000000008 R09: ffff888002bdf8b8
[   15.373209] R10: ffff88803ec208f0 R11: ffffffffffffe9ae R12: ffff88801d687d98
[   15.373280] R13: ffff88801b5fe000 R14: ffffc900005cbdc0 R15: dead000000000100
[   15.373330] FS:  0000000000000000(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
[   15.373382] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   15.373425] CR2: 000056421c468958 CR3: 000000001b458001 CR4: 0000000000170eb0
[   15.373478] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   15.373529] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   15.373580] Call Trace:
[   15.373611]  ? cfg80211_process_wdev_events+0x10e/0x170
[   15.373743]  cfg80211_process_wdev_events+0x10e/0x170
[   15.373783]  cfg80211_process_rdev_events+0x21/0x40
[   15.373846]  cfg80211_event_work+0x20/0x30
[   15.373892]  process_one_work+0x1e9/0x340
[   15.373956]  worker_thread+0x4b/0x3f0
[   15.374017]  ? process_one_work+0x340/0x340
[   15.374053]  kthread+0x11f/0x140
[   15.374089]  ? set_kthread_struct+0x30/0x30
[   15.374153]  ret_from_fork+0x1f/0x30
[   15.374187] ---[ end trace 321ef0cb7e9c0be1 ]---
wlan0 (phy #0): connected to 00:00:00:00:00:00

Add the fake bss just before the connect so that cfg80211_get_bss()
finds the virtual network.
As some code was duplicated, move it in a common function.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Link: https://lore.kernel.org/r/20210706154423.11065-1-mcroce@linux.microsoft.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/virt_wifi.c | 52 ++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c
index 1df959532c7d..514f2c1124b6 100644
--- a/drivers/net/wireless/virt_wifi.c
+++ b/drivers/net/wireless/virt_wifi.c
@@ -136,6 +136,29 @@ static struct ieee80211_supported_band band_5ghz = {
 /* Assigned at module init. Guaranteed locally-administered and unicast. */
 static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {};
 
+static void virt_wifi_inform_bss(struct wiphy *wiphy)
+{
+	u64 tsf = div_u64(ktime_get_boottime_ns(), 1000);
+	struct cfg80211_bss *informed_bss;
+	static const struct {
+		u8 tag;
+		u8 len;
+		u8 ssid[8];
+	} __packed ssid = {
+		.tag = WLAN_EID_SSID,
+		.len = 8,
+		.ssid = "VirtWifi",
+	};
+
+	informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
+					   CFG80211_BSS_FTYPE_PRESP,
+					   fake_router_bssid, tsf,
+					   WLAN_CAPABILITY_ESS, 0,
+					   (void *)&ssid, sizeof(ssid),
+					   DBM_TO_MBM(-50), GFP_KERNEL);
+	cfg80211_put_bss(wiphy, informed_bss);
+}
+
 /* Called with the rtnl lock held. */
 static int virt_wifi_scan(struct wiphy *wiphy,
 			  struct cfg80211_scan_request *request)
@@ -156,28 +179,13 @@ static int virt_wifi_scan(struct wiphy *wiphy,
 /* Acquires and releases the rdev BSS lock. */
 static void virt_wifi_scan_result(struct work_struct *work)
 {
-	struct {
-		u8 tag;
-		u8 len;
-		u8 ssid[8];
-	} __packed ssid = {
-		.tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi",
-	};
-	struct cfg80211_bss *informed_bss;
 	struct virt_wifi_wiphy_priv *priv =
 		container_of(work, struct virt_wifi_wiphy_priv,
 			     scan_result.work);
 	struct wiphy *wiphy = priv_to_wiphy(priv);
 	struct cfg80211_scan_info scan_info = { .aborted = false };
-	u64 tsf = div_u64(ktime_get_boottime_ns(), 1000);
 
-	informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
-					   CFG80211_BSS_FTYPE_PRESP,
-					   fake_router_bssid, tsf,
-					   WLAN_CAPABILITY_ESS, 0,
-					   (void *)&ssid, sizeof(ssid),
-					   DBM_TO_MBM(-50), GFP_KERNEL);
-	cfg80211_put_bss(wiphy, informed_bss);
+	virt_wifi_inform_bss(wiphy);
 
 	/* Schedules work which acquires and releases the rtnl lock. */
 	cfg80211_scan_done(priv->scan_request, &scan_info);
@@ -225,10 +233,12 @@ static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev,
 	if (!could_schedule)
 		return -EBUSY;
 
-	if (sme->bssid)
+	if (sme->bssid) {
 		ether_addr_copy(priv->connect_requested_bss, sme->bssid);
-	else
+	} else {
+		virt_wifi_inform_bss(wiphy);
 		eth_zero_addr(priv->connect_requested_bss);
+	}
 
 	wiphy_debug(wiphy, "connect\n");
 
@@ -241,11 +251,13 @@ static void virt_wifi_connect_complete(struct work_struct *work)
 	struct virt_wifi_netdev_priv *priv =
 		container_of(work, struct virt_wifi_netdev_priv, connect.work);
 	u8 *requested_bss = priv->connect_requested_bss;
-	bool has_addr = !is_zero_ether_addr(requested_bss);
 	bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid);
 	u16 status = WLAN_STATUS_SUCCESS;
 
-	if (!priv->is_up || (has_addr && !right_addr))
+	if (is_zero_ether_addr(requested_bss))
+		requested_bss = NULL;
+
+	if (!priv->is_up || (requested_bss && !right_addr))
 		status = WLAN_STATUS_UNSPECIFIED_FAILURE;
 	else
 		priv->is_connected = true;

From 0d059964504a1605d84938c0b5b38f6573121c4a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 12 Jul 2021 21:53:30 +0200
Subject: [PATCH 096/502] nl80211: limit band information in non-split data

In non-split data, we shouldn't be adding S1G and 6 GHz
data (or future bands) since we're really close to the
4k message size limit. Remove those bands, any modern
userspace that can use S1G or 6 GHz should already be
using split dumps, and if not then it needs to update.

Link: https://lore.kernel.org/r/20210712215329.31444162a2c2.I5555312e4a074c84f8b4e7ad79dc4d1fbfc5126c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 50eb405b0690..16c88beea48b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2351,7 +2351,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
 			goto nla_put_failure;
 
 		for (band = state->band_start;
-		     band < NUM_NL80211_BANDS; band++) {
+		     band < (state->split ?
+				NUM_NL80211_BANDS :
+				NL80211_BAND_60GHZ + 1);
+		     band++) {
 			struct ieee80211_supported_band *sband;
 
 			/* omit higher bands for ancient software */

From f9a5c358c8d26fed0cc45f2afc64633d4ba21dff Mon Sep 17 00:00:00 2001
From: Nguyen Dinh Phi <phind.uet@gmail.com>
Date: Mon, 28 Jun 2021 21:23:34 +0800
Subject: [PATCH 097/502] cfg80211: Fix possible memory leak in function
 cfg80211_bss_update

When we exceed the limit of BSS entries, this function will free the
new entry, however, at this time, it is the last door to access the
inputed ies, so these ies will be unreferenced objects and cause memory
leak.
Therefore we should free its ies before deallocating the new entry, beside
of dropping it from hidden_list.

Signed-off-by: Nguyen Dinh Phi <phind.uet@gmail.com>
Link: https://lore.kernel.org/r/20210628132334.851095-1-phind.uet@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/scan.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index f03c7ac8e184..7897b1478c3c 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1754,16 +1754,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 			 * be grouped with this beacon for updates ...
 			 */
 			if (!cfg80211_combine_bsses(rdev, new)) {
-				kfree(new);
+				bss_ref_put(rdev, new);
 				goto drop;
 			}
 		}
 
 		if (rdev->bss_entries >= bss_entries_limit &&
 		    !cfg80211_bss_expire_oldest(rdev)) {
-			if (!list_empty(&new->hidden_list))
-				list_del(&new->hidden_list);
-			kfree(new);
+			bss_ref_put(rdev, new);
 			goto drop;
 		}
 

From 923f98929182dfd04e9149be839160b63a3db145 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 22 Jul 2021 13:11:34 +0300
Subject: [PATCH 098/502] arm64: dts: armada-3720-turris-mox: fixed indices for
 the SDHC controllers

Since drivers/mmc/host/sdhci-xenon.c declares the PROBE_PREFER_ASYNCHRONOUS
probe type, it is not guaranteed whether /dev/mmcblk0 will belong to
sdhci0 or sdhci1. In turn, this will break booting by:

root=/dev/mmcblk0p1

Fix the issue by adding aliases so that the old MMC controller indices
are preserved.

Fixes: 7320915c8861 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.14")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
---
 arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
index ce2bcddf396f..f2d7d6f071bc 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
@@ -19,6 +19,8 @@
 	aliases {
 		spi0 = &spi0;
 		ethernet1 = &eth1;
+		mmc0 = &sdhci0;
+		mmc1 = &sdhci1;
 	};
 
 	chosen {

From b66541422824cf6cf20e9a35112e9cb5d82cdf62 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Tue, 13 Jul 2021 10:27:28 +0800
Subject: [PATCH 099/502] ext4: fix potential uninitialized access to retval in
 kmmpd

if (!ext4_has_feature_mmp(sb)) then retval can be unitialized before
we jump to the wait_to_exit label.

Fixes: 61bb4a1c417e ("ext4: fix possible UAF when remounting r/o a mmp-protected file system")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Link: https://lore.kernel.org/r/20210713022728.2533770-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index bc364c119af6..cebea4270817 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -138,7 +138,7 @@ static int kmmpd(void *data)
 	unsigned mmp_check_interval;
 	unsigned long last_update_time;
 	unsigned long diff;
-	int retval;
+	int retval = 0;
 
 	mmp_block = le64_to_cpu(es->s_mmp_block);
 	mmp = (struct mmp_struct *)(bh->b_data);

From 73dc707161a83c24a9e6804b2d60e6f4a4d6be74 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jiangguoqing@kylinos.cn>
Date: Wed, 14 Jul 2021 13:59:40 +0800
Subject: [PATCH 100/502] ext4: remove conflicting comment from __ext4_forget

We do a bforget and return for no journal case, so let's remove this
conflict comment.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Link: https://lore.kernel.org/r/20210714055940.1553705-1-guoqing.jiang@linux.dev
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4_jbd2.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b96ecba91899..b60f0152ea57 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -244,9 +244,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
  * "bh" may be NULL: a metadata block may have been freed from memory
  * but there may still be a record of it in the journal, and that record
  * still needs to be revoked.
- *
- * If the handle isn't valid we're not journaling, but we still need to
- * call into ext4_journal_revoke() to put the buffer head.
  */
 int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
 		  int is_metadata, struct inode *inode,

From 32c3973d808301e7a980f80fee8818fdf7c82b09 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 17 Jul 2021 10:10:29 +0200
Subject: [PATCH 101/502] netfilter: flowtable: avoid possible false sharing

The flowtable follows the same timeout approach as conntrack, use the
same idiom as in cc16921351d8 ("netfilter: conntrack: avoid same-timeout
update") but also include the fix provided by e37542ba111f ("netfilter:
conntrack: avoid possible false sharing").

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 1e50908b1b7e..551976e4284c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -331,7 +331,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
 void flow_offload_refresh(struct nf_flowtable *flow_table,
 			  struct flow_offload *flow)
 {
-	flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+	u32 timeout;
+
+	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+	if (READ_ONCE(flow->timeout) != timeout)
+		WRITE_ONCE(flow->timeout, timeout);
 
 	if (likely(!nf_flowtable_hw_offload(flow_table)))
 		return;

From 32953df7a6eb56bd9b8f18a13034d55f9fc96cfa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 17 Jul 2021 10:20:08 +0200
Subject: [PATCH 102/502] netfilter: nft_last: avoid possible false sharing

Use the idiom described in:

https://github.com/google/ktsan/wiki/READ_ONCE-and-WRITE_ONCE#it-may-improve-performance

Moreover, prevent a compiler optimization.

Fixes: 836382dc2471 ("netfilter: nf_tables: add last expression")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_last.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 8088b99f2ee3..304e33cbed9b 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -48,24 +48,30 @@ static void nft_last_eval(const struct nft_expr *expr,
 {
 	struct nft_last_priv *priv = nft_expr_priv(expr);
 
-	priv->last_jiffies = jiffies;
-	priv->last_set = 1;
+	if (READ_ONCE(priv->last_jiffies) != jiffies)
+		WRITE_ONCE(priv->last_jiffies, jiffies);
+	if (READ_ONCE(priv->last_set) == 0)
+		WRITE_ONCE(priv->last_set, 1);
 }
 
 static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	struct nft_last_priv *priv = nft_expr_priv(expr);
+	unsigned long last_jiffies = READ_ONCE(priv->last_jiffies);
+	u32 last_set = READ_ONCE(priv->last_set);
 	__be64 msecs;
 
-	if (time_before(jiffies, priv->last_jiffies))
-		priv->last_set = 0;
+	if (time_before(jiffies, last_jiffies)) {
+		WRITE_ONCE(priv->last_set, 0);
+		last_set = 0;
+	}
 
-	if (priv->last_set)
-		msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies);
+	if (last_set)
+		msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies);
 	else
 		msecs = 0;
 
-	if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) ||
+	if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) ||
 	    nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
 		goto nla_put_failure;
 

From 30a56a2b881821625f79837d4d968c679852444e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 18 Jul 2021 18:36:00 +0200
Subject: [PATCH 103/502] netfilter: conntrack: adjust stop timestamp to real
 expiry value

In case the entry is evicted via garbage collection there is
delay between the timeout value and the eviction event.

This adjusts the stop value based on how much time has passed.

Fixes: b87a2f9199ea82 ("netfilter: conntrack: add gc worker to remove timed-out entries")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 83c52df85870..5c03e5106751 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -670,8 +670,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 		return false;
 
 	tstamp = nf_conn_tstamp_find(ct);
-	if (tstamp && tstamp->stop == 0)
+	if (tstamp) {
+		s32 timeout = ct->timeout - nfct_time_stamp;
+
 		tstamp->stop = ktime_get_real_ns();
+		if (timeout < 0)
+			tstamp->stop -= jiffies_to_nsecs(-timeout);
+	}
 
 	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
 				    portid, report) < 0) {

From a33f387ecd5aafae514095c2c4a8c24f7aea7e8b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 20 Jul 2021 18:22:50 +0200
Subject: [PATCH 104/502] netfilter: nft_nat: allow to specify layer 4 protocol
 NAT only

nft_nat reports a bogus EAFNOSUPPORT if no layer 3 information is specified.

Fixes: d07db9884a5f ("netfilter: nf_tables: introduce nft_validate_register_load()")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_nat.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 0840c635b752..be1595d6979d 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -201,7 +201,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		alen = sizeof_field(struct nf_nat_range, min_addr.ip6);
 		break;
 	default:
-		return -EAFNOSUPPORT;
+		if (tb[NFTA_NAT_REG_ADDR_MIN])
+			return -EAFNOSUPPORT;
+		break;
 	}
 	priv->family = family;
 

From ee7ab3f263f8131722cff3871b9618b1e7478f07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Mon, 28 Jun 2021 17:12:29 +0200
Subject: [PATCH 105/502] arm64: dts: armada-3720-turris-mox: remove
 mrvl,i2c-fast-mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some SFP modules are not detected when i2c-fast-mode is enabled even when
clock-frequency is already set to 100000. The I2C bus violates the timing
specifications when run in fast mode. So disable fast mode on Turris Mox.

Same change was already applied for uDPU (also Armada 3720 board with SFP)
in commit fe3ec631a77d ("arm64: dts: uDPU: remove i2c-fast-mode").

Fixes: 7109d817db2e ("arm64: dts: marvell: add DTS for Turris Mox")
Signed-off-by: Pali Rohár <pali@kernel.org>
Reviewed-by: Marek Behún <kabel@kernel.org>
Acked-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
---
 arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
index f2d7d6f071bc..a05b1ab2dd12 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
@@ -121,6 +121,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c1_pins>;
 	clock-frequency = <100000>;
+	/delete-property/ mrvl,i2c-fast-mode;
 	status = "okay";
 
 	rtc@6f {

From 217e26bd87b2930856726b48a4e71c768b8c9bf5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 21 Jul 2021 17:22:32 +0200
Subject: [PATCH 106/502] netfilter: nfnl_hook: fix unused variable warning

The only user of this variable is in an #ifdef:

net/netfilter/nfnetlink_hook.c: In function 'nfnl_hook_entries_head':
net/netfilter/nfnetlink_hook.c:177:28: error: unused variable 'netdev' [-Werror=unused-variable]

Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_hook.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 50b4e3c9347a..202f57d17bab 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -174,7 +174,9 @@ static const struct nf_hook_entries *
 nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
 {
 	const struct nf_hook_entries *hook_head = NULL;
+#ifdef CONFIG_NETFILTER_INGRESS
 	struct net_device *netdev;
+#endif
 
 	switch (pf) {
 	case NFPROTO_IPV4:

From 65662a8dcdd01342b71ee44234bcfd0162e195af Mon Sep 17 00:00:00 2001
From: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Date: Thu, 29 Apr 2021 19:49:47 +0200
Subject: [PATCH 107/502] i40e: Fix logic of disabling queues

Correct the message flow between driver and firmware when disabling
queues.

Previously in case of PF reset (due to required reinit after reconfig),
the error like: "VSI seid 397 Tx ring 60 disable timeout" could show up
occasionally. The error was not a real issue of hardware or firmware,
it was caused by wrong sequence of messages invoked by the driver.

Fixes: 41c445ff0f48 ("i40e: main driver core")
Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 58 ++++++++++++---------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 861e59a350bd..5297e6c59083 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4454,11 +4454,10 @@ int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
 }
 
 /**
- * i40e_vsi_control_tx - Start or stop a VSI's rings
+ * i40e_vsi_enable_tx - Start a VSI's rings
  * @vsi: the VSI being configured
- * @enable: start or stop the rings
  **/
-static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
+static int i40e_vsi_enable_tx(struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = vsi->back;
 	int i, pf_q, ret = 0;
@@ -4467,7 +4466,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
 	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
 		ret = i40e_control_wait_tx_q(vsi->seid, pf,
 					     pf_q,
-					     false /*is xdp*/, enable);
+					     false /*is xdp*/, true);
 		if (ret)
 			break;
 
@@ -4476,7 +4475,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
 
 		ret = i40e_control_wait_tx_q(vsi->seid, pf,
 					     pf_q + vsi->alloc_queue_pairs,
-					     true /*is xdp*/, enable);
+					     true /*is xdp*/, true);
 		if (ret)
 			break;
 	}
@@ -4574,32 +4573,25 @@ int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
 }
 
 /**
- * i40e_vsi_control_rx - Start or stop a VSI's rings
+ * i40e_vsi_enable_rx - Start a VSI's rings
  * @vsi: the VSI being configured
- * @enable: start or stop the rings
  **/
-static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
+static int i40e_vsi_enable_rx(struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = vsi->back;
 	int i, pf_q, ret = 0;
 
 	pf_q = vsi->base_queue;
 	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
-		ret = i40e_control_wait_rx_q(pf, pf_q, enable);
+		ret = i40e_control_wait_rx_q(pf, pf_q, true);
 		if (ret) {
 			dev_info(&pf->pdev->dev,
-				 "VSI seid %d Rx ring %d %sable timeout\n",
-				 vsi->seid, pf_q, (enable ? "en" : "dis"));
+				 "VSI seid %d Rx ring %d enable timeout\n",
+				 vsi->seid, pf_q);
 			break;
 		}
 	}
 
-	/* Due to HW errata, on Rx disable only, the register can indicate done
-	 * before it really is. Needs 50ms to be sure
-	 */
-	if (!enable)
-		mdelay(50);
-
 	return ret;
 }
 
@@ -4612,29 +4604,47 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi)
 	int ret = 0;
 
 	/* do rx first for enable and last for disable */
-	ret = i40e_vsi_control_rx(vsi, true);
+	ret = i40e_vsi_enable_rx(vsi);
 	if (ret)
 		return ret;
-	ret = i40e_vsi_control_tx(vsi, true);
+	ret = i40e_vsi_enable_tx(vsi);
 
 	return ret;
 }
 
+#define I40E_DISABLE_TX_GAP_MSEC	50
+
 /**
  * i40e_vsi_stop_rings - Stop a VSI's rings
  * @vsi: the VSI being configured
  **/
 void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
 {
+	struct i40e_pf *pf = vsi->back;
+	int pf_q, err, q_end;
+
 	/* When port TX is suspended, don't wait */
 	if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state))
 		return i40e_vsi_stop_rings_no_wait(vsi);
 
-	/* do rx first for enable and last for disable
-	 * Ignore return value, we need to shutdown whatever we can
-	 */
-	i40e_vsi_control_tx(vsi, false);
-	i40e_vsi_control_rx(vsi, false);
+	q_end = vsi->base_queue + vsi->num_queue_pairs;
+	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+		i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false);
+
+	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) {
+		err = i40e_control_wait_rx_q(pf, pf_q, false);
+		if (err)
+			dev_info(&pf->pdev->dev,
+				 "VSI seid %d Rx ring %d dissable timeout\n",
+				 vsi->seid, pf_q);
+	}
+
+	msleep(I40E_DISABLE_TX_GAP_MSEC);
+	pf_q = vsi->base_queue;
+	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+		wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0);
+
+	i40e_vsi_wait_queues_disabled(vsi);
 }
 
 /**

From 71d6fdba4b2d82fdd883fec31dee77fbcf59773a Mon Sep 17 00:00:00 2001
From: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Date: Fri, 21 May 2021 18:41:26 +0200
Subject: [PATCH 108/502] i40e: Fix firmware LLDP agent related warning

Make warning meaningful for the user.

Previously the trace:
"Starting FW LLDP agent failed: error: I40E_ERR_ADMIN_QUEUE_ERROR, I40E_AQ_RC_EAGAIN"
was produced when user tried to start Firmware LLDP agent,
just after it was stopped with sequence:
ethtool --set-priv-flags <dev> disable-fw-lldp on
ethtool --set-priv-flags <dev> disable-fw-lldp off
(without any delay between the commands)
At that point the firmware is still processing stop command, the behavior
is expected.

Fixes: c1041d070437 ("i40e: Missing response checks in driver when starting/stopping FW LLDP")
Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Tested-by: Imam Hassan Reza Biswas <imam.hassan.reza.biswas@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 3e822bad4851..d9e26f9713a5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -5294,6 +5294,10 @@ flags_complete:
 					dev_warn(&pf->pdev->dev,
 						 "Device configuration forbids SW from starting the LLDP agent.\n");
 					return -EINVAL;
+				case I40E_AQ_RC_EAGAIN:
+					dev_warn(&pf->pdev->dev,
+						 "Stop FW LLDP agent command is still being processed, please try again in a second.\n");
+					return -EBUSY;
 				default:
 					dev_warn(&pf->pdev->dev,
 						 "Starting FW LLDP agent failed: error: %s, %s\n",

From dc614c46178b0b89bde86ac54fc687a28580d2b7 Mon Sep 17 00:00:00 2001
From: Lukasz Cieplicki <lukaszx.cieplicki@intel.com>
Date: Mon, 31 May 2021 16:55:49 +0000
Subject: [PATCH 109/502] i40e: Add additional info to PHY type error

In case of PHY type error occurs, the message was too generic.
Add additional info to PHY type error indicating that it can be
wrong cable connected.

Fixes: 124ed15bf126 ("i40e: Add dual speed module support")
Signed-off-by: Lukasz Cieplicki <lukaszx.cieplicki@intel.com>
Signed-off-by: Michal Maloszewski <michal.maloszewski@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index d9e26f9713a5..2c9e4eeb7270 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -980,7 +980,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
 	default:
 		/* if we got here and link is up something bad is afoot */
 		netdev_info(netdev,
-			    "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
+			    "WARNING: Link is up but PHY type 0x%x is not recognized, or incorrect cable is in use\n",
 			    hw_link_info->phy_type);
 	}
 

From 89ec1f0886c127c7e41ac61a6b6d539f4fb2510b Mon Sep 17 00:00:00 2001
From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
Date: Wed, 2 Jun 2021 00:47:03 +0000
Subject: [PATCH 110/502] i40e: Fix queue-to-TC mapping on Tx

In SW DCB mode the packets sent receive incorrect UP tags. They are
constructed correctly and put into tx_ring, but UP is later remapped by
HW on the basis of TCTUPR register contents according to Tx queue
selected, and BW used is consistent with the new UP values. This is
caused by Tx queue selection in kernel not taking into account DCB
configuration. This patch fixes the issue by implementing the
ndo_select_queue NDO callback.

Fixes: fd0a05ce74ef ("i40e: transmit, receive, and NAPI")
Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Signed-off-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
Tested-by: Imam Hassan Reza Biswas <imam.hassan.reza.biswas@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c |  1 +
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 50 +++++++++++++++++++++
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |  2 +
 3 files changed, 53 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 5297e6c59083..278077208f37 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -13271,6 +13271,7 @@ static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_poll_controller	= i40e_netpoll,
 #endif
 	.ndo_setup_tc		= __i40e_setup_tc,
+	.ndo_select_queue	= i40e_lan_select_queue,
 	.ndo_set_features	= i40e_set_features,
 	.ndo_set_vf_mac		= i40e_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= i40e_ndo_set_vf_port_vlan,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 38eb8151ee9a..3f25bd8c4924 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3631,6 +3631,56 @@ dma_error:
 	return -1;
 }
 
+static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev,
+				  const struct sk_buff *skb,
+				  u16 num_tx_queues)
+{
+	u32 jhash_initval_salt = 0xd631614b;
+	u32 hash;
+
+	if (skb->sk && skb->sk->sk_hash)
+		hash = skb->sk->sk_hash;
+	else
+		hash = (__force u16)skb->protocol ^ skb->hash;
+
+	hash = jhash_1word(hash, jhash_initval_salt);
+
+	return (u16)(((u64)hash * num_tx_queues) >> 32);
+}
+
+u16 i40e_lan_select_queue(struct net_device *netdev,
+			  struct sk_buff *skb,
+			  struct net_device __always_unused *sb_dev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_hw *hw;
+	u16 qoffset;
+	u16 qcount;
+	u8 tclass;
+	u16 hash;
+	u8 prio;
+
+	/* is DCB enabled at all? */
+	if (vsi->tc_config.numtc == 1)
+		return i40e_swdcb_skb_tx_hash(netdev, skb,
+					      netdev->real_num_tx_queues);
+
+	prio = skb->priority;
+	hw = &vsi->back->hw;
+	tclass = hw->local_dcbx_config.etscfg.prioritytable[prio];
+	/* sanity check */
+	if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass))))
+		tclass = 0;
+
+	/* select a queue assigned for the given TC */
+	qcount = vsi->tc_config.tc_info[tclass].qcount;
+	hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount);
+
+	qoffset = vsi->tc_config.tc_info[tclass].qoffset;
+	return qoffset + hash;
+}
+
 /**
  * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring
  * @xdpf: data to transmit
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 86fed05b4f19..bfc2845c99d1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -451,6 +451,8 @@ static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring)
 
 bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+u16 i40e_lan_select_queue(struct net_device *netdev, struct sk_buff *skb,
+			  struct net_device *sb_dev);
 void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring);

From ea52faae1d17cd3048681d86d2e8641f44de484d Mon Sep 17 00:00:00 2001
From: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
Date: Fri, 18 Jun 2021 08:49:49 +0000
Subject: [PATCH 111/502] i40e: Fix log TC creation failure when max num of
 queues is exceeded

Fix missing failed message if driver does not have enough queues to
complete TC command. Without this fix no message is displayed in dmesg.

Fixes: a9ce82f744dc ("i40e: Enable 'channel' mode in mqprio for TC configs")
Signed-off-by: Grzegorz Szczurek <grzegorzx.szczurek@intel.com>
Signed-off-by: Jedrzej Jagielski <jedrzej.jagielski@intel.com>
Tested-by: Imam Hassan Reza Biswas <imam.hassan.reza.biswas@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 278077208f37..1d1f52756a93 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7290,6 +7290,8 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
 	}
 	if (vsi->num_queue_pairs <
 	    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to create traffic channel, insufficient number of queues.\n");
 		return -EINVAL;
 	}
 	if (sum_max_rate > i40e_get_link_speed(vsi)) {

From d72e91efcae12f2f24ced984d00d60517c677857 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@marvell.com>
Date: Thu, 22 Jul 2021 18:15:51 +0530
Subject: [PATCH 112/502] octeontx2-af: Remove unnecessary devm_kfree

Remove devm_kfree of memory where VLAN entry to RVU PF mapping
info is saved. This will be freed anyway at driver exit.
Having this could result in warning from devm_kfree() if
the memory is not allocated due to errors in rvu_nix_block_init()
before nix_setup_txvlan().

Fixes: 9a946def264d ("octeontx2-af: Modify nix_vtag_cfg mailbox to support TX VTAG entries")
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 0933699a0d2d..0d2cd5169018 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -3842,7 +3842,6 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr,
 		vlan = &nix_hw->txvlan;
 		kfree(vlan->rsrc.bmap);
 		mutex_destroy(&vlan->rsrc_lock);
-		devm_kfree(rvu->dev, vlan->entry2pfvf_map);
 
 		mcast = &nix_hw->mcast;
 		qmem_free(rvu->dev, mcast->mce_ctx);

From f8dd60de194817c86bf812700980762bb5a8d9a4 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 22 Jul 2021 12:05:41 -0400
Subject: [PATCH 113/502] tipc: fix implicit-connect for SYN+

For implicit-connect, when it's either SYN- or SYN+, an ACK should
be sent back to the client immediately. It's not appropriate for
the client to enter established state only after receiving data
from the server.

On client side, after the SYN is sent out, tipc_wait_for_connect()
should be called to wait for the ACK if timeout is set.

This patch also restricts __tipc_sendstream() to call __sendmsg()
only when it's in TIPC_OPEN state, so that the client can program
in a single loop doing both connecting and data sending like:

  for (...)
      sendmsg(dest, buf);

This makes the implicit-connect more implicit.

Fixes: b97bf3fd8f6a ("[TIPC] Initial merge")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 34a97ea36cc8..ebd300c26a44 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -158,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk);
 static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
 static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
 
 static const struct proto_ops packet_ops;
 static const struct proto_ops stream_ops;
@@ -1515,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 		rc = 0;
 	}
 
-	if (unlikely(syn && !rc))
+	if (unlikely(syn && !rc)) {
 		tipc_set_sk_state(sk, TIPC_CONNECTING);
+		if (timeout) {
+			timeout = msecs_to_jiffies(timeout);
+			tipc_wait_for_connect(sock, &timeout);
+		}
+	}
 
 	return rc ? rc : dlen;
 }
@@ -1564,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
 		return -EMSGSIZE;
 
 	/* Handle implicit connection setup */
-	if (unlikely(dest)) {
+	if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
 		rc = __tipc_sendmsg(sock, m, dlen);
 		if (dlen && dlen == rc) {
 			tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
@@ -2689,9 +2695,10 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 		       bool kern)
 {
 	struct sock *new_sk, *sk = sock->sk;
-	struct sk_buff *buf;
 	struct tipc_sock *new_tsock;
+	struct msghdr m = {NULL,};
 	struct tipc_msg *msg;
+	struct sk_buff *buf;
 	long timeo;
 	int res;
 
@@ -2737,19 +2744,17 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 	}
 
 	/*
-	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
-	 * Respond to 'SYN+' by queuing it on new socket.
+	 * Respond to 'SYN-' by discarding it & returning 'ACK'.
+	 * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
 	 */
 	if (!msg_data_sz(msg)) {
-		struct msghdr m = {NULL,};
-
 		tsk_advance_rx_queue(sk);
-		__tipc_sendstream(new_sock, &m, 0);
 	} else {
 		__skb_dequeue(&sk->sk_receive_queue);
 		__skb_queue_head(&new_sk->sk_receive_queue, buf);
 		skb_set_owner_r(buf, new_sk);
 	}
+	__tipc_sendstream(new_sock, &m, 0);
 	release_sock(new_sk);
 exit:
 	release_sock(sk);

From d237a7f11719ff9320721be5818352e48071aab6 Mon Sep 17 00:00:00 2001
From: Hoang Le <hoang.h.le@dektech.com.au>
Date: Fri, 23 Jul 2021 09:25:34 +0700
Subject: [PATCH 114/502] tipc: fix sleeping in tipc accept routine

The release_sock() is blocking function, it would change the state
after sleeping. In order to evaluate the stated condition outside
the socket lock context, switch to use wait_woken() instead.

Fixes: 6398e23cdb1d8 ("tipc: standardize accept routine")
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ebd300c26a44..75b99b7eda22 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2652,7 +2652,7 @@ static int tipc_listen(struct socket *sock, int len)
 static int tipc_wait_for_accept(struct socket *sock, long timeo)
 {
 	struct sock *sk = sock->sk;
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	int err;
 
 	/* True wake-one mechanism for incoming connections: only
@@ -2661,12 +2661,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
 	 * anymore, the common case will execute the loop only once.
 	*/
 	for (;;) {
-		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
-					  TASK_INTERRUPTIBLE);
 		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
+			add_wait_queue(sk_sleep(sk), &wait);
 			release_sock(sk);
-			timeo = schedule_timeout(timeo);
+			timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
 			lock_sock(sk);
+			remove_wait_queue(sk_sleep(sk), &wait);
 		}
 		err = 0;
 		if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -2678,7 +2678,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
 		if (signal_pending(current))
 			break;
 	}
-	finish_wait(sk_sleep(sk), &wait);
 	return err;
 }
 

From 227adfb2b1dfbc53dfc53b9dd7a93a6298ff7c56 Mon Sep 17 00:00:00 2001
From: Gilad Naaman <gnaaman@drivenets.com>
Date: Thu, 22 Jul 2021 20:01:28 +0300
Subject: [PATCH 115/502] net: Set true network header for ECN decapsulation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In cases where the header straight after the tunnel header was
another ethernet header (TEB), instead of the network header,
the ECN decapsulation code would treat the ethernet header as if
it was an IP header, resulting in mishandling and possible
wrong drops or corruption of the IP header.

In this case, ECT(1) is sent, so IP_ECN_decapsulate tries to copy it to the
inner IPv4 header, and correct its checksum.

The offset of the ECT bits in an IPv4 header corresponds to the
lower 2 bits of the second octet of the destination MAC address
in the ethernet header.
The IPv4 checksum corresponds to end of the source address.

In order to reproduce:

    $ ip netns add A
    $ ip netns add B
    $ ip -n A link add _v0 type veth peer name _v1 netns B
    $ ip -n A link set _v0 up
    $ ip -n A addr add dev _v0 10.254.3.1/24
    $ ip -n A route add default dev _v0 scope global
    $ ip -n B link set _v1 up
    $ ip -n B addr add dev _v1 10.254.1.6/24
    $ ip -n B route add default dev _v1 scope global
    $ ip -n B link add gre1 type gretap local 10.254.1.6 remote 10.254.3.1 key 0x49000000
    $ ip -n B link set gre1 up

    # Now send an IPv4/GRE/Eth/IPv4 frame where the outer header has ECT(1),
    # and the inner header has no ECT bits set:

    $ cat send_pkt.py
        #!/usr/bin/env python3
        from scapy.all import *

        pkt = IP(b'E\x01\x00\xa7\x00\x00\x00\x00@/`%\n\xfe\x03\x01\n\xfe\x01\x06 \x00eXI\x00'
                 b'\x00\x00\x18\xbe\x92\xa0\xee&\x18\xb0\x92\xa0l&\x08\x00E\x00\x00}\x8b\x85'
                 b'@\x00\x01\x01\xe4\xf2\x82\x82\x82\x01\x82\x82\x82\x02\x08\x00d\x11\xa6\xeb'
                 b'3\x1e\x1e\\xf3\\xf7`\x00\x00\x00\x00ZN\x00\x00\x00\x00\x00\x00\x10\x11\x12'
                 b'\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./01234'
                 b'56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ')

        send(pkt)
    $ sudo ip netns exec B tcpdump -neqlllvi gre1 icmp & ; sleep 1
    $ sudo ip netns exec A python3 send_pkt.py

In the original packet, the source/destinatio MAC addresses are
dst=18:be:92:a0:ee:26 src=18:b0:92:a0:6c:26

In the received packet, they are
dst=18:bd:92:a0:ee:26 src=18:b0:92:a0:6c:27

Thanks to Lahav Schlesinger <lschlesinger@drivenets.com> and Isaac Garzon <isaac@speed.io>
for helping me pinpoint the origin.

Fixes: b723748750ec ("tunnel: Propagate ECT(1) when decapsulating as recommended by RFC6040")
Cc: David S. Miller <davem@davemloft.net>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: David Ahern <dsahern@kernel.org>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 0dca00745ac3..be75b409445c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -390,7 +390,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 		tunnel->i_seqno = ntohl(tpi->seq) + 1;
 	}
 
-	skb_reset_network_header(skb);
+	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
 
 	err = IP_ECN_decapsulate(iph, skb);
 	if (unlikely(err)) {

From 46c7655f0b56b1ac864115441064cde9ed124f4a Mon Sep 17 00:00:00 2001
From: Kangmin Park <l4stpr0gr4m@gmail.com>
Date: Fri, 23 Jul 2021 02:44:43 +0900
Subject: [PATCH 116/502] ipv6: decrease hop limit counter in ip6_forward()

Decrease hop limit counter when deliver skb to ndp proxy.

Signed-off-by: Kangmin Park <l4stpr0gr4m@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e1b9f7ac8bad..8e6ca9ad6812 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -549,9 +549,10 @@ int ip6_forward(struct sk_buff *skb)
 	if (net->ipv6.devconf_all->proxy_ndp &&
 	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 		int proxied = ip6_forward_proxy_check(skb);
-		if (proxied > 0)
+		if (proxied > 0) {
+			hdr->hop_limit--;
 			return ip6_input(skb);
-		else if (proxied < 0) {
+		} else if (proxied < 0) {
 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 			goto drop;
 		}

From c92c74131a84b508aa8f079a25d7bbe10748449e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 22 Jul 2021 16:05:51 +0300
Subject: [PATCH 117/502] net: dsa: mv88e6xxx: silently accept the deletion of
 VID 0 too

The blamed commit modified the driver to accept the addition of VID 0
without doing anything, but deleting that VID still fails:

[   32.080780] mv88e6085 d0032004.mdio-mii:10 lan8: failed to kill vid 0081/0

Modify mv88e6xxx_port_vlan_leave() to do the same thing as the addition.

Fixes: b8b79c414eca ("net: dsa: mv88e6xxx: Fix adding vlan 0")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index beb41572d04e..272b0535d946 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2155,7 +2155,7 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip,
 	int i, err;
 
 	if (!vid)
-		return -EOPNOTSUPP;
+		return 0;
 
 	err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 	if (err)

From 68d1f1d4af188c290087958c75c7b89a816e1137 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Thu, 22 Jul 2021 20:21:05 +0200
Subject: [PATCH 118/502] wwan: core: Fix missing RTM_NEWLINK event for default
 link

A wwan link created via the wwan_create_default_link procedure is
never notified to the user (RTM_NEWLINK), causing issues with user
tools relying on such event to track network links (NetworkManager).

This is because the procedure misses a call to rtnl_configure_link(),
which sets the link as initialized and notifies the new link (cf
proper usage in __rtnl_newlink()).

Cc: stable@vger.kernel.org
Fixes: ca374290aaad ("wwan: core: support default netdev creation")
Suggested-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Acked-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 3e16c318e705..674a81d79db3 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -984,6 +984,8 @@ static void wwan_create_default_link(struct wwan_device *wwandev,
 		goto unlock;
 	}
 
+	rtnl_configure_link(dev, NULL); /* Link initialized, notify new link */
+
 unlock:
 	rtnl_unlock();
 

From 3ce6e1f662a910970880188ea7bfd00542bd3934 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Tue, 6 Jul 2021 23:40:34 +0900
Subject: [PATCH 119/502] loop: reintroduce global lock for safe
 loop_validate_file() traversal

Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per
device lock") re-opened a race window for NULL pointer dereference at
loop_validate_file() where commit 310ca162d779efee ("block/loop: Use
global lock for ioctl() operation.") has closed.

Although we need to guarantee that other loop devices will not change
during traversal, we can't take remote "struct loop_device"->lo_mutex
inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore,
introduce a global lock dedicated for loop_validate_file() which is
conditionally taken before local "struct loop_device"->lo_mutex is taken.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 128 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 97 insertions(+), 31 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f37b9e3d833c..f0cdff0c5fbf 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -88,6 +88,47 @@
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_MUTEX(loop_validate_mutex);
+
+/**
+ * loop_global_lock_killable() - take locks for safe loop_validate_file() test
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo is about to bind another "struct loop_device", false otherwise
+ *
+ * Returns 0 on success, -EINTR otherwise.
+ *
+ * Since loop_validate_file() traverses on other "struct loop_device" if
+ * is_loop_device() is true, we need a global lock for serializing concurrent
+ * loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
+ */
+static int loop_global_lock_killable(struct loop_device *lo, bool global)
+{
+	int err;
+
+	if (global) {
+		err = mutex_lock_killable(&loop_validate_mutex);
+		if (err)
+			return err;
+	}
+	err = mutex_lock_killable(&lo->lo_mutex);
+	if (err && global)
+		mutex_unlock(&loop_validate_mutex);
+	return err;
+}
+
+/**
+ * loop_global_unlock() - release locks taken by loop_global_lock_killable()
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo was about to bind another "struct loop_device", false otherwise
+ */
+static void loop_global_unlock(struct loop_device *lo, bool global)
+{
+	mutex_unlock(&lo->lo_mutex);
+	if (global)
+		mutex_unlock(&loop_validate_mutex);
+}
 
 static int max_part;
 static int part_shift;
@@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 	while (is_loop_device(f)) {
 		struct loop_device *l;
 
+		lockdep_assert_held(&loop_validate_mutex);
 		if (f->f_mapping->host->i_rdev == bdev->bd_dev)
 			return -EBADF;
 
 		l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
-		if (l->lo_state != Lo_bound) {
+		if (l->lo_state != Lo_bound)
 			return -EINVAL;
-		}
+		/* Order wrt setting lo->lo_backing_file in loop_configure(). */
+		rmb();
 		f = l->lo_backing_file;
 	}
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
@@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 			  unsigned int arg)
 {
-	struct file	*file = NULL, *old_file;
-	int		error;
-	bool		partscan;
+	struct file *file = fget(arg);
+	struct file *old_file;
+	int error;
+	bool partscan;
+	bool is_loop;
 
-	error = mutex_lock_killable(&lo->lo_mutex);
+	if (!file)
+		return -EBADF;
+	is_loop = is_loop_device(file);
+	error = loop_global_lock_killable(lo, is_loop);
 	if (error)
-		return error;
+		goto out_putf;
 	error = -ENXIO;
 	if (lo->lo_state != Lo_bound)
 		goto out_err;
@@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
 		goto out_err;
 
-	error = -EBADF;
-	file = fget(arg);
-	if (!file)
-		goto out_err;
-
 	error = loop_validate_file(file, bdev);
 	if (error)
 		goto out_err;
@@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	loop_update_dio(lo);
 	blk_mq_unfreeze_queue(lo->lo_queue);
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
+
+	/*
+	 * Flush loop_validate_file() before fput(), for l->lo_backing_file
+	 * might be pointing at old_file which might be the last reference.
+	 */
+	if (!is_loop) {
+		mutex_lock(&loop_validate_mutex);
+		mutex_unlock(&loop_validate_mutex);
+	}
 	/*
 	 * We must drop file reference outside of lo_mutex as dropping
 	 * the file ref can take open_mutex which creates circular locking
@@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	return 0;
 
 out_err:
-	mutex_unlock(&lo->lo_mutex);
-	if (file)
-		fput(file);
+	loop_global_unlock(lo, is_loop);
+out_putf:
+	fput(file);
 	return error;
 }
 
@@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 			  struct block_device *bdev,
 			  const struct loop_config *config)
 {
-	struct file	*file;
-	struct inode	*inode;
+	struct file *file = fget(config->fd);
+	struct inode *inode;
 	struct address_space *mapping;
-	int		error;
-	loff_t		size;
-	bool		partscan;
-	unsigned short  bsize;
+	int error;
+	loff_t size;
+	bool partscan;
+	unsigned short bsize;
+	bool is_loop;
+
+	if (!file)
+		return -EBADF;
+	is_loop = is_loop_device(file);
 
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
 
-	error = -EBADF;
-	file = fget(config->fd);
-	if (!file)
-		goto out;
-
 	/*
 	 * If we don't hold exclusive handle for the device, upgrade to it
 	 * here to avoid changing device under exclusive owner.
@@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 			goto out_putf;
 	}
 
-	error = mutex_lock_killable(&lo->lo_mutex);
+	error = loop_global_lock_killable(lo, is_loop);
 	if (error)
 		goto out_bdev;
 
@@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	size = get_loop_size(lo, file);
 	loop_set_size(lo, size);
 
+	/* Order wrt reading lo_state in loop_validate_file(). */
+	wmb();
+
 	lo->lo_state = Lo_bound;
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
@@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
 	 */
 	bdgrab(bdev);
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 	if (partscan)
 		loop_reread_partitions(lo);
 	if (!(mode & FMODE_EXCL))
@@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	return 0;
 
 out_unlock:
-	mutex_unlock(&lo->lo_mutex);
+	loop_global_unlock(lo, is_loop);
 out_bdev:
 	if (!(mode & FMODE_EXCL))
 		bd_abort_claiming(bdev, loop_configure);
 out_putf:
 	fput(file);
-out:
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
 	return error;
@@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 	int lo_number;
 	struct loop_worker *pos, *worker;
 
+	/*
+	 * Flush loop_configure() and loop_change_fd(). It is acceptable for
+	 * loop_validate_file() to succeed, for actual clear operation has not
+	 * started yet.
+	 */
+	mutex_lock(&loop_validate_mutex);
+	mutex_unlock(&loop_validate_mutex);
+	/*
+	 * loop_validate_file() now fails because l->lo_state != Lo_bound
+	 * became visible.
+	 */
+
 	mutex_lock(&lo->lo_mutex);
 	if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
 		err = -ENXIO;

From 9986066d94c971edf19464ed7bf5b26a91520e97 Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep <sbhatta@marvell.com>
Date: Fri, 23 Jul 2021 13:36:18 +0530
Subject: [PATCH 120/502] octeontx2-af: Fix uninitialized variables in
 rvu_switch

Get the number of VFs of a PF correctly by calling
rvu_get_pf_numvfs in rvu_switch_disable function.
Also hwvf is not required hence remove it.

Fixes: 23109f8dd06d ("octeontx2-af: Introduce internal packet switching")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu.c       |  6 ++++--
 .../net/ethernet/marvell/octeontx2/af/rvu_switch.c    | 11 ++++++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 017163fb3cd5..5fe277e354f7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -391,8 +391,10 @@ void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf)
 
 	/* Get numVFs attached to this PF and first HWVF */
 	cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
-	*numvfs = (cfg >> 12) & 0xFF;
-	*hwvf = cfg & 0xFFF;
+	if (numvfs)
+		*numvfs = (cfg >> 12) & 0xFF;
+	if (hwvf)
+		*hwvf = cfg & 0xFFF;
 }
 
 static int rvu_get_hwvf(struct rvu *rvu, int pcifunc)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
index 2e5379710aa5..820adf390b8e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
@@ -71,8 +71,8 @@ static int rvu_switch_install_rules(struct rvu *rvu)
 	struct rvu_switch *rswitch = &rvu->rswitch;
 	u16 start = rswitch->start_entry;
 	struct rvu_hwinfo *hw = rvu->hw;
-	int pf, vf, numvfs, hwvf;
 	u16 pcifunc, entry = 0;
+	int pf, vf, numvfs;
 	int err;
 
 	for (pf = 1; pf < hw->total_pfs; pf++) {
@@ -110,8 +110,8 @@ static int rvu_switch_install_rules(struct rvu *rvu)
 
 		rswitch->entry2pcifunc[entry++] = pcifunc;
 
-		rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf);
-		for (vf = 0; vf < numvfs; vf++, hwvf++) {
+		rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL);
+		for (vf = 0; vf < numvfs; vf++) {
 			pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
 			rvu_get_nix_blkaddr(rvu, pcifunc);
 
@@ -198,7 +198,7 @@ void rvu_switch_disable(struct rvu *rvu)
 	struct npc_mcam_free_entry_req free_req = { 0 };
 	struct rvu_switch *rswitch = &rvu->rswitch;
 	struct rvu_hwinfo *hw = rvu->hw;
-	int pf, vf, numvfs, hwvf;
+	int pf, vf, numvfs;
 	struct msg_rsp rsp;
 	u16 pcifunc;
 	int err;
@@ -217,7 +217,8 @@ void rvu_switch_disable(struct rvu *rvu)
 				"Reverting RX rule for PF%d failed(%d)\n",
 				pf, err);
 
-		for (vf = 0; vf < numvfs; vf++, hwvf++) {
+		rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL);
+		for (vf = 0; vf < numvfs; vf++) {
 			pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
 			err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF);
 			if (err)

From 52f3456a96c06760b9bfae460e39596fec7af22e Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 23 Jul 2021 18:31:32 +0300
Subject: [PATCH 121/502] net: qrtr: fix memory leaks

Syzbot reported memory leak in qrtr. The problem was in unputted
struct sock. qrtr_local_enqueue() function calls qrtr_port_lookup()
which takes sock reference if port was found. Then there is the following
check:

if (!ipc || &ipc->sk == skb->sk) {
	...
	return -ENODEV;
}

Since we should drop the reference before returning from this function and
ipc can be non-NULL inside this if, we should add qrtr_port_put() inside
this if.

The similar corner case is in qrtr_endpoint_post() as Manivannan
reported. In case of sock_queue_rcv_skb() failure we need to put
port reference to avoid leaking struct sock pointer.

Fixes: e04df98adf7d ("net: qrtr: Remove receive worker")
Fixes: bdabad3e363d ("net: Add Qualcomm IPC router")
Reported-and-tested-by: syzbot+35a511c72ea7356cdcf3@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index e6f4a6202f82..171b7f3be6ef 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -518,8 +518,10 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
 		if (!ipc)
 			goto err;
 
-		if (sock_queue_rcv_skb(&ipc->sk, skb))
+		if (sock_queue_rcv_skb(&ipc->sk, skb)) {
+			qrtr_port_put(ipc);
 			goto err;
+		}
 
 		qrtr_port_put(ipc);
 	}
@@ -839,6 +841,8 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
 
 	ipc = qrtr_port_lookup(to->sq_port);
 	if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
+		if (ipc)
+			qrtr_port_put(ipc);
 		kfree_skb(skb);
 		return -ENODEV;
 	}

From 15bbf8bb4d4ab87108ecf5f4155ec8ffa3c141d6 Mon Sep 17 00:00:00 2001
From: Paul Jakma <paul@jakma.org>
Date: Fri, 23 Jul 2021 16:13:04 +0100
Subject: [PATCH 122/502] NIU: fix incorrect error return, missed in previous
 revert

Commit 7930742d6, reverting 26fd962, missed out on reverting an incorrect
change to a return value.  The niu_pci_vpd_scan_props(..) == 1 case appears
to be a normal path - treating it as an error and return -EINVAL was
breaking VPD_SCAN and causing the driver to fail to load.

Fix, so my Neptune card works again.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Shannon Nelson <shannon.lee.nelson@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: stable <stable@vger.kernel.org>
Fixes: 7930742d ('Revert "niu: fix missing checks of niu_pci_eeprom_read"')
Signed-off-by: Paul Jakma <paul@jakma.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/niu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 74e748662ec0..860644d182ab 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -8191,8 +8191,9 @@ static int niu_pci_vpd_fetch(struct niu *np, u32 start)
 		err = niu_pci_vpd_scan_props(np, here, end);
 		if (err < 0)
 			return err;
+		/* ret == 1 is not an error */
 		if (err == 1)
-			return -EINVAL;
+			return 0;
 	}
 	return 0;
 }

From 5ba03936c05584b6f6f79be5ebe7e5036c1dd252 Mon Sep 17 00:00:00 2001
From: Wei Shuyu <wsy@dogben.com>
Date: Mon, 28 Jun 2021 15:15:08 +0800
Subject: [PATCH 123/502] md/raid10: properly indicate failure when ending a
 failed write request

Similar to [1], this patch fixes the same bug in raid10. Also cleanup the
comments.

[1] commit 2417b9869b81 ("md/raid1: properly indicate failure when ending
                         a failed write request")
Cc: stable@vger.kernel.org
Fixes: 7cee6d4e6035 ("md/raid10: end bio when the device faulty")
Signed-off-by: Wei Shuyu <wsy@dogben.com>
Acked-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid1.c  | 2 --
 drivers/md/raid10.c | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ced076ba560e..753822ca9613 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -472,8 +472,6 @@ static void raid1_end_write_request(struct bio *bio)
 		/*
 		 * When the device is faulty, it is not necessary to
 		 * handle write error.
-		 * For failfast, this is the only remaining device,
-		 * We need to retry the write without FailFast.
 		 */
 		if (!test_bit(Faulty, &rdev->flags))
 			set_bit(R1BIO_WriteError, &r1_bio->state);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 13f5e6b2a73d..40e845fb9717 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -469,12 +469,12 @@ static void raid10_end_write_request(struct bio *bio)
 			/*
 			 * When the device is faulty, it is not necessary to
 			 * handle write error.
-			 * For failfast, this is the only remaining device,
-			 * We need to retry the write without FailFast.
 			 */
 			if (!test_bit(Faulty, &rdev->flags))
 				set_bit(R10BIO_WriteError, &r10_bio->state);
 			else {
+				/* Fail the request */
+				set_bit(R10BIO_Degraded, &r10_bio->state);
 				r10_bio->devs[slot].bio = NULL;
 				to_put = bio;
 				dec_rdev = 1;

From 6840e17b8ea992453e2d6f460d403cb05d194e76 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Fri, 23 Jul 2021 11:02:45 -0700
Subject: [PATCH 124/502] ionic: make all rx_mode work threadsafe

Move the bulk of the code from ionic_set_rx_mode(), which
can be called from atomic context, into ionic_lif_rx_mode()
which is a safe context.

A call from the stack will get pushed off into a work thread,
but it is also possible to simultaneously have a call driven
by a queue reconfig request from an ethtool command or fw
recovery event.  We add a mutex around the rx_mode work to be
sure they don't collide.

Fixes: 81dbc24147f9 ("ionic: change set_rx_mode from_ndo to can_sleep")
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/pensando/ionic/ionic_lif.c   | 205 ++++++++----------
 .../net/ethernet/pensando/ionic/ionic_lif.h   |   4 +-
 2 files changed, 96 insertions(+), 113 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index af3a5368529c..7815e9034fb8 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -29,7 +29,7 @@ static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = {
 				      */
 };
 
-static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode);
+static void ionic_lif_rx_mode(struct ionic_lif *lif);
 static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
 static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
 static void ionic_link_status_check(struct ionic_lif *lif);
@@ -77,7 +77,7 @@ static void ionic_lif_deferred_work(struct work_struct *work)
 
 		switch (w->type) {
 		case IONIC_DW_TYPE_RX_MODE:
-			ionic_lif_rx_mode(lif, w->rx_mode);
+			ionic_lif_rx_mode(lif);
 			break;
 		case IONIC_DW_TYPE_RX_ADDR_ADD:
 			ionic_lif_addr_add(lif, w->addr);
@@ -1301,10 +1301,8 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
 	return 0;
 }
 
-static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add,
-			  bool can_sleep)
+static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add)
 {
-	struct ionic_deferred_work *work;
 	unsigned int nmfilters;
 	unsigned int nufilters;
 
@@ -1330,63 +1328,77 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add,
 			lif->nucast--;
 	}
 
-	if (!can_sleep) {
-		work = kzalloc(sizeof(*work), GFP_ATOMIC);
-		if (!work)
-			return -ENOMEM;
-		work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD :
-				   IONIC_DW_TYPE_RX_ADDR_DEL;
-		memcpy(work->addr, addr, ETH_ALEN);
-		netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n",
-			   add ? "add" : "del", addr);
-		ionic_lif_deferred_enqueue(&lif->deferred, work);
-	} else {
-		netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
-			   add ? "add" : "del", addr);
-		if (add)
-			return ionic_lif_addr_add(lif, addr);
-		else
-			return ionic_lif_addr_del(lif, addr);
-	}
+	netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
+		   add ? "add" : "del", addr);
+	if (add)
+		return ionic_lif_addr_add(lif, addr);
+	else
+		return ionic_lif_addr_del(lif, addr);
 
 	return 0;
 }
 
 static int ionic_addr_add(struct net_device *netdev, const u8 *addr)
 {
-	return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_SLEEP);
-}
-
-static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr)
-{
-	return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_NOT_SLEEP);
+	return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR);
 }
 
 static int ionic_addr_del(struct net_device *netdev, const u8 *addr)
 {
-	return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_SLEEP);
+	return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR);
 }
 
-static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr)
+static void ionic_lif_rx_mode(struct ionic_lif *lif)
 {
-	return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_NOT_SLEEP);
-}
-
-static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
-{
-	struct ionic_admin_ctx ctx = {
-		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
-		.cmd.rx_mode_set = {
-			.opcode = IONIC_CMD_RX_MODE_SET,
-			.lif_index = cpu_to_le16(lif->index),
-			.rx_mode = cpu_to_le16(rx_mode),
-		},
-	};
+	struct net_device *netdev = lif->netdev;
+	unsigned int nfilters;
+	unsigned int nd_flags;
 	char buf[128];
-	int err;
+	u16 rx_mode;
 	int i;
 #define REMAIN(__x) (sizeof(buf) - (__x))
 
+	mutex_lock(&lif->config_lock);
+
+	/* grab the flags once for local use */
+	nd_flags = netdev->flags;
+
+	rx_mode = IONIC_RX_MODE_F_UNICAST;
+	rx_mode |= (nd_flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
+	rx_mode |= (nd_flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
+	rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
+	rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
+
+	/* sync unicast addresses
+	 * next check to see if we're in an overflow state
+	 *    if so, we track that we overflowed and enable NIC PROMISC
+	 *    else if the overflow is set and not needed
+	 *       we remove our overflow flag and check the netdev flags
+	 *       to see if we can disable NIC PROMISC
+	 */
+	__dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
+	nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
+	if (netdev_uc_count(netdev) + 1 > nfilters) {
+		rx_mode |= IONIC_RX_MODE_F_PROMISC;
+		lif->uc_overflow = true;
+	} else if (lif->uc_overflow) {
+		lif->uc_overflow = false;
+		if (!(nd_flags & IFF_PROMISC))
+			rx_mode &= ~IONIC_RX_MODE_F_PROMISC;
+	}
+
+	/* same for multicast */
+	__dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
+	nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters);
+	if (netdev_mc_count(netdev) > nfilters) {
+		rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
+		lif->mc_overflow = true;
+	} else if (lif->mc_overflow) {
+		lif->mc_overflow = false;
+		if (!(nd_flags & IFF_ALLMULTI))
+			rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI;
+	}
+
 	i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
 		      lif->rx_mode, rx_mode);
 	if (rx_mode & IONIC_RX_MODE_F_UNICAST)
@@ -1399,79 +1411,48 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
 		i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
 	if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
 		i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
-	netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf);
+	if (rx_mode & IONIC_RX_MODE_F_RDMA_SNIFFER)
+		i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_RDMA_SNIFFER");
+	netdev_dbg(netdev, "lif%d %s\n", lif->index, buf);
 
-	err = ionic_adminq_post_wait(lif, &ctx);
-	if (err)
-		netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n",
-			    rx_mode, err);
-	else
-		lif->rx_mode = rx_mode;
+	if (lif->rx_mode != rx_mode) {
+		struct ionic_admin_ctx ctx = {
+			.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+			.cmd.rx_mode_set = {
+				.opcode = IONIC_CMD_RX_MODE_SET,
+				.lif_index = cpu_to_le16(lif->index),
+			},
+		};
+		int err;
+
+		ctx.cmd.rx_mode_set.rx_mode = cpu_to_le16(rx_mode);
+		err = ionic_adminq_post_wait(lif, &ctx);
+		if (err)
+			netdev_warn(netdev, "set rx_mode 0x%04x failed: %d\n",
+				    rx_mode, err);
+		else
+			lif->rx_mode = rx_mode;
+	}
+
+	mutex_unlock(&lif->config_lock);
 }
 
 static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
 	struct ionic_deferred_work *work;
-	unsigned int nfilters;
-	unsigned int rx_mode;
 
-	rx_mode = IONIC_RX_MODE_F_UNICAST;
-	rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
-	rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
-	rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
-	rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
-
-	/* sync unicast addresses
-	 * next check to see if we're in an overflow state
-	 *    if so, we track that we overflowed and enable NIC PROMISC
-	 *    else if the overflow is set and not needed
-	 *       we remove our overflow flag and check the netdev flags
-	 *       to see if we can disable NIC PROMISC
-	 */
-	if (can_sleep)
-		__dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
-	else
-		__dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del);
-	nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
-	if (netdev_uc_count(netdev) + 1 > nfilters) {
-		rx_mode |= IONIC_RX_MODE_F_PROMISC;
-		lif->uc_overflow = true;
-	} else if (lif->uc_overflow) {
-		lif->uc_overflow = false;
-		if (!(netdev->flags & IFF_PROMISC))
-			rx_mode &= ~IONIC_RX_MODE_F_PROMISC;
-	}
-
-	/* same for multicast */
-	if (can_sleep)
-		__dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
-	else
-		__dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del);
-	nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters);
-	if (netdev_mc_count(netdev) > nfilters) {
-		rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
-		lif->mc_overflow = true;
-	} else if (lif->mc_overflow) {
-		lif->mc_overflow = false;
-		if (!(netdev->flags & IFF_ALLMULTI))
-			rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI;
-	}
-
-	if (lif->rx_mode != rx_mode) {
-		if (!can_sleep) {
-			work = kzalloc(sizeof(*work), GFP_ATOMIC);
-			if (!work) {
-				netdev_err(lif->netdev, "rxmode change dropped\n");
-				return;
-			}
-			work->type = IONIC_DW_TYPE_RX_MODE;
-			work->rx_mode = rx_mode;
-			netdev_dbg(lif->netdev, "deferred: rx_mode\n");
-			ionic_lif_deferred_enqueue(&lif->deferred, work);
-		} else {
-			ionic_lif_rx_mode(lif, rx_mode);
+	if (!can_sleep) {
+		work = kzalloc(sizeof(*work), GFP_ATOMIC);
+		if (!work) {
+			netdev_err(lif->netdev, "rxmode change dropped\n");
+			return;
 		}
+		work->type = IONIC_DW_TYPE_RX_MODE;
+		netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+		ionic_lif_deferred_enqueue(&lif->deferred, work);
+	} else {
+		ionic_lif_rx_mode(lif);
 	}
 }
 
@@ -3058,6 +3039,7 @@ void ionic_lif_deinit(struct ionic_lif *lif)
 	ionic_lif_qcq_deinit(lif, lif->notifyqcq);
 	ionic_lif_qcq_deinit(lif, lif->adminqcq);
 
+	mutex_destroy(&lif->config_lock);
 	mutex_destroy(&lif->queue_lock);
 	ionic_lif_reset(lif);
 }
@@ -3185,7 +3167,7 @@ static int ionic_station_set(struct ionic_lif *lif)
 		 */
 		if (!ether_addr_equal(ctx.comp.lif_getattr.mac,
 				      netdev->dev_addr))
-			ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP);
+			ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
 	} else {
 		/* Update the netdev mac with the device's mac */
 		memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
@@ -3202,7 +3184,7 @@ static int ionic_station_set(struct ionic_lif *lif)
 
 	netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
 		   netdev->dev_addr);
-	ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP);
+	ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
 
 	return 0;
 }
@@ -3225,6 +3207,7 @@ int ionic_lif_init(struct ionic_lif *lif)
 
 	lif->hw_index = le16_to_cpu(comp.hw_index);
 	mutex_init(&lif->queue_lock);
+	mutex_init(&lif->config_lock);
 
 	/* now that we have the hw_index we can figure out our doorbell page */
 	lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 346506f01715..af291303bd7a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -108,7 +108,6 @@ struct ionic_deferred_work {
 	struct list_head list;
 	enum ionic_deferred_work_type type;
 	union {
-		unsigned int rx_mode;
 		u8 addr[ETH_ALEN];
 		u8 fw_status;
 	};
@@ -179,6 +178,7 @@ struct ionic_lif {
 	unsigned int index;
 	unsigned int hw_index;
 	struct mutex queue_lock;	/* lock for queue structures */
+	struct mutex config_lock;	/* lock for config actions */
 	spinlock_t adminq_lock;		/* lock for AdminQ operations */
 	struct ionic_qcq *adminqcq;
 	struct ionic_qcq *notifyqcq;
@@ -199,7 +199,7 @@ struct ionic_lif {
 	unsigned int nrxq_descs;
 	u32 rx_copybreak;
 	u64 rxq_features;
-	unsigned int rx_mode;
+	u16 rx_mode;
 	u64 hw_features;
 	bool registered;
 	bool mc_overflow;

From f79eef711eb57d56874b08ea11db69221de54a6d Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Fri, 23 Jul 2021 11:02:46 -0700
Subject: [PATCH 125/502] ionic: catch no ptp support earlier

If PTP configuration is attempted on ports that don't support
it, such as VF ports, the driver will return an error status
-95, or EOPNOSUPP and print an error message
    enp98s0: hwstamp set failed: -95

Because some daemons can retry every few seconds, this can end
up filling the dmesg log and pushing out other more useful
messages.

We can catch this issue earlier in our handling and return
the error without a log message.

Fixes: 829600ce5e4e ("ionic: add ts_config replay")
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.h |  7 ++-----
 drivers/net/ethernet/pensando/ionic/ionic_phc.c | 10 +++++++---
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index af291303bd7a..69ab59fedb6c 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -302,7 +302,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
 int ionic_lif_size(struct ionic *ionic);
 
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
-int ionic_lif_hwstamp_replay(struct ionic_lif *lif);
+void ionic_lif_hwstamp_replay(struct ionic_lif *lif);
 int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr);
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr);
 ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter);
@@ -311,10 +311,7 @@ void ionic_lif_unregister_phc(struct ionic_lif *lif);
 void ionic_lif_alloc_phc(struct ionic_lif *lif);
 void ionic_lif_free_phc(struct ionic_lif *lif);
 #else
-static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
-{
-	return -EOPNOTSUPP;
-}
+static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {}
 
 static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
index a87c87e86aef..6e2403c71608 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
@@ -188,6 +188,9 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 	struct hwtstamp_config config;
 	int err;
 
+	if (!lif->phc || !lif->phc->ptp)
+		return -EOPNOTSUPP;
+
 	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
 		return -EFAULT;
 
@@ -203,15 +206,16 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 	return 0;
 }
 
-int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
+void ionic_lif_hwstamp_replay(struct ionic_lif *lif)
 {
 	int err;
 
+	if (!lif->phc || !lif->phc->ptp)
+		return;
+
 	err = ionic_lif_hwstamp_set_ts_config(lif, NULL);
 	if (err)
 		netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err);
-
-	return err;
 }
 
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)

From a6ff85e0a2d9d074a4b4c291ba9ec1e5b0aba22b Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Fri, 23 Jul 2021 11:02:47 -0700
Subject: [PATCH 126/502] ionic: remove intr coalesce update from napi

Move the interrupt coalesce value update out of the napi
thread and into the dim_work thread and set it only when it
has actually changed.

Fixes: 04a834592bf5 ("ionic: dynamic interrupt moderation")
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c  | 14 +++++++++++++-
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c |  4 ----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 7815e9034fb8..e795fa63ca12 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -53,7 +53,19 @@ static void ionic_dim_work(struct work_struct *work)
 	cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 	qcq = container_of(dim, struct ionic_qcq, dim);
 	new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec);
-	qcq->intr.dim_coal_hw = new_coal ? new_coal : 1;
+	new_coal = new_coal ? new_coal : 1;
+
+	if (qcq->intr.dim_coal_hw != new_coal) {
+		unsigned int qi = qcq->cq.bound_q->index;
+		struct ionic_lif *lif = qcq->q.lif;
+
+		qcq->intr.dim_coal_hw = new_coal;
+
+		ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+				     lif->rxqcqs[qi]->intr.index,
+				     qcq->intr.dim_coal_hw);
+	}
+
 	dim->state = DIM_START_MEASURE;
 }
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 08934888575c..9d3a04110685 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -463,10 +463,6 @@ static void ionic_dim_update(struct ionic_qcq *qcq)
 	lif = qcq->q.lif;
 	qi = qcq->cq.bound_q->index;
 
-	ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
-			     lif->rxqcqs[qi]->intr.index,
-			     qcq->intr.dim_coal_hw);
-
 	dim_update_sample(qcq->cq.bound_intr->rearm_count,
 			  lif->txqstats[qi].pkts,
 			  lif->txqstats[qi].bytes,

From 76ed8a4a00b484dcccef819ef2618bcf8e46f560 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Fri, 23 Jul 2021 11:02:48 -0700
Subject: [PATCH 127/502] ionic: fix up dim accounting for tx and rx

We need to count the correct Tx and/or Rx packets for dynamic
interrupt moderation, depending on which we're processing on
the queue interrupt.

Fixes: 04a834592bf5 ("ionic: dynamic interrupt moderation")
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/pensando/ionic/ionic_txrx.c  | 28 ++++++++++++++-----
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 9d3a04110685..1c6e2b9fc96b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -451,11 +451,12 @@ void ionic_rx_empty(struct ionic_queue *q)
 	q->tail_idx = 0;
 }
 
-static void ionic_dim_update(struct ionic_qcq *qcq)
+static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode)
 {
 	struct dim_sample dim_sample;
 	struct ionic_lif *lif;
 	unsigned int qi;
+	u64 pkts, bytes;
 
 	if (!qcq->intr.dim_coal_hw)
 		return;
@@ -463,10 +464,23 @@ static void ionic_dim_update(struct ionic_qcq *qcq)
 	lif = qcq->q.lif;
 	qi = qcq->cq.bound_q->index;
 
+	switch (napi_mode) {
+	case IONIC_LIF_F_TX_DIM_INTR:
+		pkts = lif->txqstats[qi].pkts;
+		bytes = lif->txqstats[qi].bytes;
+		break;
+	case IONIC_LIF_F_RX_DIM_INTR:
+		pkts = lif->rxqstats[qi].pkts;
+		bytes = lif->rxqstats[qi].bytes;
+		break;
+	default:
+		pkts = lif->txqstats[qi].pkts + lif->rxqstats[qi].pkts;
+		bytes = lif->txqstats[qi].bytes + lif->rxqstats[qi].bytes;
+		break;
+	}
+
 	dim_update_sample(qcq->cq.bound_intr->rearm_count,
-			  lif->txqstats[qi].pkts,
-			  lif->txqstats[qi].bytes,
-			  &dim_sample);
+			  pkts, bytes, &dim_sample);
 
 	net_dim(&qcq->dim, dim_sample);
 }
@@ -487,7 +501,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
 				     ionic_tx_service, NULL, NULL);
 
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
-		ionic_dim_update(qcq);
+		ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR);
 		flags |= IONIC_INTR_CRED_UNMASK;
 		cq->bound_intr->rearm_count++;
 	}
@@ -526,7 +540,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget)
 		ionic_rx_fill(cq->bound_q);
 
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
-		ionic_dim_update(qcq);
+		ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR);
 		flags |= IONIC_INTR_CRED_UNMASK;
 		cq->bound_intr->rearm_count++;
 	}
@@ -572,7 +586,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
 		ionic_rx_fill(rxcq->bound_q);
 
 	if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) {
-		ionic_dim_update(qcq);
+		ionic_dim_update(qcq, 0);
 		flags |= IONIC_INTR_CRED_UNMASK;
 		rxcq->bound_intr->rearm_count++;
 	}

From f07f9815b7046e25cc32bf8542c9c0bbc5eb6e0e Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Fri, 23 Jul 2021 11:02:49 -0700
Subject: [PATCH 128/502] ionic: count csum_none when offload enabled

Be sure to count the csum_none cases when csum offload is
enabled.

Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling")
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 1c6e2b9fc96b..08870190e4d2 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -274,12 +274,11 @@ static void ionic_rx_clean(struct ionic_queue *q,
 		}
 	}
 
-	if (likely(netdev->features & NETIF_F_RXCSUM)) {
-		if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
-			skb->ip_summed = CHECKSUM_COMPLETE;
-			skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
-			stats->csum_complete++;
-		}
+	if (likely(netdev->features & NETIF_F_RXCSUM) &&
+	    (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC)) {
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
+		stats->csum_complete++;
 	} else {
 		stats->csum_none++;
 	}

From cdf72837cda89b2d38bd18fbe6cc591c1d5f2416 Mon Sep 17 00:00:00 2001
From: "Geoffrey D. Bennett" <g@b4.vu>
Date: Fri, 23 Jul 2021 05:41:53 +0930
Subject: [PATCH 129/502] ALSA: scarlett2: Fix Mute/Dim/MSD Mode control names

Append "Playback Switch" to the names of "Mute" and "Dim" controls,
and append "Switch" to the "MSD Mode" control as per
Documentation/sound/designs/control-names.rst.

Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Link: https://lore.kernel.org/r/77f1000652c37e3217fb8dad8e156bc6392abc0b.1626959758.git.g@b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer_scarlett_gen2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c
index f9d698a37153..347995ea39e4 100644
--- a/sound/usb/mixer_scarlett_gen2.c
+++ b/sound/usb/mixer_scarlett_gen2.c
@@ -228,7 +228,7 @@ enum {
 };
 
 static const char *const scarlett2_dim_mute_names[SCARLETT2_DIM_MUTE_COUNT] = {
-	"Mute", "Dim"
+	"Mute Playback Switch", "Dim Playback Switch"
 };
 
 /* Description of each hardware port type:
@@ -3455,7 +3455,7 @@ static int scarlett2_add_msd_ctl(struct usb_mixer_interface *mixer)
 
 	/* Add MSD control */
 	return scarlett2_add_new_ctl(mixer, &scarlett2_msd_ctl,
-				     0, 1, "MSD Mode", NULL);
+				     0, 1, "MSD Mode Switch", NULL);
 }
 
 /*** Cleanup/Suspend Callbacks ***/

From d3a4f784d20c696b134b916f57956f12a37ecd47 Mon Sep 17 00:00:00 2001
From: "Geoffrey D. Bennett" <g@b4.vu>
Date: Fri, 23 Jul 2021 05:42:08 +0930
Subject: [PATCH 130/502] ALSA: scarlett2: Fix Direct Monitor control name for
 2i2

The Direct Monitor control for the 2i2 is an enumerated value, not a
boolean. Fix the control name to say "Playback Enum" instead of
"Playback Switch" in this case.

Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Link: https://lore.kernel.org/r/faf5de1d2100038e7d07520d770fda4a1adc276a.1626959758.git.g@b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer_scarlett_gen2.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c
index 347995ea39e4..fa604b61066f 100644
--- a/sound/usb/mixer_scarlett_gen2.c
+++ b/sound/usb/mixer_scarlett_gen2.c
@@ -2530,14 +2530,18 @@ static int scarlett2_add_direct_monitor_ctl(struct usb_mixer_interface *mixer)
 {
 	struct scarlett2_data *private = mixer->private_data;
 	const struct scarlett2_device_info *info = private->info;
+	const char *s;
 
 	if (!info->direct_monitor)
 		return 0;
 
+	s = info->direct_monitor == 1
+	      ? "Direct Monitor Playback Switch"
+	      : "Direct Monitor Playback Enum";
+
 	return scarlett2_add_new_ctl(
 		mixer, &scarlett2_direct_monitor_ctl[info->direct_monitor - 1],
-		0, 1, "Direct Monitor Playback Switch",
-		&private->direct_monitor_ctl);
+		0, 1, s, &private->direct_monitor_ctl);
 }
 
 /*** Speaker Switching Control ***/

From 9ee0fc8366ddce380547878640708f1bd7dd2ead Mon Sep 17 00:00:00 2001
From: "Geoffrey D. Bennett" <g@b4.vu>
Date: Fri, 23 Jul 2021 05:42:48 +0930
Subject: [PATCH 131/502] ALSA: scarlett2: Correct channel mute status after
 mute button pressed

After the hardware mute button is pressed, private->vol_updated is set
so that the mute status is invalidated. As the channel mute values may
be affected by the global mute value, update scarlett2_mute_ctl_get()
to call scarlett2_update_volumes() if private->vol_updated is set.

Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Link: https://lore.kernel.org/r/aa18ddbf8d8bd7f31832ab1b6b6057c00b931202.1626959758.git.g@b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer_scarlett_gen2.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c
index fa604b61066f..3457fbc8108f 100644
--- a/sound/usb/mixer_scarlett_gen2.c
+++ b/sound/usb/mixer_scarlett_gen2.c
@@ -1856,9 +1856,15 @@ static int scarlett2_mute_ctl_get(struct snd_kcontrol *kctl,
 					struct snd_ctl_elem_value *ucontrol)
 {
 	struct usb_mixer_elem_info *elem = kctl->private_data;
-	struct scarlett2_data *private = elem->head.mixer->private_data;
+	struct usb_mixer_interface *mixer = elem->head.mixer;
+	struct scarlett2_data *private = mixer->private_data;
 	int index = line_out_remap(private, elem->control);
 
+	mutex_lock(&private->data_mutex);
+	if (private->vol_updated)
+		scarlett2_update_volumes(mixer);
+	mutex_unlock(&private->data_mutex);
+
 	ucontrol->value.integer.value[0] = private->mute_switch[index];
 	return 0;
 }

From 2b8b12be9b9752c36efda38b7dd5d83d790d01d8 Mon Sep 17 00:00:00 2001
From: "Geoffrey D. Bennett" <g@b4.vu>
Date: Fri, 23 Jul 2021 05:43:26 +0930
Subject: [PATCH 132/502] ALSA: scarlett2: Fix line out/speaker switching
 notifications

The values of the line output controls can change when the SW/HW
switches are set to HW, and also when speaker switching is enabled.
These notifications were sent with a mask of only
SNDRV_CTL_EVENT_MASK_INFO. Change the notifications to set the
SNDRV_CTL_EVENT_MASK_VALUE mask bit as well.

When the mute control is updated, the notification was sent with a
mask of SNDRV_CTL_EVENT_MASK_INFO. Change the mask to the correct
value of SNDRV_CTL_EVENT_MASK_VALUE.

Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Link: https://lore.kernel.org/r/8192e15ba62fa4bc90425c005f265c0de530be20.1626959758.git.g@b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer_scarlett_gen2.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c
index 3457fbc8108f..3d5848d5481b 100644
--- a/sound/usb/mixer_scarlett_gen2.c
+++ b/sound/usb/mixer_scarlett_gen2.c
@@ -1961,10 +1961,12 @@ static void scarlett2_vol_ctl_set_writable(struct usb_mixer_interface *mixer,
 			~SNDRV_CTL_ELEM_ACCESS_WRITE;
 	}
 
-	/* Notify of write bit change */
-	snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO,
+	/* Notify of write bit and possible value change */
+	snd_ctl_notify(card,
+		       SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO,
 		       &private->vol_ctls[index]->id);
-	snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO,
+	snd_ctl_notify(card,
+		       SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO,
 		       &private->mute_ctls[index]->id);
 }
 
@@ -2599,7 +2601,9 @@ static int scarlett2_speaker_switch_enable(struct usb_mixer_interface *mixer)
 
 		/* disable the line out SW/HW switch */
 		scarlett2_sw_hw_ctl_ro(private, i);
-		snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO,
+		snd_ctl_notify(card,
+			       SNDRV_CTL_EVENT_MASK_VALUE |
+				 SNDRV_CTL_EVENT_MASK_INFO,
 			       &private->sw_hw_ctls[i]->id);
 	}
 
@@ -2923,7 +2927,7 @@ static int scarlett2_dim_mute_ctl_put(struct snd_kcontrol *kctl,
 			if (private->vol_sw_hw_switch[line_index]) {
 				private->mute_switch[line_index] = val;
 				snd_ctl_notify(mixer->chip->card,
-					       SNDRV_CTL_EVENT_MASK_INFO,
+					       SNDRV_CTL_EVENT_MASK_VALUE,
 					       &private->mute_ctls[i]->id);
 			}
 		}

From 4511781f95da0a3b2bad34f3f5e3967e80cd2d18 Mon Sep 17 00:00:00 2001
From: "chihhao.chen" <chihhao.chen@mediatek.com>
Date: Sat, 24 Jul 2021 12:23:41 +0800
Subject: [PATCH 133/502] ALSA: usb-audio: fix incorrect clock source setting

The following scenario describes an echo test for
Samsung USBC Headset (AKG) with VID/PID (0x04e8/0xa051).

We first start a capture stream(USB IN transfer) in 96Khz/24bit/1ch mode.
In clock find source function, we get value 0x2 for clock selector
and 0x1 for clock source.

Kernel-4.14 behavior
Since clock source is valid so clock selector was not set again.
We pass through this function and start a playback stream(USB OUT transfer)
in 48Khz/32bit/2ch mode. This time we get value 0x1 for clock selector
and 0x1 for clock source. Finally clock id with this setting is 0x9.

Kernel-5.10 behavior
Clock selector was always set one more time even it is valid.
When we start a playback stream, we will get 0x2 for clock selector
and 0x1 for clock source. In this case clock id becomes 0xA.
This is an incorrect clock source setting and results in severe noises.
We see wrong data rate in USB IN transfer.
(From 288 bytes/ms becomes 144 bytes/ms) It should keep in 288 bytes/ms.

This earphone works fine on older kernel version load because
this is a newly-added behavior.

Fixes: d2e8f641257d ("ALSA: usb-audio: Explicitly set up the clock selector")
Signed-off-by: chihhao.chen <chihhao.chen@mediatek.com>
Link: https://lore.kernel.org/r/1627100621-19225-1-git-send-email-chihhao.chen@mediatek.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/clock.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 52de52288e10..14456f61539e 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -324,6 +324,12 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip,
 					      sources[ret - 1],
 					      visited, validate);
 		if (ret > 0) {
+			/*
+			 * For Samsung USBC Headset (AKG), setting clock selector again
+			 * will result in incorrect default clock setting problems
+			 */
+			if (chip->usb_id == USB_ID(0x04e8, 0xa051))
+				return ret;
 			err = uac_clock_selector_set_val(chip, entity_id, cur);
 			if (err < 0)
 				return err;

From f5d156c7bfab7d728b2fd35bc63eab12eda18125 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Mon, 19 Jul 2021 15:34:37 +0800
Subject: [PATCH 134/502] arm64: dts: imx8mp: remove fallback compatible string
 for FlexCAN

FlexCAN on i.MX8MP is not derived from i.MX6Q, instead reuses from
i.MX8QM with extra ECC added and default is enabled, so that the FlexCAN
would be put into freeze mode without FLEXCAN_QUIRK_DISABLE_MECR quirk.

This patch removes "fsl,imx6q-flexcan" fallback compatible string since
it's not compatible with the i.MX6Q.

Link: https://lore.kernel.org/r/20210719073437.32078-1-qiangqing.zhang@nxp.com
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 arch/arm64/boot/dts/freescale/imx8mp.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index ca38d0d6c3c4..f4eaab3ecf03 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -579,7 +579,7 @@
 			};
 
 			flexcan1: can@308c0000 {
-				compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan";
+				compatible = "fsl,imx8mp-flexcan";
 				reg = <0x308c0000 0x10000>;
 				interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clk IMX8MP_CLK_IPG_ROOT>,
@@ -594,7 +594,7 @@
 			};
 
 			flexcan2: can@308d0000 {
-				compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan";
+				compatible = "fsl,imx8mp-flexcan";
 				reg = <0x308d0000 0x10000>;
 				interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clk IMX8MP_CLK_IPG_ROOT>,

From 54f93336d000229f72c26d8a3f69dd256b744528 Mon Sep 17 00:00:00 2001
From: Ziyang Xuan <william.xuanziyang@huawei.com>
Date: Thu, 22 Jul 2021 15:08:19 +0800
Subject: [PATCH 135/502] can: raw: raw_setsockopt(): fix raw_rcv panic for
 sock UAF

We get a bug during ltp can_filter test as following.

===========================================
[60919.264984] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
[60919.265223] PGD 8000003dda726067 P4D 8000003dda726067 PUD 3dda727067 PMD 0
[60919.265443] Oops: 0000 [#1] SMP PTI
[60919.265550] CPU: 30 PID: 3638365 Comm: can_filter Kdump: loaded Tainted: G        W         4.19.90+ #1
[60919.266068] RIP: 0010:selinux_socket_sock_rcv_skb+0x3e/0x200
[60919.293289] RSP: 0018:ffff8d53bfc03cf8 EFLAGS: 00010246
[60919.307140] RAX: 0000000000000000 RBX: 000000000000001d RCX: 0000000000000007
[60919.320756] RDX: 0000000000000001 RSI: ffff8d5104a8ed00 RDI: ffff8d53bfc03d30
[60919.334319] RBP: ffff8d9338056800 R08: ffff8d53bfc29d80 R09: 0000000000000001
[60919.347969] R10: ffff8d53bfc03ec0 R11: ffffb8526ef47c98 R12: ffff8d53bfc03d30
[60919.350320] perf: interrupt took too long (3063 > 2500), lowering kernel.perf_event_max_sample_rate to 65000
[60919.361148] R13: 0000000000000001 R14: ffff8d53bcf90000 R15: 0000000000000000
[60919.361151] FS:  00007fb78b6b3600(0000) GS:ffff8d53bfc00000(0000) knlGS:0000000000000000
[60919.400812] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[60919.413730] CR2: 0000000000000010 CR3: 0000003e3f784006 CR4: 00000000007606e0
[60919.426479] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[60919.439339] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[60919.451608] PKRU: 55555554
[60919.463622] Call Trace:
[60919.475617]  <IRQ>
[60919.487122]  ? update_load_avg+0x89/0x5d0
[60919.498478]  ? update_load_avg+0x89/0x5d0
[60919.509822]  ? account_entity_enqueue+0xc5/0xf0
[60919.520709]  security_sock_rcv_skb+0x2a/0x40
[60919.531413]  sk_filter_trim_cap+0x47/0x1b0
[60919.542178]  ? kmem_cache_alloc+0x38/0x1b0
[60919.552444]  sock_queue_rcv_skb+0x17/0x30
[60919.562477]  raw_rcv+0x110/0x190 [can_raw]
[60919.572539]  can_rcv_filter+0xbc/0x1b0 [can]
[60919.582173]  can_receive+0x6b/0xb0 [can]
[60919.591595]  can_rcv+0x31/0x70 [can]
[60919.600783]  __netif_receive_skb_one_core+0x5a/0x80
[60919.609864]  process_backlog+0x9b/0x150
[60919.618691]  net_rx_action+0x156/0x400
[60919.627310]  ? sched_clock_cpu+0xc/0xa0
[60919.635714]  __do_softirq+0xe8/0x2e9
[60919.644161]  do_softirq_own_stack+0x2a/0x40
[60919.652154]  </IRQ>
[60919.659899]  do_softirq.part.17+0x4f/0x60
[60919.667475]  __local_bh_enable_ip+0x60/0x70
[60919.675089]  __dev_queue_xmit+0x539/0x920
[60919.682267]  ? finish_wait+0x80/0x80
[60919.689218]  ? finish_wait+0x80/0x80
[60919.695886]  ? sock_alloc_send_pskb+0x211/0x230
[60919.702395]  ? can_send+0xe5/0x1f0 [can]
[60919.708882]  can_send+0xe5/0x1f0 [can]
[60919.715037]  raw_sendmsg+0x16d/0x268 [can_raw]

It's because raw_setsockopt() concurrently with
unregister_netdevice_many(). Concurrent scenario as following.

	cpu0						cpu1
raw_bind
raw_setsockopt					unregister_netdevice_many
						unlist_netdevice
dev_get_by_index				raw_notifier
raw_enable_filters				......
can_rx_register
can_rcv_list_find(..., net->can.rx_alldev_list)

......

sock_close
raw_release(sock_a)

......

can_receive
can_rcv_filter(net->can.rx_alldev_list, ...)
raw_rcv(skb, sock_a)
BUG

After unlist_netdevice(), dev_get_by_index() return NULL in
raw_setsockopt(). Function raw_enable_filters() will add sock
and can_filter to net->can.rx_alldev_list. Then the sock is closed.
Followed by, we sock_sendmsg() to a new vcan device use the same
can_filter. Protocol stack match the old receiver whose sock has
been released on net->can.rx_alldev_list in can_rcv_filter().
Function raw_rcv() uses the freed sock. UAF BUG is triggered.

We can find that the key issue is that net_device has not been
protected in raw_setsockopt(). Use rtnl_lock to protect net_device
in raw_setsockopt().

Fixes: c18ce101f2e4 ("[CAN]: Add raw protocol")
Link: https://lore.kernel.org/r/20210722070819.1048263-1-william.xuanziyang@huawei.com
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/raw.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/net/can/raw.c b/net/can/raw.c
index ed4fcb7ab0c3..cd5a49380116 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -546,10 +546,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 				return -EFAULT;
 		}
 
+		rtnl_lock();
 		lock_sock(sk);
 
-		if (ro->bound && ro->ifindex)
+		if (ro->bound && ro->ifindex) {
 			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+			if (!dev) {
+				if (count > 1)
+					kfree(filter);
+				err = -ENODEV;
+				goto out_fil;
+			}
+		}
 
 		if (ro->bound) {
 			/* (try to) register the new filters */
@@ -588,6 +596,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 			dev_put(dev);
 
 		release_sock(sk);
+		rtnl_unlock();
 
 		break;
 
@@ -600,10 +609,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 
 		err_mask &= CAN_ERR_MASK;
 
+		rtnl_lock();
 		lock_sock(sk);
 
-		if (ro->bound && ro->ifindex)
+		if (ro->bound && ro->ifindex) {
 			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+			if (!dev) {
+				err = -ENODEV;
+				goto out_err;
+			}
+		}
 
 		/* remove current error mask */
 		if (ro->bound) {
@@ -627,6 +642,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 			dev_put(dev);
 
 		release_sock(sk);
+		rtnl_unlock();
 
 		break;
 

From 0c71437dd50dd687c15d8ca80b3b68f10bb21d63 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 14 Jul 2021 13:16:02 +0200
Subject: [PATCH 136/502] can: j1939: j1939_session_deactivate(): clarify
 lifetime of session object

The j1939_session_deactivate() is decrementing the session ref-count and
potentially can free() the session. This would cause use-after-free
situation.

However, the code calling j1939_session_deactivate() does always hold
another reference to the session, so that it would not be free()ed in
this code path.

This patch adds a comment to make this clear and a WARN_ON, to ensure
that future changes will not violate this requirement. Further this
patch avoids dereferencing the session pointer as a precaution to avoid
use-after-free if the session is actually free()ed.

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Link: https://lore.kernel.org/r/20210714111602.24021-1-o.rempel@pengutronix.de
Reported-by: Xiaochen Zou <xzou017@ucr.edu>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index c3946c355882..bb1092c3e7e3 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1075,11 +1075,16 @@ static bool j1939_session_deactivate_locked(struct j1939_session *session)
 
 static bool j1939_session_deactivate(struct j1939_session *session)
 {
+	struct j1939_priv *priv = session->priv;
 	bool active;
 
-	j1939_session_list_lock(session->priv);
+	j1939_session_list_lock(priv);
+	/* This function should be called with a session ref-count of at
+	 * least 2.
+	 */
+	WARN_ON_ONCE(kref_read(&session->kref) < 2);
 	active = j1939_session_deactivate_locked(session);
-	j1939_session_list_unlock(session->priv);
+	j1939_session_list_unlock(priv);
 
 	return active;
 }

From c6eea1c8bda56737752465a298dc6ce07d6b8ce3 Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Tue, 6 Jul 2021 19:00:08 +0800
Subject: [PATCH 137/502] can: j1939: j1939_xtp_rx_dat_one(): fix rxtimer value
 between consecutive TP.DT to 750ms

For receive side, the max time interval between two consecutive TP.DT
should be 750ms.

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Link: https://lore.kernel.org/r/1625569210-47506-1-git-send-email-zhangchangzhong@huawei.com
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index bb1092c3e7e3..bdc95bd7a851 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1874,7 +1874,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 		if (!session->transmission)
 			j1939_tp_schedule_txtimer(session, 0);
 	} else {
-		j1939_tp_set_rxtimeout(session, 250);
+		j1939_tp_set_rxtimeout(session, 750);
 	}
 	session->last_cmd = 0xff;
 	consume_skb(se_skb);

From 590eb2b7d8cfafb27e8108d52d4bf4850626d31d Mon Sep 17 00:00:00 2001
From: Stephane Grosjean <s.grosjean@peak-system.com>
Date: Fri, 25 Jun 2021 15:09:29 +0200
Subject: [PATCH 138/502] can: peak_usb: pcan_usb_handle_bus_evt(): fix reading
 rxerr/txerr values

This patch fixes an incorrect way of reading error counters in messages
received for this purpose from the PCAN-USB interface. These messages
inform about the increase or decrease of the error counters, whose values
are placed in bytes 1 and 2 of the message data (not 0 and 1).

Fixes: ea8b33bde76c ("can: pcan_usb: add support of rxerr/txerr counters")
Link: https://lore.kernel.org/r/20210625130931.27438-4-s.grosjean@peak-system.com
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 1d6f77252f01..899a3d21b77f 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -117,7 +117,8 @@
 #define PCAN_USB_BERR_MASK	(PCAN_USB_ERR_RXERR | PCAN_USB_ERR_TXERR)
 
 /* identify bus event packets with rx/tx error counters */
-#define PCAN_USB_ERR_CNT		0x80
+#define PCAN_USB_ERR_CNT_DEC		0x00	/* counters are decreasing */
+#define PCAN_USB_ERR_CNT_INC		0x80	/* counters are increasing */
 
 /* private to PCAN-USB adapter */
 struct pcan_usb {
@@ -608,11 +609,12 @@ static int pcan_usb_handle_bus_evt(struct pcan_usb_msg_context *mc, u8 ir)
 
 	/* acccording to the content of the packet */
 	switch (ir) {
-	case PCAN_USB_ERR_CNT:
+	case PCAN_USB_ERR_CNT_DEC:
+	case PCAN_USB_ERR_CNT_INC:
 
 		/* save rx/tx error counters from in the device context */
-		pdev->bec.rxerr = mc->ptr[0];
-		pdev->bec.txerr = mc->ptr[1];
+		pdev->bec.rxerr = mc->ptr[1];
+		pdev->bec.txerr = mc->ptr[2];
 		break;
 
 	default:

From ef68a717960658e6a1e5f08adb0574326e9a12c2 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Sat, 24 Apr 2021 16:20:39 +0200
Subject: [PATCH 139/502] can: mcp251xfd: mcp251xfd_irq(): stop timestamping
 worker in case error in IRQ

In case an error occurred in the IRQ handler, the chip status is
dumped via devcoredump and all IRQs are disabled, but the chip stays
powered for further analysis.

The chip is in an undefined state and will not receive any CAN frames,
so shut down the timestamping worker, which reads the TBC register
regularly, too. This avoids any CRC read error messages if there is a
communication problem with the chip.

Fixes: efd8d98dfb90 ("can: mcp251xfd: add HW timestamp infrastructure")
Link: https://lore.kernel.org/r/20210724155131.471303-1-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 47c3f408a799..9ae48072b6c6 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -2300,6 +2300,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
 		   err, priv->regs_status.intf);
 	mcp251xfd_dump(priv);
 	mcp251xfd_chip_interrupts_disable(priv);
+	mcp251xfd_timestamp_stop(priv);
 
 	return handled;
 }

From 3cf4375a090473d240281a0d2b04a3a5aaeac34b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 23 Jul 2021 18:46:01 -0400
Subject: [PATCH 140/502] tipc: do not write skb_shinfo frags when doing
 decrytion

One skb's skb_shinfo frags are not writable, and they can be shared with
other skbs' like by pskb_copy(). To write the frags may cause other skb's
data crash.

So before doing en/decryption, skb_cow_data() should always be called for
a cloned or nonlinear skb if req dst is using the same sg as req src.
While at it, the likely branch can be removed, as it will be covered
by skb_cow_data().

Note that esp_input() has the same issue, and I will fix it in another
patch. tipc_aead_encrypt() doesn't have this issue, as it only processes
linear data in the unlikely branch.

Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication")
Reported-by: Shuang Li <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/crypto.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index e5c43d4d5a75..c9391d38de85 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -898,16 +898,10 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
 	if (unlikely(!aead))
 		return -ENOKEY;
 
-	/* Cow skb data if needed */
-	if (likely(!skb_cloned(skb) &&
-		   (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) {
-		nsg = 1 + skb_shinfo(skb)->nr_frags;
-	} else {
-		nsg = skb_cow_data(skb, 0, &unused);
-		if (unlikely(nsg < 0)) {
-			pr_err("RX: skb_cow_data() returned %d\n", nsg);
-			return nsg;
-		}
+	nsg = skb_cow_data(skb, 0, &unused);
+	if (unlikely(nsg < 0)) {
+		pr_err("RX: skb_cow_data() returned %d\n", nsg);
+		return nsg;
 	}
 
 	/* Allocate memory for the AEAD operation */

From 89bc7f456cd40e0be7b94f4fdae9186f22b76a05 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Fri, 23 Jul 2021 17:53:48 -0400
Subject: [PATCH 141/502] bnxt_en: Add missing periodic PHC overflow check

We use the timecounter APIs for the 48-bit PHC and packet timestamps.
We must periodically update the timecounter at roughly half the
overflow interval.  The overflow interval is about 78 hours, so
update it every 19 hours (1/4 interval) for some extra margins.

Fixes: 390862f45c85 ("bnxt_en: Get the full 48-bit hardware timestamp periodically")
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 7 +++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 4 ++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index 9089e7f3fbd4..ec381c2423b8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -353,6 +353,12 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
 
 	bnxt_ptp_get_current_time(bp);
 	ptp->next_period = now + HZ;
+	if (time_after_eq(now, ptp->next_overflow_check)) {
+		spin_lock_bh(&ptp->ptp_lock);
+		timecounter_read(&ptp->tc);
+		spin_unlock_bh(&ptp->ptp_lock);
+		ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD;
+	}
 	return HZ;
 }
 
@@ -423,6 +429,7 @@ int bnxt_ptp_init(struct bnxt *bp)
 	ptp->cc.shift = 0;
 	ptp->cc.mult = 1;
 
+	ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
 	timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
 
 	ptp->ptp_info = bnxt_ptp_caps;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index 4135ea3ec788..254ba7bc0f99 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -32,6 +32,10 @@ struct bnxt_ptp_cfg {
 	u64			current_time;
 	u64			old_time;
 	unsigned long		next_period;
+	unsigned long		next_overflow_check;
+	/* 48-bit PHC overflows in 78 hours.  Check overflow every 19 hours. */
+	#define BNXT_PHC_OVERFLOW_PERIOD	(19 * 3600 * HZ)
+
 	u16			tx_seqid;
 	struct bnxt		*bp;
 	atomic_t		tx_avail;

From 78d9d8005e4556448f398d876f29d0ca7ab8e398 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Fri, 16 Jul 2021 21:40:51 +0800
Subject: [PATCH 142/502] riscv: stacktrace: Fix NULL pointer dereference

When CONFIG_FRAME_POINTER=y, calling dump_stack() can always trigger
NULL pointer dereference panic similar as below:

[    0.396060] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5+ #47
[    0.396692] Hardware name: riscv-virtio,qemu (DT)
[    0.397176] Call Trace:
[    0.398191] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000960
[    0.399487] Oops [#1]
[    0.399739] Modules linked in:
[    0.400135] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5+ #47
[    0.400570] Hardware name: riscv-virtio,qemu (DT)
[    0.400926] epc : walk_stackframe+0xc4/0xdc
[    0.401291]  ra : dump_backtrace+0x30/0x38
[    0.401630] epc : ffffffff80004922 ra : ffffffff8000496a sp : ffffffe000f3bd00
[    0.402115]  gp : ffffffff80cfdcb8 tp : ffffffe000f30000 t0 : ffffffff80d0b0cf
[    0.402602]  t1 : ffffffff80d0b0c0 t2 : 0000000000000000 s0 : ffffffe000f3bd60
[    0.403071]  s1 : ffffffff808bc2e8 a0 : 0000000000001000 a1 : 0000000000000000
[    0.403448]  a2 : ffffffff803d7088 a3 : ffffffff808bc2e8 a4 : 6131725dbc24d400
[    0.403820]  a5 : 0000000000001000 a6 : 0000000000000002 a7 : ffffffffffffffff
[    0.404226]  s2 : 0000000000000000 s3 : 0000000000000000 s4 : 0000000000000000
[    0.404634]  s5 : ffffffff803d7088 s6 : ffffffff808bc2e8 s7 : ffffffff80630650
[    0.405085]  s8 : ffffffff80912a80 s9 : 0000000000000008 s10: ffffffff804000fc
[    0.405388]  s11: 0000000000000000 t3 : 0000000000000043 t4 : ffffffffffffffff
[    0.405616]  t5 : 000000000000003d t6 : ffffffe000f3baa8
[    0.405793] status: 0000000000000100 badaddr: 0000000000000960 cause: 000000000000000d
[    0.406135] [<ffffffff80004922>] walk_stackframe+0xc4/0xdc
[    0.407032] [<ffffffff8000496a>] dump_backtrace+0x30/0x38
[    0.407797] [<ffffffff803d7100>] show_stack+0x40/0x4c
[    0.408234] [<ffffffff803d9e5c>] dump_stack+0x90/0xb6
[    0.409019] [<ffffffff8040423e>] ptdump_init+0x20/0xc4
[    0.409681] [<ffffffff800015b6>] do_one_initcall+0x4c/0x226
[    0.410110] [<ffffffff80401094>] kernel_init_freeable+0x1f4/0x258
[    0.410562] [<ffffffff803dba88>] kernel_init+0x22/0x148
[    0.410959] [<ffffffff800029e2>] ret_from_exception+0x0/0x14
[    0.412241] ---[ end trace b2ab92c901b96251 ]---
[    0.413099] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

The reason is the task is NULL when we finally call walk_stackframe()
the NULL is passed from __dump_stack():

|static void __dump_stack(void)
|{
|        dump_stack_print_info(KERN_DEFAULT);
|        show_stack(NULL, NULL, KERN_DEFAULT);
|}

Fix this issue by checking "task == NULL" case in walk_stackframe().

Fixes: eac2f3059e02 ("riscv: stacktrace: fix the riscv stacktrace when CONFIG_FRAME_POINTER enabled")
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Reviewed-by: Atish Patra <atish.patra@wdc.com>
Tested-by: Wende Tan <twd2.me@gmail.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/kernel/stacktrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index ac7593607fa6..315db3d0229b 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -27,7 +27,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 		fp = frame_pointer(regs);
 		sp = user_stack_pointer(regs);
 		pc = instruction_pointer(regs);
-	} else if (task == current) {
+	} else if (task == NULL || task == current) {
 		fp = (unsigned long)__builtin_frame_address(1);
 		sp = (unsigned long)__builtin_frame_address(0);
 		pc = (unsigned long)__builtin_return_address(0);

From ac059d16442f30e6a9a95d41655153e01247e710 Mon Sep 17 00:00:00 2001
From: Geetha sowjanya <gakula@marvell.com>
Date: Sun, 25 Jul 2021 13:28:24 +0530
Subject: [PATCH 143/502] octeontx2-af: Fix PKIND overlap between LBK and LMAC
 interfaces

Currently PKINDs are not assigned to LBK channels.
The default value of LBK_CHX_PKIND (channel to PKIND mapping) register
is zero, which is resulting in a overlap of pkind between LBK and CGX
LMACs. When KPU1 parser config is modified when PTP timestamping is
enabled on the CGX LMAC interface it is impacting traffic on LBK
interfaces as well.

This patch fixes the issue by reserving the PKIND#0 for LBK devices.
CGX mapped PF pkind starts from 1 and also fixes the max pkind available.

Fixes: 421572175ba5 ("octeontx2-af: Support to enable/disable HW timestamping")
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/npc.h     |  3 +++
 drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c |  1 +
 drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 11 +++++++----
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 19bad9a59c8f..243cf8070e77 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -151,7 +151,10 @@ enum npc_kpu_lh_ltype {
  * Software assigns pkind for each incoming port such as CGX
  * Ethernet interfaces, LBK interfaces, etc.
  */
+#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_VLAN_EXDSA_PKIND
+
 enum npc_pkind_type {
+	NPC_RX_LBK_PKIND = 0ULL,
 	NPC_RX_VLAN_EXDSA_PKIND = 56ULL,
 	NPC_RX_CHLEN24B_PKIND = 57ULL,
 	NPC_RX_CPT_HDR_PKIND,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 0d2cd5169018..30067668eda7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -298,6 +298,7 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
 					rvu_nix_chan_lbk(rvu, lbkid, vf + 1);
 		pfvf->rx_chan_cnt = 1;
 		pfvf->tx_chan_cnt = 1;
+		rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf);
 		rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
 					      pfvf->rx_chan_base,
 					      pfvf->rx_chan_cnt);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 1097291aaa45..52b255426c22 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1721,7 +1721,6 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr)
 {
 	struct rvu_hwinfo *hw = rvu->hw;
 	int num_pkinds, num_kpus, idx;
-	struct npc_pkind *pkind;
 
 	/* Disable all KPUs and their entries */
 	for (idx = 0; idx < hw->npc_kpus; idx++) {
@@ -1739,9 +1738,8 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr)
 	 * Check HW max count to avoid configuring junk or
 	 * writing to unsupported CSR addresses.
 	 */
-	pkind = &hw->pkind;
 	num_pkinds = rvu->kpu.pkinds;
-	num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds);
+	num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds);
 
 	for (idx = 0; idx < num_pkinds; idx++)
 		npc_config_kpuaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], 0, idx, true);
@@ -1891,7 +1889,8 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr)
 	if (npc_const1 & BIT_ULL(63))
 		npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2);
 
-	pkind->rsrc.max = (npc_const1 >> 12) & 0xFFULL;
+	pkind->rsrc.max = NPC_UNRESERVED_PKIND_COUNT;
+	hw->npc_pkinds = (npc_const1 >> 12) & 0xFFULL;
 	hw->npc_kpu_entries = npc_const1 & 0xFFFULL;
 	hw->npc_kpus = (npc_const >> 8) & 0x1FULL;
 	hw->npc_intfs = npc_const & 0xFULL;
@@ -2002,6 +2001,10 @@ int rvu_npc_init(struct rvu *rvu)
 	err = rvu_alloc_bitmap(&pkind->rsrc);
 	if (err)
 		return err;
+	/* Reserve PKIND#0 for LBKs. Power reset value of LBK_CH_PKIND is '0',
+	 * no need to configure PKIND for all LBKs separately.
+	 */
+	rvu_alloc_rsrc(&pkind->rsrc);
 
 	/* Allocate mem for pkind to PF and channel mapping info */
 	pkind->pfchan_map = devm_kcalloc(rvu->dev, pkind->rsrc.max,

From 69f0aeb13bb548e2d5710a350116e03f0273302e Mon Sep 17 00:00:00 2001
From: Geetha sowjanya <gakula@marvell.com>
Date: Sun, 25 Jul 2021 13:29:03 +0530
Subject: [PATCH 144/502] octeontx2-pf: Fix interface down flag on error

In the existing code while changing the number of TX/RX
queues using ethtool the PF/VF interface resources are
freed and reallocated (otx2_stop and otx2_open is called)
if the device is in running state. If any resource allocation
fails in otx2_open, driver free already allocated resources
and return. But again, when the number of queues changes
as the device state still running oxt2_stop is called.
In which we try to free already freed resources leading
to driver crash.
This patch fixes the issue by setting the INTF_DOWN flag on
error and free the resources in otx2_stop only if the flag is
not set.

Fixes: 50fe6c02e5ad ("octeontx2-pf: Register and handle link notifications")
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <Sunil.Goutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 7 +++----
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c      | 5 +++++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index 8df748e0677b..b906a0eb6e0d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -298,15 +298,14 @@ static int otx2_set_channels(struct net_device *dev,
 	err = otx2_set_real_num_queues(dev, channel->tx_count,
 				       channel->rx_count);
 	if (err)
-		goto fail;
+		return err;
 
 	pfvf->hw.rx_queues = channel->rx_count;
 	pfvf->hw.tx_queues = channel->tx_count;
 	pfvf->qset.cq_cnt = pfvf->hw.tx_queues +  pfvf->hw.rx_queues;
 
-fail:
 	if (if_up)
-		dev->netdev_ops->ndo_open(dev);
+		err = dev->netdev_ops->ndo_open(dev);
 
 	netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n",
 		    pfvf->hw.tx_queues, pfvf->hw.rx_queues);
@@ -410,7 +409,7 @@ static int otx2_set_ringparam(struct net_device *netdev,
 	qs->rqe_cnt = rx_count;
 
 	if (if_up)
-		netdev->netdev_ops->ndo_open(netdev);
+		return netdev->netdev_ops->ndo_open(netdev);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index f300b807a85b..2c24944a4dba 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1662,6 +1662,7 @@ int otx2_open(struct net_device *netdev)
 err_tx_stop_queues:
 	netif_tx_stop_all_queues(netdev);
 	netif_carrier_off(netdev);
+	pf->flags |= OTX2_FLAG_INTF_DOWN;
 err_free_cints:
 	otx2_free_cints(pf, qidx);
 	vec = pci_irq_vector(pf->pdev,
@@ -1689,6 +1690,10 @@ int otx2_stop(struct net_device *netdev)
 	struct otx2_rss_info *rss;
 	int qidx, vec, wrk;
 
+	/* If the DOWN flag is set resources are already freed */
+	if (pf->flags & OTX2_FLAG_INTF_DOWN)
+		return 0;
+
 	netif_carrier_off(netdev);
 	netif_tx_stop_all_queues(netdev);
 

From 4c85e57575fb9e6405d02d55aef8025c60abb824 Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Sun, 25 Jul 2021 13:29:37 +0530
Subject: [PATCH 145/502] octeontx2-pf: Dont enable backpressure on LBK links

Avoid configure backpressure for LBK links as they
don't support it and enable lmacs before configuration
pause frames.

Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool")
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/cgx.c    |  2 +-
 .../ethernet/marvell/octeontx2/nic/otx2_common.c   | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 9169849881bf..544c96c8fe1d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -1504,8 +1504,8 @@ static int cgx_lmac_init(struct cgx *cgx)
 
 		/* Add reference */
 		cgx->lmac_idmap[lmac->lmac_id] = lmac;
-		cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true);
 		set_bit(lmac->lmac_id, &cgx->lmac_bmap);
+		cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true);
 	}
 
 	return cgx_lmac_verify_fwi_version(cgx);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 7cccd802c4ed..70fcc1fd962f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -924,12 +924,14 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
 		aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt);
 		aq->cq.drop_ena = 1;
 
-		/* Enable receive CQ backpressure */
-		aq->cq.bp_ena = 1;
-		aq->cq.bpid = pfvf->bpid[0];
+		if (!is_otx2_lbkvf(pfvf->pdev)) {
+			/* Enable receive CQ backpressure */
+			aq->cq.bp_ena = 1;
+			aq->cq.bpid = pfvf->bpid[0];
 
-		/* Set backpressure level is same as cq pass level */
-		aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+			/* Set backpressure level is same as cq pass level */
+			aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+		}
 	}
 
 	/* Fill AQ info */
@@ -1186,7 +1188,7 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
 	aq->aura.fc_hyst_bits = 0; /* Store count on all updates */
 
 	/* Enable backpressure for RQ aura */
-	if (aura_id < pfvf->hw.rqpool_cnt) {
+	if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) {
 		aq->aura.bp_ena = 0;
 		aq->aura.nix0_bpid = pfvf->bpid[0];
 		/* Set backpressure level for RQ's Aura */

From 149ea30fdd5c28b89a3bfdecfc75cdab1deddb14 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 23 Jul 2021 17:56:00 +0300
Subject: [PATCH 146/502] devlink: Fix phys_port_name of virtual port and merge
 error

Merge commit cited in fixes tag was incorrect. Due to it phys_port_name
of the virtual port resulted in incorrect name.

Also the phys_port_name of the physical port was written twice due to
the merge error.

Fix it by removing the old code and inserting back the misplaced code.

Related commits of interest in net and net-next branches that resulted
in merge conflict are:

in net-next branch:
commit f285f37cb1e6 ("devlink: append split port number to the port name")

in net branch:
commit b28d8f0c25a9 ("devlink: Correct VIRTUAL port to not have phys_port attributes")

Fixes: 126285651b7 ("Merge ra.kernel.org:/pub/scm/linux/kernel/git/netdev/net")
Signed-off-by: Parav Pandit <parav@nvidia.com>
Reported-by: Niklas Schnelle <schnelle@linux.ibm.com>
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 8fdd04f00fd7..85032626de24 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -9328,18 +9328,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 
 	switch (attrs->flavour) {
 	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
-	case DEVLINK_PORT_FLAVOUR_VIRTUAL:
 		n = snprintf(name, len, "p%u", attrs->phys.port_number);
 		if (n < len && attrs->split)
 			n += snprintf(name + n, len - n, "s%u",
 				      attrs->phys.split_subport_number);
-		if (!attrs->split)
-			n = snprintf(name, len, "p%u", attrs->phys.port_number);
-		else
-			n = snprintf(name, len, "p%us%u",
-				     attrs->phys.port_number,
-				     attrs->phys.split_subport_number);
-
 		break;
 	case DEVLINK_PORT_FLAVOUR_CPU:
 	case DEVLINK_PORT_FLAVOUR_DSA:
@@ -9381,6 +9373,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 		n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
 			     attrs->pci_sf.sf);
 		break;
+	case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+		return -EOPNOTSUPP;
 	}
 
 	if (n >= len)

From ad4e1e48a6291f7fb53fbef38ca264966ffd65c9 Mon Sep 17 00:00:00 2001
From: Kevin Lo <kevlo@kevlo.org>
Date: Fri, 23 Jul 2021 21:59:27 +0800
Subject: [PATCH 147/502] net: phy: broadcom: re-add check for
 PHY_BRCM_DIS_TXCRXC_NOENRGY on the BCM54811 PHY

Restore PHY_ID_BCM54811 accidently removed by commit 5d4358ede8eb.

Fixes: 5d4358ede8eb ("net: phy: broadcom: Allow BCM54210E to configure APD")
Signed-off-by: Kevin Lo <kevlo@kevlo.org>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 7bf3011b8e77..83aea5c5cd03 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -288,7 +288,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
 	if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) {
 		if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E ||
 		    BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810 ||
-		    BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E)
+		    BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811)
 			val |= BCM54XX_SHD_SCR3_RXCTXC_DIS;
 		else
 			val |= BCM54XX_SHD_SCR3_TRDDAPD;

From 7e4960b3d66d7248b23de3251118147812b42da2 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 23 Jul 2021 18:36:09 +0800
Subject: [PATCH 148/502] mlx4: Fix missing error code in mlx4_load_one()

The error code is missing in this code scenario, add the error code
'-EINVAL' to the return value 'err'.

Eliminate the follow smatch warning:

drivers/net/ethernet/mellanox/mlx4/main.c:3538 mlx4_load_one() warn:
missing error code 'err'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Fixes: 7ae0e400cd93 ("net/mlx4_core: Flexible (asymmetric) allocation of EQs and MSI-X vectors for PF/VFs")
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 00c84656b2e7..28ac4693da3c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3535,6 +3535,7 @@ slave_start:
 
 		if (!SRIOV_VALID_STATE(dev->flags)) {
 			mlx4_err(dev, "Invalid SRIOV state\n");
+			err = -EINVAL;
 			goto err_close;
 		}
 	}

From 44379b986424b02acfa6e8c85ec5d68d89d3ccc4 Mon Sep 17 00:00:00 2001
From: Jagan Teki <jagan@amarulasolutions.com>
Date: Sun, 25 Jul 2021 23:17:37 +0530
Subject: [PATCH 149/502] drm/panel: panel-simple: Fix proper bpc for
 ytc700tlag_05_201c

ytc700tlag_05_201c panel support 8 bpc not 6 bpc as per
recent testing in i.MX8MM platform.

Fix it.

Fixes: 7a1f4fa4a629 ("drm/panel: simple: Add YTC700TLAG-05-201C")
Signed-off-by: Jagan Teki <jagan@amarulasolutions.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20210725174737.891106-1-jagan@amarulasolutions.com
---
 drivers/gpu/drm/panel/panel-simple.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 21939d4352cf..1b80290c2b53 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -4166,7 +4166,7 @@ static const struct drm_display_mode yes_optoelectronics_ytc700tlag_05_201c_mode
 static const struct panel_desc yes_optoelectronics_ytc700tlag_05_201c = {
 	.modes = &yes_optoelectronics_ytc700tlag_05_201c_mode,
 	.num_modes = 1,
-	.bpc = 6,
+	.bpc = 8,
 	.size = {
 		.width = 154,
 		.height = 90,

From 795e3d2ea68e489ee7039ac29e98bfea0e34a96c Mon Sep 17 00:00:00 2001
From: Harshvardhan Jha <harshvardhan.jha@oracle.com>
Date: Sun, 25 Jul 2021 23:28:04 +0530
Subject: [PATCH 150/502] net: qede: Fix end of loop tests for
 list_for_each_entry

The list_for_each_entry() iterator, "vlan" in this code, can never be
NULL so the warning will never be printed.

Signed-off-by: Harshvardhan Jha <harshvardhan.jha@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_filter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index c59b72c90293..a2e4dfb5cb44 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -831,7 +831,7 @@ int qede_configure_vlan_filters(struct qede_dev *edev)
 int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct qede_dev *edev = netdev_priv(dev);
-	struct qede_vlan *vlan = NULL;
+	struct qede_vlan *vlan;
 	int rc = 0;
 
 	DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid);
@@ -842,7 +842,7 @@ int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 		if (vlan->vid == vid)
 			break;
 
-	if (!vlan || (vlan->vid != vid)) {
+	if (list_entry_is_head(vlan, &edev->vlan_list, list)) {
 		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
 			   "Vlan isn't configured\n");
 		goto out;

From 058e6e0ed0eace43401c945082dec1d669b5b231 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 25 Jul 2021 13:42:50 -0400
Subject: [PATCH 151/502] sctp: improve the code for pmtu probe send and recv
 update

This patch does 3 things:

  - make sctp_transport_pl_send() and sctp_transport_pl_recv()
    return bool type to decide if more probe is needed to send.

  - pr_debug() only when probe is really needed to send.

  - count pl.raise_count in sctp_transport_pl_send() instead of
    sctp_transport_pl_recv(), and it's only incremented for the
    1st probe for the same size.

These are preparations for the next patch to make probes happen
only when there's packet loss in Search Complete state.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  4 ++--
 net/sctp/sm_statefuns.c    | 15 +++++++-------
 net/sctp/transport.c       | 41 +++++++++++++++++++++-----------------
 3 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 32fc4a309df5..f3d414ed208e 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1024,8 +1024,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
-void sctp_transport_pl_send(struct sctp_transport *t);
-void sctp_transport_pl_recv(struct sctp_transport *t);
+bool sctp_transport_pl_send(struct sctp_transport *t);
+bool sctp_transport_pl_recv(struct sctp_transport *t);
 
 
 /* This is the structure we use to queue packets as they come into
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 09a8f23ec709..32df65f68c12 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1109,12 +1109,12 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net,
 	if (!sctp_transport_pl_enabled(transport))
 		return SCTP_DISPOSITION_CONSUME;
 
-	sctp_transport_pl_send(transport);
-
-	reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
-	if (!reply)
-		return SCTP_DISPOSITION_NOMEM;
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+	if (sctp_transport_pl_send(transport)) {
+		reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
+		if (!reply)
+			return SCTP_DISPOSITION_NOMEM;
+		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+	}
 	sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
 			SCTP_TRANSPORT(transport));
 
@@ -1274,8 +1274,7 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
 		    !sctp_transport_pl_enabled(link))
 			return SCTP_DISPOSITION_DISCARD;
 
-		sctp_transport_pl_recv(link);
-		if (link->pl.state == SCTP_PL_COMPLETE)
+		if (sctp_transport_pl_recv(link))
 			return SCTP_DISPOSITION_CONSUME;
 
 		return sctp_sf_send_probe(net, ep, asoc, type, link, commands);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 397a6244dd97..23e7bd3e3bd4 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -258,16 +258,12 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 	sctp_transport_pl_update(transport);
 }
 
-void sctp_transport_pl_send(struct sctp_transport *t)
+bool sctp_transport_pl_send(struct sctp_transport *t)
 {
-	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
-		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
-
-	if (t->pl.probe_count < SCTP_MAX_PROBES) {
-		t->pl.probe_count++;
-		return;
-	}
+	if (t->pl.probe_count < SCTP_MAX_PROBES)
+		goto out;
 
+	t->pl.probe_count = 0;
 	if (t->pl.state == SCTP_PL_BASE) {
 		if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
 			t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
@@ -299,10 +295,20 @@ void sctp_transport_pl_send(struct sctp_transport *t)
 			sctp_assoc_sync_pmtu(t->asoc);
 		}
 	}
-	t->pl.probe_count = 1;
+
+out:
+	if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 &&
+	    !t->pl.probe_count)
+		t->pl.raise_count++;
+
+	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+	t->pl.probe_count++;
+	return true;
 }
 
-void sctp_transport_pl_recv(struct sctp_transport *t)
+bool sctp_transport_pl_recv(struct sctp_transport *t)
 {
 	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
 		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
@@ -323,7 +329,7 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
 		if (!t->pl.probe_high) {
 			t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
 					       SCTP_MAX_PLPMTU);
-			return;
+			return false;
 		}
 		t->pl.probe_size += SCTP_PL_MIN_STEP;
 		if (t->pl.probe_size >= t->pl.probe_high) {
@@ -335,14 +341,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
 			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
 			sctp_assoc_sync_pmtu(t->asoc);
 		}
-	} else if (t->pl.state == SCTP_PL_COMPLETE) {
-		t->pl.raise_count++;
-		if (t->pl.raise_count == 30) {
-			/* Raise probe_size again after 30 * interval in Search Complete */
-			t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
-			t->pl.probe_size += SCTP_PL_MIN_STEP;
-		}
+	} else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) {
+		/* Raise probe_size again after 30 * interval in Search Complete */
+		t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+		t->pl.probe_size += SCTP_PL_MIN_STEP;
 	}
+
+	return t->pl.state == SCTP_PL_COMPLETE;
 }
 
 static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)

From eacf078cf4c7aa23e9591738511f142cc39b5186 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 25 Jul 2021 13:42:51 -0400
Subject: [PATCH 152/502] sctp: send pmtu probe only if packet loss in Search
 Complete state

This patch is to introduce last_rtx_chunks into sctp_transport to detect
if there's any packet retransmission/loss happened by checking against
asoc's rtx_data_chunks in sctp_transport_pl_send().

If there is, namely, transport->last_rtx_chunks != asoc->rtx_data_chunks,
the pmtu probe will be sent out. Otherwise, increment the pl.raise_count
and return when it's in Search Complete state.

With this patch, if in Search Complete state, which is a long period, it
doesn't need to keep probing the current pmtu unless there's data packet
loss. This will save quite some traffic.

v1->v2:
  - add the missing Fixes tag.

Fixes: 0dac127c0557 ("sctp: do black hole detection in search complete state")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 1 +
 net/sctp/transport.c       | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index f3d414ed208e..651bba654d77 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -984,6 +984,7 @@ struct sctp_transport {
 	} cacc;
 
 	struct {
+		__u32 last_rtx_chunks;
 		__u16 pmtu;
 		__u16 probe_size;
 		__u16 probe_high;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 23e7bd3e3bd4..a3d3ca6dd63d 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -263,6 +263,7 @@ bool sctp_transport_pl_send(struct sctp_transport *t)
 	if (t->pl.probe_count < SCTP_MAX_PROBES)
 		goto out;
 
+	t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
 	t->pl.probe_count = 0;
 	if (t->pl.state == SCTP_PL_BASE) {
 		if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
@@ -298,8 +299,10 @@ bool sctp_transport_pl_send(struct sctp_transport *t)
 
 out:
 	if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 &&
-	    !t->pl.probe_count)
+	    !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) {
 		t->pl.raise_count++;
+		return false;
+	}
 
 	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
 		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
@@ -313,6 +316,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
 	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
 		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
 
+	t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
 	t->pl.pmtu = t->pl.probe_size;
 	t->pl.probe_count = 0;
 	if (t->pl.state == SCTP_PL_BASE) {

From 9f66861181e64dc192bea136da6c91528910002e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 4 Jul 2021 16:01:37 -0700
Subject: [PATCH 153/502] m68k/coldfire: change pll var. to clk_pll

DEFINE_CLK() makes the variable name be clk_xyz, so variable
'pll' should instead be 'clk_pll'.

In file included from ../arch/m68k/coldfire/m525x.c:12:
../arch/m68k/coldfire/m525x.c:29:30: error: 'pll' undeclared here (not in a function)
   29 |  CLKDEV_INIT(NULL, "pll.0", &pll),
      |                              ^~~
../include/linux/clkdev.h:30:10: note: in definition of macro 'CLKDEV_INIT'
   30 |   .clk = c, \
      |          ^
In file included from ../arch/m68k/coldfire/m525x.c:21:
../arch/m68k/include/asm/mcfclk.h:43:27: warning: 'clk_pll' defined but not used [-Wunused-variable]
   43 |         static struct clk clk_##clk_ref = { \
      |                           ^~~~
../arch/m68k/coldfire/m525x.c:25:1: note: in expansion of macro 'DEFINE_CLK'
   25 | DEFINE_CLK(pll, "pll.0", MCF_CLK);
      | ^~~~~~~~~~

Fixes: 63aadb77669a ("m68k: coldfire: use clkdev_lookup on most coldfire")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: linux-m68k@lists.linux-m68k.org
Cc: uclinux-dev@uclinux.org
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Ungerer <gerg@linux-m68k.org>
---
 arch/m68k/coldfire/m525x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/m68k/coldfire/m525x.c b/arch/m68k/coldfire/m525x.c
index 2c4d2ca2f20d..485375112e28 100644
--- a/arch/m68k/coldfire/m525x.c
+++ b/arch/m68k/coldfire/m525x.c
@@ -26,7 +26,7 @@ DEFINE_CLK(pll, "pll.0", MCF_CLK);
 DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
 
 static struct clk_lookup m525x_clk_lookup[] = {
-	CLKDEV_INIT(NULL, "pll.0", &pll),
+	CLKDEV_INIT(NULL, "pll.0", &clk_pll),
 	CLKDEV_INIT(NULL, "sys.0", &clk_sys),
 	CLKDEV_INIT("mcftmr.0", NULL, &clk_sys),
 	CLKDEV_INIT("mcftmr.1", NULL, &clk_sys),

From e4b016f4b44176807e545fd437cd519b6380e86f Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Fri, 8 Nov 2019 15:36:08 +0000
Subject: [PATCH 154/502] alpha: __udiv_qrnnd should be exported

When building an alpha kernel with mpi set as module, I hit this build
error:
ERROR: "__udiv_qrnnd" [lib/mpi/mpi.ko] undefined!
make[2]: *** [scripts/Makefile.modpost:92: __modpost] Error 1
make[1]: *** [Makefile:1266: modules] Error 2

This is due to __udiv_qrnnd not exported.
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/math-emu/math.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c
index d568cd9a3e43..5617ac0889b8 100644
--- a/arch/alpha/math-emu/math.c
+++ b/arch/alpha/math-emu/math.c
@@ -401,3 +401,5 @@ alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask)
 egress:
 	return si_code;
 }
+
+EXPORT_SYMBOL(__udiv_qrnnd);

From a09c33cbf3db545d44eab16eb528acf834310690 Mon Sep 17 00:00:00 2001
From: "Alexander A. Klimov" <grandmaster@al2klimov.de>
Date: Mon, 13 Jul 2020 11:43:41 +0200
Subject: [PATCH 155/502] alpha: Kconfig: Replace HTTP links with HTTPS ones

Rationale:
Reduces attack surface on kernel devs opening the links for MITM
as HTTPS traffic is much harder to manipulate.

Deterministic algorithm:
For each file:
  If not .svg:
    For each line:
      If doesn't contain `\bxmlns\b`:
        For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`:
	  If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`:
            If both the HTTP and HTTPS versions
            return 200 OK and serve the same content:
              Replace HTTP with HTTPS.

Signed-off-by: Alexander A. Klimov <grandmaster@al2klimov.de>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 77d3280dc678..f4ffad76e8b1 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -532,7 +532,7 @@ config SMP
 	  will run faster if you say N here.
 
 	  See also the SMP-HOWTO available at
-	  <http://www.tldp.org/docs.html#howto>.
+	  <https://www.tldp.org/docs.html#howto>.
 
 	  If you don't know what to do here, say N.
 

From 5e3c3a0ae5d194f0a464aaaa71d764d96f2e7245 Mon Sep 17 00:00:00 2001
From: Chen Li <chenli@uniontech.com>
Date: Tue, 13 Oct 2020 14:31:52 +0800
Subject: [PATCH 156/502] alpha: remove undef inline in compiler.h

since 889b3c1245de48ed0cacf7aebb25c489d3e4a3e9, CONFIG_OPTIMIZE_INLINING
is removed entirely and inline is always defined to `inline __gnu_inline
__inline_maybe_unused notrace` in compiler_types.h

Besides, undef inline here also means it never use
__attribute__((__gnu_inline__)), so `extern inline` function can never
be defined header files, otherwise multiple definition errors will
happen, e.g. if multiple translation units use alpha/include/asm/pal.h
will report multiple definitions, because there are many extern inline
function definitions in this header.

``` c
extern inline TYPE NAME(void)					\
{								\
	register TYPE __r0 __asm__("$0");			\
	__asm__ __volatile__(					\
...
```

Ofc, it is also ok to remove `extern` in `extern inline` here, then all
of iso c99 and gnuc99/89 are ok, but there are also other alpha headers
have such function definitions.

Signed-off-by: chenli <chenli@uniontech.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/include/asm/compiler.h | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/arch/alpha/include/asm/compiler.h b/arch/alpha/include/asm/compiler.h
index 5159ba259d65..ae645959018a 100644
--- a/arch/alpha/include/asm/compiler.h
+++ b/arch/alpha/include/asm/compiler.h
@@ -4,15 +4,4 @@
 
 #include <uapi/asm/compiler.h>
 
-/* Some idiots over in <linux/compiler.h> thought inline should imply
-   always_inline.  This breaks stuff.  We'll include this file whenever
-   we run into such problems.  */
-
-#include <linux/compiler.h>
-#undef inline
-#undef __inline__
-#undef __inline
-#undef __always_inline
-#define __always_inline		inline __attribute__((always_inline))
-
 #endif /* __ALPHA_COMPILER_H */

From f0443da1d8560f4c664ab0f9a900ed69e9aaeb14 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 16 Dec 2020 21:12:41 +0800
Subject: [PATCH 157/502] alpha: convert comma to semicolon

Replace a comma between expression statements by a semicolon.

Fixes: cba1ec7e88a0 ("alpha: switch to generic kernel_thread()")
Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index ef0c08ed0481..a5123ea426ce 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -256,7 +256,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		childstack->r26 = (unsigned long) ret_from_kernel_thread;
 		childstack->r9 = usp;	/* function */
 		childstack->r10 = kthread_arg;
-		childregs->hae = alpha_mv.hae_cache,
+		childregs->hae = alpha_mv.hae_cache;
 		childti->pcb.usp = 0;
 		return 0;
 	}

From caace6ca4e06f09413fb8f8a63319594cfb7d47d Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Tue, 5 Jan 2021 10:16:27 -0500
Subject: [PATCH 158/502] alpha: Send stop IPI to send to online CPUs

This issue was noticed while debugging a shutdown issue where some
secondary CPUs are not being shutdown correctly.  A fix for that [1] requires
that secondary cpus be offlined using the cpu_online_mask so that the
stop operation is a no-op if CPU HOTPLUG is disabled.  I, like the author in
[1] looked at the architectures and found that alpha is one of two
architectures that executes smp_send_stop() on all possible CPUs.

On alpha, smp_send_stop() sends an IPI to all possible CPUs but only needs
to send them to online CPUs.

Send the stop IPI to only the online CPUs.

[1] https://lkml.org/lkml/2020/1/10/250

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 4b2575f936d4..cb64e4797d2a 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -582,7 +582,7 @@ void
 smp_send_stop(void)
 {
 	cpumask_t to_whom;
-	cpumask_copy(&to_whom, cpu_possible_mask);
+	cpumask_copy(&to_whom, cpu_online_mask);
 	cpumask_clear_cpu(smp_processor_id(), &to_whom);
 #ifdef DEBUG_IPI_MSG
 	if (hard_smp_processor_id() != boot_cpu_id)

From bfd736e3ffcc9dfc23c0a619fcc131eefd91d7ca Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 18 Jan 2021 11:20:29 +0000
Subject: [PATCH 159/502] alpha: defconfig: add necessary configs for boot
 testing

Gentoo's KernelCI will soon boot test alpha kernel and we need
CONFIG_DEVTMPFS=y to be set for that.
Note that CONFIG_DEVTMPFS=y is already necessary for lot of other
distribution/tools like recent udev/systemd.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig
index dd2dd9f0861f..7f1ca30b115b 100644
--- a/arch/alpha/configs/defconfig
+++ b/arch/alpha/configs/defconfig
@@ -70,3 +70,4 @@ CONFIG_DEBUG_INFO=y
 CONFIG_ALPHA_LEGACY_START_ADDRESS=y
 CONFIG_MATHEMU=y
 CONFIG_CRYPTO_HMAC=y
+CONFIG_DEVTMPFS=y

From 8f34ed9d959786e2f2a643a1237f69f0171911cf Mon Sep 17 00:00:00 2001
From: tangchunyou <tangchunyou@yulong.com>
Date: Wed, 20 Jan 2021 21:34:10 +0800
Subject: [PATCH 160/502] alpha: fix typos in a comment

"kerne" -> "kernel"

Signed-off-by: tangchunyou <tangchunyou@yulong.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/boot/bootpz.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c
index 43af71835adf..90a2b341e9c0 100644
--- a/arch/alpha/boot/bootpz.c
+++ b/arch/alpha/boot/bootpz.c
@@ -200,7 +200,7 @@ extern char _end;
 	START_ADDR	KSEG address of the entry point of kernel code.
 
 	ZERO_PGE	KSEG address of page full of zeroes, but 
-			upon entry to kerne cvan be expected
+			upon entry to kernel, it can be expected
 			to hold the parameter list and possible
 			INTRD information.
 

From 6208721f1399912a0a53c77ed86dcc25d3e20efb Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 20 Apr 2021 19:56:31 +0200
Subject: [PATCH 161/502] binfmt: remove support for em86 (alpha only)

We have a fairly specific alpha binary loader in Linux: running x86
(i386, i486) binaries via the em86 [1] emulator. As noted in the Kconfig
option, the same behavior can be achieved via binfmt_misc, for example,
more nowadays used for running qemu-user.

An example on how to get binfmt_misc running with em86 can be found in
Documentation/admin-guide/binfmt-misc.rst

The defconfig does not have CONFIG_BINFMT_EM86=y set. And doing a
	make defconfig && make olddefconfig
results in
	# CONFIG_BINFMT_EM86 is not set

... as we don't seem to have any supported Linux distirbution for alpha
anymore, there isn't really any "default" user of that feature anymore.

Searching for "CONFIG_BINFMT_EM86=y" reveals mostly discussions from
around 20 years ago, like [2] describing how to get netscape via em86
running via em86, or [3] discussing that running wine or installing
Win 3.11 through em86 would be a nice feature.

The latest binaries available for em86 are from 2000, version 2.2.1 [4] --
which translates to "unsupported"; further, em86 doesn't even work with
glibc-2.x but only with glibc-2.0 [4, 5]. These are clear signs that
there might not be too many em86 users out there, especially users
relying on modern Linux kernels.

Even though the code footprint is relatively small, let's just get rid
of this blast from the past that's effectively unused.

[1] http://ftp.dreamtime.org/pub/linux/Linux-Alpha/em86/v0.4/docs/em86.html
[2] https://static.lwn.net/1998/1119/a/alpha-netscape.html
[3] https://groups.google.com/g/linux.debian.alpha/c/AkGuQHeCe0Y
[4] http://zeniv.linux.org.uk/pub/linux/alpha/em86/v2.2-1/relnotes.2.2.1.html
[5] https://forum.teamspeak.com/archive/index.php/t-1477.html

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: linux-alpha@vger.kernel.org
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 fs/Kconfig.binfmt |  15 -------
 fs/Makefile       |   1 -
 fs/binfmt_em86.c  | 110 ----------------------------------------------
 3 files changed, 126 deletions(-)
 delete mode 100644 fs/binfmt_em86.c

diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 06fb7a93a1bd..4d5ae61580aa 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -168,21 +168,6 @@ config OSF4_COMPAT
 	  with v4 shared libraries freely available from Compaq. If you're
 	  going to use shared libraries from Tru64 version 5.0 or later, say N.
 
-config BINFMT_EM86
-	tristate "Kernel support for Linux/Intel ELF binaries"
-	depends on ALPHA
-	help
-	  Say Y here if you want to be able to execute Linux/Intel ELF
-	  binaries just like native Alpha binaries on your Alpha machine. For
-	  this to work, you need to have the emulator /usr/bin/em86 in place.
-
-	  You can get the same functionality by saying N here and saying Y to
-	  "Kernel support for MISC binaries".
-
-	  You may answer M to compile the emulation support as a module and
-	  later load the module when you want to use a Linux/Intel binary. The
-	  module will be called binfmt_em86. If unsure, say Y.
-
 config BINFMT_MISC
 	tristate "Kernel support for MISC binaries"
 	help
diff --git a/fs/Makefile b/fs/Makefile
index 9c708e1fbe8f..f98f3e691c37 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -39,7 +39,6 @@ obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
 obj-$(CONFIG_FS_VERITY)		+= verity/
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
 obj-$(CONFIG_BINFMT_AOUT)	+= binfmt_aout.o
-obj-$(CONFIG_BINFMT_EM86)	+= binfmt_em86.o
 obj-$(CONFIG_BINFMT_MISC)	+= binfmt_misc.o
 obj-$(CONFIG_BINFMT_SCRIPT)	+= binfmt_script.o
 obj-$(CONFIG_BINFMT_ELF)	+= binfmt_elf.o
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
deleted file mode 100644
index 06b9b9fddf70..000000000000
--- a/fs/binfmt_em86.c
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  linux/fs/binfmt_em86.c
- *
- *  Based on linux/fs/binfmt_script.c
- *  Copyright (C) 1996  Martin von Löwis
- *  original #!-checking implemented by tytso.
- *
- *  em86 changes Copyright (C) 1997  Jim Paradis
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/binfmts.h>
-#include <linux/elf.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/errno.h>
-
-
-#define EM86_INTERP	"/usr/bin/em86"
-#define EM86_I_NAME	"em86"
-
-static int load_em86(struct linux_binprm *bprm)
-{
-	const char *i_name, *i_arg;
-	char *interp;
-	struct file * file;
-	int retval;
-	struct elfhdr	elf_ex;
-
-	/* Make sure this is a Linux/Intel ELF executable... */
-	elf_ex = *((struct elfhdr *)bprm->buf);
-
-	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
-		return  -ENOEXEC;
-
-	/* First of all, some simple consistency checks */
-	if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
-		(!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
-		!bprm->file->f_op->mmap) {
-			return -ENOEXEC;
-	}
-
-	/* Need to be able to load the file after exec */
-	if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
-		return -ENOENT;
-
-	/* Unlike in the script case, we don't have to do any hairy
-	 * parsing to find our interpreter... it's hardcoded!
-	 */
-	interp = EM86_INTERP;
-	i_name = EM86_I_NAME;
-	i_arg = NULL;		/* We reserve the right to add an arg later */
-
-	/*
-	 * Splice in (1) the interpreter's name for argv[0]
-	 *           (2) (optional) argument to interpreter
-	 *           (3) filename of emulated file (replace argv[0])
-	 *
-	 * This is done in reverse order, because of how the
-	 * user environment and arguments are stored.
-	 */
-	remove_arg_zero(bprm);
-	retval = copy_string_kernel(bprm->filename, bprm);
-	if (retval < 0) return retval; 
-	bprm->argc++;
-	if (i_arg) {
-		retval = copy_string_kernel(i_arg, bprm);
-		if (retval < 0) return retval; 
-		bprm->argc++;
-	}
-	retval = copy_string_kernel(i_name, bprm);
-	if (retval < 0)	return retval;
-	bprm->argc++;
-
-	/*
-	 * OK, now restart the process with the interpreter's inode.
-	 * Note that we use open_exec() as the name is now in kernel
-	 * space, and we don't need to copy it.
-	 */
-	file = open_exec(interp);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-
-	bprm->interpreter = file;
-	return 0;
-}
-
-static struct linux_binfmt em86_format = {
-	.module		= THIS_MODULE,
-	.load_binary	= load_em86,
-};
-
-static int __init init_em86_binfmt(void)
-{
-	register_binfmt(&em86_format);
-	return 0;
-}
-
-static void __exit exit_em86_binfmt(void)
-{
-	unregister_binfmt(&em86_format);
-}
-
-core_initcall(init_em86_binfmt);
-module_exit(exit_em86_binfmt);
-MODULE_LICENSE("GPL");

From 15b9e384030cf34de33deed70d670a8dc0fc784a Mon Sep 17 00:00:00 2001
From: He Zhe <zhe.he@windriver.com>
Date: Mon, 26 Apr 2021 17:16:29 +0800
Subject: [PATCH 162/502] alpha: Add syscall_get_return_value()

audit now requires syscall_get_return_value instead of regs_return_value
to retrieve syscall return code . Other architectures that support audit
have already define this function.

Signed-off-by: He Zhe <zhe.he@windriver.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/include/asm/syscall.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h
index 11c688c1d7ec..f21babaeed85 100644
--- a/arch/alpha/include/asm/syscall.h
+++ b/arch/alpha/include/asm/syscall.h
@@ -9,4 +9,10 @@ static inline int syscall_get_arch(struct task_struct *task)
 	return AUDIT_ARCH_ALPHA;
 }
 
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->r0;
+}
+
 #endif	/* _ASM_ALPHA_SYSCALL_H */

From ee3e9fa29e8b2553097009dac270cbed0f03f6d2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 23:37:20 +0200
Subject: [PATCH 163/502] alpha: fp_emul: avoid init/cleanup_module names

This is one of the last modules using the old calling conventions
for module init/exit functions. Change it over to the style used
everywhere else.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/math-emu/math.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c
index 5617ac0889b8..f7cef66af88d 100644
--- a/arch/alpha/math-emu/math.c
+++ b/arch/alpha/math-emu/math.c
@@ -65,7 +65,7 @@ static long (*save_emul) (unsigned long pc);
 long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
 long do_alpha_fp_emul(unsigned long);
 
-int init_module(void)
+static int alpha_fp_emul_init_module(void)
 {
 	save_emul_imprecise = alpha_fp_emul_imprecise;
 	save_emul = alpha_fp_emul;
@@ -73,12 +73,14 @@ int init_module(void)
 	alpha_fp_emul = do_alpha_fp_emul;
 	return 0;
 }
+module_init(alpha_fp_emul_init_module);
 
-void cleanup_module(void)
+static void alpha_fp_emul_cleanup_module(void)
 {
 	alpha_fp_emul_imprecise = save_emul_imprecise;
 	alpha_fp_emul = save_emul;
 }
+module_exit(alpha_fp_emul_cleanup_module);
 
 #undef  alpha_fp_emul_imprecise
 #define alpha_fp_emul_imprecise		do_alpha_fp_emul_imprecise

From 3e0c6d15adeafa2afcb4c95c892bb5980c1430e6 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Tue, 25 May 2021 20:16:10 -0700
Subject: [PATCH 164/502] alpha: Remove space between * and parameter name

'struct pcb_struct * pcb_va' should be 'struct pcb_struct *pcb_va'.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/boot/bootp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c
index 00266e6e1b71..b4faba2432d5 100644
--- a/arch/alpha/boot/bootp.c
+++ b/arch/alpha/boot/bootp.c
@@ -23,7 +23,7 @@
 #include "ksize.h"
 
 extern unsigned long switch_to_osf_pal(unsigned long nr,
-	struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
+	struct pcb_struct *pcb_va, struct pcb_struct *pcb_pa,
 	unsigned long *vptb);
 
 extern void move_stack(unsigned long new_stack);

From fc520525c18ac2207792eb2067c6b626326a87ad Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Fri, 2 Jul 2021 05:48:12 -0700
Subject: [PATCH 165/502] alpha: fix spelling mistakes

Fix some spelling mistakes in comments:
delarations ==> declarations
softare ==> software
suffiently ==> sufficiently
requred ==> required
unaliged ==> unaligned

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/boot/misc.c           | 2 +-
 arch/alpha/kernel/osf_sys.c      | 4 ++--
 arch/alpha/kernel/perf_event.c   | 2 +-
 arch/alpha/kernel/sys_nautilus.c | 2 +-
 arch/alpha/kernel/traps.c        | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c
index d65192202703..325d4dd4f904 100644
--- a/arch/alpha/boot/misc.c
+++ b/arch/alpha/boot/misc.c
@@ -30,7 +30,7 @@ extern long srm_printk(const char *, ...)
      __attribute__ ((format (printf, 1, 2)));
 
 /*
- * gzip delarations
+ * gzip declarations
  */
 #define OF(args)  args
 #define STATIC static
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index d5367a1c6300..d31167e3269c 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -834,7 +834,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
 			return -EFAULT;
 		state = &current_thread_info()->ieee_state;
 
-		/* Update softare trap enable bits.  */
+		/* Update software trap enable bits.  */
 		*state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK);
 
 		/* Update the real fpcr.  */
@@ -854,7 +854,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
 		state = &current_thread_info()->ieee_state;
 		exc &= IEEE_STATUS_MASK;
 
-		/* Update softare trap enable bits.  */
+		/* Update software trap enable bits.  */
  		swcr = (*state & IEEE_SW_MASK) | exc;
 		*state |= exc;
 
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index e7a59d927d78..efcf7321701b 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -574,7 +574,7 @@ static void alpha_pmu_start(struct perf_event *event, int flags)
  * Check that CPU performance counters are supported.
  * - currently support EV67 and later CPUs.
  * - actually some later revisions of the EV6 have the same PMC model as the
- *     EV67 but we don't do suffiently deep CPU detection to detect them.
+ *     EV67 but we don't do sufficiently deep CPU detection to detect them.
  *     Bad luck to the very few people who might have one, I guess.
  */
 static int supported_cpu(void)
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index 53adf43dcd44..96fd6ff3fe81 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -212,7 +212,7 @@ nautilus_init_pci(void)
 
 	/* Use default IO. */
 	pci_add_resource(&bridge->windows, &ioport_resource);
-	/* Irongate PCI memory aperture, calculate requred size before
+	/* Irongate PCI memory aperture, calculate required size before
 	   setting it up. */
 	pci_add_resource(&bridge->windows, &irongate_mem);
 
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 921d4b6e4d95..5398f982bdd1 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -730,7 +730,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
 	long error;
 
 	/* Check the UAC bits to decide what the user wants us to do
-	   with the unaliged access.  */
+	   with the unaligned access.  */
 
 	if (!(current_thread_info()->status & TS_UAC_NOPRINT)) {
 		if (__ratelimit(&ratelimit)) {

From d7f237df53457cf0cbdb9943b9b7c93a05e2fdb6 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Fri, 23 Jul 2021 05:52:25 -0400
Subject: [PATCH 166/502] drm/i915/bios: Fix ports mask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PORT_A to PORT_F are regular integers defined in the enum port,
while for_each_port_masked requires a bit mask for the ports.

Current given mask: 0b111
Desired mask: 0b111111

I noticed this while Christoph was reporting a bug found on headless
GVT configuration which bisect blamed commit 3ae04c0c7e63 ("drm/i915/bios:
limit default outputs to ports A through F")

v2: Avoid unnecessary line continuations as pointed by CI and Christoph

Cc: Christoph Hellwig <hch@infradead.org>
Fixes: 3ae04c0c7e63 ("drm/i915/bios: limit default outputs to ports A through F")
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Tested-by: Christoph Hellwig <hch@infradead.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20210723095225.562913-1-rodrigo.vivi@intel.com
(cherry picked from commit 9b52aa720168859526bf90d77fa210fc0336f170)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/display/intel_bios.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 5b6922e28ef2..aa667fa71158 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -2166,7 +2166,8 @@ static void
 init_vbt_missing_defaults(struct drm_i915_private *i915)
 {
 	enum port port;
-	int ports = PORT_A | PORT_B | PORT_C | PORT_D | PORT_E | PORT_F;
+	int ports = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) |
+		    BIT(PORT_D) | BIT(PORT_E) | BIT(PORT_F);
 
 	if (!HAS_DDI(i915) && !IS_CHERRYVIEW(i915))
 		return;

From 5d3a618f356595f132ee85c63a1b5f007a71f23c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 23 Jul 2021 16:43:52 -0700
Subject: [PATCH 167/502] drm/i915: fix not reading DSC disable fuse in GLK

We were using GRAPHICS_VER() to handle SKL_DFSM register, which means we
were not handling GLK correctly since that has GRAPHICS_VER == 9, but
DISPLAY_VER == 10. Switch the entire branch to check DISPLAY_VER
which makes it more in line with Bspec.

Even though the Bspec has an exception for RKL in
TGL_DFSM_PIPE_D_DISABLE, we don't have to do anything as the bit has
disable semantic and RKL doesn't have pipe D.

Bspec: 50075, 7548
Fixes: 2b5a4562edd0 ("drm/i915/display: Simplify GLK display version tests")
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210723234352.214459-1-lucas.demarchi@intel.com
(cherry picked from commit 4fd177288a4ee046bd8590355a64de855dcf77e2)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_device_info.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 7eaa92fee421..e0a10f36acc1 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -325,7 +325,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 			info->pipe_mask &= ~BIT(PIPE_C);
 			info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
 		}
-	} else if (HAS_DISPLAY(dev_priv) && GRAPHICS_VER(dev_priv) >= 9) {
+	} else if (HAS_DISPLAY(dev_priv) && DISPLAY_VER(dev_priv) >= 9) {
 		u32 dfsm = intel_de_read(dev_priv, SKL_DFSM);
 
 		if (dfsm & SKL_DFSM_PIPE_A_DISABLE) {
@@ -340,7 +340,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 			info->pipe_mask &= ~BIT(PIPE_C);
 			info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
 		}
-		if (GRAPHICS_VER(dev_priv) >= 12 &&
+
+		if (DISPLAY_VER(dev_priv) >= 12 &&
 		    (dfsm & TGL_DFSM_PIPE_D_DISABLE)) {
 			info->pipe_mask &= ~BIT(PIPE_D);
 			info->cpu_transcoder_mask &= ~BIT(TRANSCODER_D);
@@ -352,10 +353,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 		if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE)
 			info->display.has_fbc = 0;
 
-		if (GRAPHICS_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
+		if (DISPLAY_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
 			info->display.has_dmc = 0;
 
-		if (GRAPHICS_VER(dev_priv) >= 10 &&
+		if (DISPLAY_VER(dev_priv) >= 10 &&
 		    (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE))
 			info->display.has_dsc = 0;
 	}

From b4bde5554f70fb04ff07989fdc1356ab84d6f482 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 22 Jul 2021 16:29:22 -0700
Subject: [PATCH 168/502] drm/i915/display: split DISPLAY_VER 9 and 10 in
 intel_setup_outputs()

Commit 5a9d38b20a5a ("drm/i915/display: hide workaround for broken vbt
in intel_bios.c") moved the workaround for broken or missing VBT to
intel_bios.c. However is_port_valid() only protects the handling of
different skus of the same display version. Since in
intel_setup_outputs() we share the code path with version 9, this would
also create port F for SKL/KBL, which does not exist.

Missing VBT can be reproduced when starting a headless QEMU with no
opregion available.

Avoid the issue by splitting versions 9 and 10 in intel_setup_outputs(),
which also makes it more clear what code path it's taking for each
version.

v2: move generic display version after Geminilake since that one has
a different set of outputs

Fixes: 5a9d38b20a5a ("drm/i915/display: hide workaround for broken vbt in intel_bios.c")
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reported-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Tested-by: Christoph Hellwig <hch@lst.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20210722232922.3796835-1-lucas.demarchi@intel.com
(cherry picked from commit ec387b8ff8d757561369be9a280cf63f23bbb926)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 3bad4e00f7be..2d5d21740c25 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -11361,13 +11361,19 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
 		intel_ddi_init(dev_priv, PORT_B);
 		intel_ddi_init(dev_priv, PORT_C);
 		vlv_dsi_init(dev_priv);
-	} else if (DISPLAY_VER(dev_priv) >= 9) {
+	} else if (DISPLAY_VER(dev_priv) == 10) {
 		intel_ddi_init(dev_priv, PORT_A);
 		intel_ddi_init(dev_priv, PORT_B);
 		intel_ddi_init(dev_priv, PORT_C);
 		intel_ddi_init(dev_priv, PORT_D);
 		intel_ddi_init(dev_priv, PORT_E);
 		intel_ddi_init(dev_priv, PORT_F);
+	} else if (DISPLAY_VER(dev_priv) >= 9) {
+		intel_ddi_init(dev_priv, PORT_A);
+		intel_ddi_init(dev_priv, PORT_B);
+		intel_ddi_init(dev_priv, PORT_C);
+		intel_ddi_init(dev_priv, PORT_D);
+		intel_ddi_init(dev_priv, PORT_E);
 	} else if (HAS_DDI(dev_priv)) {
 		u32 found;
 

From 480e93e12aa04d857f7cc2e6fcec181c0d690404 Mon Sep 17 00:00:00 2001
From: Harshvardhan Jha <harshvardhan.jha@oracle.com>
Date: Sun, 25 Jul 2021 23:23:55 +0530
Subject: [PATCH 169/502] net: xfrm: Fix end of loop tests for
 list_for_each_entry

The list_for_each_entry() iterator, "pos" in this code, can never be
NULL so the warning will never be printed.

Signed-off-by: Harshvardhan Jha <harshvardhan.jha@oracle.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_ipcomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 2e8afe078d61..cb40ff0ff28d 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -241,7 +241,7 @@ static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms)
 			break;
 	}
 
-	WARN_ON(!pos);
+	WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list));
 
 	if (--pos->users)
 		return;

From f0c6225531e4a9e43e51c5f7b02089bdd725c734 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sat, 17 Jul 2021 23:11:38 -0500
Subject: [PATCH 170/502] ACPI: PM: Add support for upcoming AMD uPEP HID
 AMDI007

AMD systems with uPEP HID AMDI007 should be using revision 2 and
the AMD method.

Fixes: 8fbd6c15ea0a ("ACPI: PM: Adjust behavior for field problems on AMD systems")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 1c507804fb10..fbdbef0ab552 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -378,19 +378,25 @@ static int lps0_device_attach(struct acpi_device *adev,
 		 * AMDI0006:
 		 * - should use rev_id 0x0
 		 * - function mask = 0x3: Should use Microsoft method
+		 * AMDI0007:
+		 * - Should use rev_id 0x2
+		 * - Should only use AMD method
 		 */
 		const char *hid = acpi_device_hid(adev);
-		rev_id = 0;
+		rev_id = strcmp(hid, "AMDI0007") ? 0 : 2;
 		lps0_dsm_func_mask = validate_dsm(adev->handle,
 					ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid);
 		lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle,
-					ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id,
+					ACPI_LPS0_DSM_UUID_MICROSOFT, 0,
 					&lps0_dsm_guid_microsoft);
 		if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") ||
 						 !strcmp(hid, "AMDI0005"))) {
 			lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1;
 			acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n",
 					  ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask);
+		} else if (lps0_dsm_func_mask_microsoft > 0 && !strcmp(hid, "AMDI0007")) {
+			lps0_dsm_func_mask_microsoft = -EINVAL;
+			acpi_handle_debug(adev->handle, "_DSM Using AMD method\n");
 		}
 	} else {
 		rev_id = 1;

From 94cbe7db7d757c2d481c3617ab5579a28cfc2175 Mon Sep 17 00:00:00 2001
From: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Date: Mon, 26 Jul 2021 10:20:20 +0800
Subject: [PATCH 171/502] net: stmmac: add est_irq_status callback function for
 GMAC 4.10 and 5.10

Assign dwmac5_est_irq_status to est_irq_status callback function for
GMAC 4.10 and 5.10. With this, EST related interrupts could be handled
properly.

Fixes: e49aa315cb01 ("net: stmmac: EST interrupts handling and error reporting")
Cc: <stable@vger.kernel.org> # 5.13.x
Signed-off-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Acked-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 67ba083eb90c..b21745368983 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -1249,6 +1249,7 @@ const struct stmmac_ops dwmac410_ops = {
 	.config_l3_filter = dwmac4_config_l3_filter,
 	.config_l4_filter = dwmac4_config_l4_filter,
 	.est_configure = dwmac5_est_configure,
+	.est_irq_status = dwmac5_est_irq_status,
 	.fpe_configure = dwmac5_fpe_configure,
 	.fpe_send_mpacket = dwmac5_fpe_send_mpacket,
 	.fpe_irq_status = dwmac5_fpe_irq_status,
@@ -1300,6 +1301,7 @@ const struct stmmac_ops dwmac510_ops = {
 	.config_l3_filter = dwmac4_config_l3_filter,
 	.config_l4_filter = dwmac4_config_l4_filter,
 	.est_configure = dwmac5_est_configure,
+	.est_irq_status = dwmac5_est_irq_status,
 	.fpe_configure = dwmac5_fpe_configure,
 	.fpe_send_mpacket = dwmac5_fpe_send_mpacket,
 	.fpe_irq_status = dwmac5_fpe_irq_status,

From 2ebda027148315581b89a2ed2fef84ad53b2aedd Mon Sep 17 00:00:00 2001
From: Chen Shen <peterchenshen@gmail.com>
Date: Mon, 26 Jul 2021 13:47:34 +0800
Subject: [PATCH 172/502] sctp: delete addr based on sin6_scope_id

sctp_inet6addr_event deletes 'addr' from 'local_addr_list' when setting
netdev down, but it is possible to delete the incorrect entry (match
the first one with the same ipaddr, but the different 'ifindex'), if
there are some netdevs with the same 'local-link' ipaddr added already.
It should delete the entry depending on 'sin6_addr' and 'sin6_scope_id'
both. otherwise, the endpoint will call 'sctp_sf_ootb' if it can't find
the according association when receives 'heartbeat', and finally will
reply 'abort'.

For example:
1.when linux startup
the entries in local_addr_list:
ifindex:35 addr:fe80::40:43ff:fe80:0 (eths0.201)
ifindex:36 addr:fe80::40:43ff:fe80:0 (eths0.209)
ifindex:37 addr:fe80::40:43ff:fe80:0 (eths0.210)

the route table:
local fe80::40:43ff:fe80:0 dev eths0.201
local fe80::40:43ff:fe80:0 dev eths0.209
local fe80::40:43ff:fe80:0 dev eths0.210

2.after 'ifconfig eths0.209 down'
the entries in local_addr_list:
ifindex:36 addr:fe80::40:43ff:fe80:0 (eths0.209)
ifindex:37 addr:fe80::40:43ff:fe80:0 (eths0.210)

the route table:
local fe80::40:43ff:fe80:0 dev eths0.201
local fe80::40:43ff:fe80:0 dev eths0.210

3.asoc not found for src:[fe80::40:43ff:fe80:0]:37381 dst:[:1]:53335
::1->fe80::40:43ff:fe80:0 HEARTBEAT
fe80::40:43ff:fe80:0->::1 ABORT

Signed-off-by: Chen Shen <peterchenshen@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e48dd909dee5..470dbdc27d58 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -100,8 +100,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 		list_for_each_entry_safe(addr, temp,
 					&net->sctp.local_addr_list, list) {
 			if (addr->a.sa.sa_family == AF_INET6 &&
-					ipv6_addr_equal(&addr->a.v6.sin6_addr,
-						&ifa->addr)) {
+			    ipv6_addr_equal(&addr->a.v6.sin6_addr,
+					    &ifa->addr) &&
+			    addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) {
 				sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
 				found = 1;
 				addr->valid = 0;

From 9a9e74819bb0e4694279fb437e136fe485878d25 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 16 Jul 2021 16:41:04 +0200
Subject: [PATCH 173/502] KVM: nSVM: Rename nested_svm_vmloadsave() to
 svm_copy_vmloadsave_state()

To match svm_copy_vmrun_state(), rename nested_svm_vmloadsave() to
svm_copy_vmloadsave_state().

Opportunistically add missing braces to 'else' branch in
vmload_vmsave_interception().

No functional change intended.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210716144104.465269-1-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 2 +-
 arch/x86/kvm/svm/svm.c    | 7 ++++---
 arch/x86/kvm/svm/svm.h    | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3bd09c50c98b..8493592b63b4 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
 	to_save->cpl = 0;
 }
 
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 {
 	to_vmcb->save.fs = from_vmcb->save.fs;
 	to_vmcb->save.gs = from_vmcb->save.gs;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 664d20f0689c..cfe165d74093 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2147,11 +2147,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
 	ret = kvm_skip_emulated_instruction(vcpu);
 
 	if (vmload) {
-		nested_svm_vmloadsave(vmcb12, svm->vmcb);
+		svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
 		svm->sysenter_eip_hi = 0;
 		svm->sysenter_esp_hi = 0;
-	} else
-		nested_svm_vmloadsave(svm->vmcb, vmcb12);
+	} else {
+		svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
+	}
 
 	kvm_vcpu_unmap(vcpu, &map, true);
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 7e2090752d8f..1b65ee3a9569 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -466,7 +466,7 @@ int svm_allocate_nested(struct vcpu_svm *svm);
 int nested_svm_vmrun(struct kvm_vcpu *vcpu);
 void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
 			  struct vmcb_save_area *to_save);
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
+void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
 int nested_svm_vmexit(struct vcpu_svm *svm);
 
 static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)

From 2bb16bea5feaa582fbbdbfd84ecaa1ab61bbb34c Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 19 Jul 2021 11:03:22 +0200
Subject: [PATCH 174/502] KVM: nSVM: Swap the parameter order for
 svm_copy_vmrun_state()/svm_copy_vmloadsave_state()

Make svm_copy_vmrun_state()/svm_copy_vmloadsave_state() interface match
'memcpy(dest, src)' to avoid any confusion.

No functional change intended.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210719090322.625277-1-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c |  8 ++++----
 arch/x86/kvm/svm/svm.c    | 12 ++++++------
 arch/x86/kvm/svm/svm.h    |  6 +++---
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 8493592b63b4..1c2a0414a88d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -702,8 +702,8 @@ out:
 }
 
 /* Copy state save area fields which are handled by VMRUN */
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
-			  struct vmcb_save_area *to_save)
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+			  struct vmcb_save_area *from_save)
 {
 	to_save->es = from_save->es;
 	to_save->cs = from_save->cs;
@@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
 	to_save->cpl = 0;
 }
 
-void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
 {
 	to_vmcb->save.fs = from_vmcb->save.fs;
 	to_vmcb->save.gs = from_vmcb->save.gs;
@@ -1385,7 +1385,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 
 	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
 
-	svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
+	svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
 	nested_load_control_from_vmcb12(svm, ctl);
 
 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index cfe165d74093..9a6987549e1b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2147,11 +2147,11 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
 	ret = kvm_skip_emulated_instruction(vcpu);
 
 	if (vmload) {
-		svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
+		svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
 		svm->sysenter_eip_hi = 0;
 		svm->sysenter_esp_hi = 0;
 	} else {
-		svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
+		svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
 	}
 
 	kvm_vcpu_unmap(vcpu, &map, true);
@@ -4345,8 +4345,8 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 
 		BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
 
-		svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
-				     map_save.hva + 0x400);
+		svm_copy_vmrun_state(map_save.hva + 0x400,
+				     &svm->vmcb01.ptr->save);
 
 		kvm_vcpu_unmap(vcpu, &map_save, true);
 	}
@@ -4394,8 +4394,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 					 &map_save) == -EINVAL)
 				return 1;
 
-			svm_copy_vmrun_state(map_save.hva + 0x400,
-					     &svm->vmcb01.ptr->save);
+			svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+					     map_save.hva + 0x400);
 
 			kvm_vcpu_unmap(vcpu, &map_save, true);
 		}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 1b65ee3a9569..bd0fe94c2920 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -464,9 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm);
 void svm_free_nested(struct vcpu_svm *svm);
 int svm_allocate_nested(struct vcpu_svm *svm);
 int nested_svm_vmrun(struct kvm_vcpu *vcpu);
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
-			  struct vmcb_save_area *to_save);
-void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+			  struct vmcb_save_area *from_save);
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
 int nested_svm_vmexit(struct vcpu_svm *svm);
 
 static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)

From 0e691ee7b5034c91a31b565d3ff9a50e01dde445 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 22 Jul 2021 11:26:28 +0200
Subject: [PATCH 175/502] KVM: Documentation: Fix
 KVM_CAP_ENFORCE_PV_FEATURE_CPUID name

'KVM_CAP_ENFORCE_PV_CPUID' doesn't match the define in
include/uapi/linux/kvm.h.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210722092628.236474-1-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index c7b165ca70b6..1a1d2061227b 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7049,7 +7049,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
 trap and emulate MSRs that are outside of the scope of KVM as well as
 limit the attack surface on KVM's MSR emulation code.
 
-8.28 KVM_CAP_ENFORCE_PV_CPUID
+8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
 -----------------------------
 
 Architectures: x86

From 3b1c8c5682672d73c1e977944af8c3ebed4a0ce1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 22 Jul 2021 11:50:03 +0200
Subject: [PATCH 176/502] docs: virt: kvm: api.rst: replace some characters

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurences of the following characters:

	- U+00a0 (' '): NO-BREAK SPACE
	  as it can cause lines being truncated on PDF output

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-Id: <ff70cb42d63f3a1da66af1b21b8d038418ed5189.1626947264.git.mchehab+huawei@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1a1d2061227b..dae68e68ca23 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -855,7 +855,7 @@ in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
 use PPIs designated for specific cpus.  The irq field is interpreted
 like this::
 
-  bits:  |  31 ... 28  | 27 ... 24 | 23  ... 16 | 15 ... 0 |
+  bits:  |  31 ... 28  | 27 ... 24 | 23  ... 16 | 15 ... 0 |
   field: | vcpu2_index | irq_type  | vcpu_index |  irq_id  |
 
 The irq_type field has the following values:
@@ -2149,10 +2149,10 @@ prior to calling the KVM_RUN ioctl.
 Errors:
 
   ======   ============================================================
-  ENOENT   no such register
-  EINVAL   invalid register ID, or no such register or used with VMs in
+  ENOENT   no such register
+  EINVAL   invalid register ID, or no such register or used with VMs in
            protected virtualization mode on s390
-  EPERM    (arm64) register access not allowed before vcpu finalization
+  EPERM    (arm64) register access not allowed before vcpu finalization
   ======   ============================================================
 
 (These error codes are indicative only: do not rely on a specific error
@@ -2590,10 +2590,10 @@ following id bit patterns::
 Errors include:
 
   ======== ============================================================
-  ENOENT   no such register
-  EINVAL   invalid register ID, or no such register or used with VMs in
+  ENOENT   no such register
+  EINVAL   invalid register ID, or no such register or used with VMs in
            protected virtualization mode on s390
-  EPERM    (arm64) register access not allowed before vcpu finalization
+  EPERM    (arm64) register access not allowed before vcpu finalization
   ======== ============================================================
 
 (These error codes are indicative only: do not rely on a specific error
@@ -3112,13 +3112,13 @@ current state.  "addr" is ignored.
 Errors:
 
   ======     =================================================================
-  EINVAL     the target is unknown, or the combination of features is invalid.
-  ENOENT     a features bit specified is unknown.
+  EINVAL     the target is unknown, or the combination of features is invalid.
+  ENOENT     a features bit specified is unknown.
   ======     =================================================================
 
 This tells KVM what type of CPU to present to the guest, and what
-optional features it should have.  This will cause a reset of the cpu
-registers to their initial values.  If this is not called, KVM_RUN will
+optional features it should have.  This will cause a reset of the cpu
+registers to their initial values.  If this is not called, KVM_RUN will
 return ENOEXEC for that vcpu.
 
 The initial values are defined as:
@@ -3239,8 +3239,8 @@ VCPU matching underlying host.
 Errors:
 
   =====      ==============================================================
-  E2BIG      the reg index list is too big to fit in the array specified by
-             the user (the number required will be written into n).
+  E2BIG      the reg index list is too big to fit in the array specified by
+             the user (the number required will be written into n).
   =====      ==============================================================
 
 ::
@@ -3288,7 +3288,7 @@ specific device.
 ARM/arm64 divides the id field into two parts, a device id and an
 address type id specific to the individual device::
 
-  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
+  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
   field: |        0x00000000      |     device id   |  addr type id  |
 
 ARM/arm64 currently only require this when using the in-kernel GIC

From 0a31df6823232516f61f174907e444f710941dfe Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 22 Jul 2021 14:30:18 +0200
Subject: [PATCH 177/502] KVM: x86: Check the right feature bit for
 MSR_KVM_ASYNC_PF_ACK access

MSR_KVM_ASYNC_PF_ACK MSR is part of interrupt based asynchronous page fault
interface and not the original (deprecated) KVM_FEATURE_ASYNC_PF. This is
stated in Documentation/virt/kvm/msr.rst.

Fixes: 66570e966dd9 ("kvm: x86: only provide PV features if enabled in guest's CPUID")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Oliver Upton <oupton@google.com>
Message-Id: <20210722123018.260035-1-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a4fd10604f72..4116567f3d44 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3407,7 +3407,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		break;
 	case MSR_KVM_ASYNC_PF_ACK:
-		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
 			return 1;
 		if (data & 0x1) {
 			vcpu->arch.apf.pageready_pending = false;
@@ -3746,7 +3746,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vcpu->arch.apf.msr_int_val;
 		break;
 	case MSR_KVM_ASYNC_PF_ACK:
-		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
 			return 1;
 
 		msr_info->data = 0;

From 92766c4628ea349c8ddab0cd7bd0488f36e5c4ce Mon Sep 17 00:00:00 2001
From: Letu Ren <fantasquex@gmail.com>
Date: Sun, 25 Jul 2021 21:45:12 +0800
Subject: [PATCH 178/502] net/qla3xxx: fix schedule while atomic in
 ql_wait_for_drvr_lock and ql_adapter_reset

When calling the 'ql_wait_for_drvr_lock' and 'ql_adapter_reset', the driver
has already acquired the spin lock, so the driver should not call 'ssleep'
in atomic context.

This bug can be fixed by using 'mdelay' instead of 'ssleep'.

Reported-by: Letu Ren <fantasquex@gmail.com>
Signed-off-by: Letu Ren <fantasquex@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qla3xxx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 2376b2729633..c00ad57575ea 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -154,7 +154,7 @@ static int ql_wait_for_drvr_lock(struct ql3_adapter *qdev)
 				      "driver lock acquired\n");
 			return 1;
 		}
-		ssleep(1);
+		mdelay(1000);
 	} while (++i < 10);
 
 	netdev_err(qdev->ndev, "Timed out waiting for driver lock...\n");
@@ -3274,7 +3274,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev)
 		if ((value & ISP_CONTROL_SR) == 0)
 			break;
 
-		ssleep(1);
+		mdelay(1000);
 	} while ((--max_wait_time));
 
 	/*
@@ -3310,7 +3310,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev)
 						   ispControlStatus);
 			if ((value & ISP_CONTROL_FSR) == 0)
 				break;
-			ssleep(1);
+			mdelay(1000);
 		} while ((--max_wait_time));
 	}
 	if (max_wait_time == 0)

From 44eff40a32e8f5228ae041006352e32638ad2368 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 26 Jul 2021 14:14:31 +0100
Subject: [PATCH 179/502] io_uring: fix io_prep_async_link locking

io_prep_async_link() may be called after arming a linked timeout,
automatically making it unsafe to traverse the linked list. Guard
with completion_lock if there was a linked timeout.

Cc: stable@vger.kernel.org # 5.9+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/93f7c617e2b4f012a2a175b3dab6bc2f27cebc48.1627304436.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5a0fd6bcd318..c4d2b320cdd4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1279,8 +1279,17 @@ static void io_prep_async_link(struct io_kiocb *req)
 {
 	struct io_kiocb *cur;
 
-	io_for_each_link(cur, req)
-		io_prep_async_work(cur);
+	if (req->flags & REQ_F_LINK_TIMEOUT) {
+		struct io_ring_ctx *ctx = req->ctx;
+
+		spin_lock_irq(&ctx->completion_lock);
+		io_for_each_link(cur, req)
+			io_prep_async_work(cur);
+		spin_unlock_irq(&ctx->completion_lock);
+	} else {
+		io_for_each_link(cur, req)
+			io_prep_async_work(cur);
+	}
 }
 
 static void io_queue_async_work(struct io_kiocb *req)

From d47255d3f87338164762ac56df1f28d751e27246 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 24 Jun 2021 13:20:21 +0200
Subject: [PATCH 180/502] drm/amdgpu: Fix resource leak on probe error path

This reverts commit 4192f7b5768912ceda82be2f83c87ea7181f9980.

It is not true (as stated in the reverted commit changelog) that we never
unmap the BAR on failure; it actually does happen properly on
amdgpu_driver_load_kms() -> amdgpu_driver_unload_kms() ->
amdgpu_device_fini() error path.

What's worse, this commit actually completely breaks resource freeing on
probe failure (like e.g. failure to load microcode), as
amdgpu_driver_unload_kms() notices adev->rmmio being NULL and bails too
early, leaving all the resources that'd normally be freed in
amdgpu_acpi_fini() and amdgpu_device_fini() still hanging around, leading
to all sorts of oopses when someone tries to, for example, access the
sysfs and procfs resources which are still around while the driver is
gone.

Fixes: 4192f7b57689 ("drm/amdgpu: unmap register bar on device init failure")
Reported-by: Vojtech Pavlik <vojtech@ucw.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d303e88e3c23..f3fd5ec710b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3504,13 +3504,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_device_get_job_timeout_settings(adev);
 	if (r) {
 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
-		goto failed_unmap;
+		return r;
 	}
 
 	/* early init functions */
 	r = amdgpu_device_ip_early_init(adev);
 	if (r)
-		goto failed_unmap;
+		return r;
 
 	/* doorbell bar mapping and doorbell index init*/
 	amdgpu_device_doorbell_init(adev);
@@ -3736,10 +3736,6 @@ release_ras_con:
 failed:
 	amdgpu_vf_error_trans_all(adev);
 
-failed_unmap:
-	iounmap(adev->rmmio);
-	adev->rmmio = NULL;
-
 	return r;
 }
 

From 110aa25c3ce417a44e35990cf8ed22383277933a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 26 Jul 2021 10:42:56 -0600
Subject: [PATCH 181/502] io_uring: fix race in unified task_work running

We use a bit to manage if we need to add the shared task_work, but
a list + lock for the pending work. Before aborting a current run
of the task_work we check if the list is empty, but we do so without
grabbing the lock that protects it. This can lead to races where
we think we have nothing left to run, where in practice we could be
racing with a task adding new work to the list. If we do hit that
race condition, we could be left with work items that need processing,
but the shared task_work is not active.

Ensure that we grab the lock before checking if the list is empty,
so we know if it's safe to exit the run or not.

Link: https://lore.kernel.org/io-uring/c6bd5987-e9ae-cd02-49d0-1b3ac1ef65b1@tnonline.net/
Cc: stable@vger.kernel.org # 5.11+
Reported-by: Forza <forza@tnonline.net>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c4d2b320cdd4..a4331deb0427 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1959,9 +1959,13 @@ static void tctx_task_work(struct callback_head *cb)
 			node = next;
 		}
 		if (wq_list_empty(&tctx->task_list)) {
+			spin_lock_irq(&tctx->task_lock);
 			clear_bit(0, &tctx->task_state);
-			if (wq_list_empty(&tctx->task_list))
+			if (wq_list_empty(&tctx->task_list)) {
+				spin_unlock_irq(&tctx->task_lock);
 				break;
+			}
+			spin_unlock_irq(&tctx->task_lock);
 			/* another tctx_task_work() is enqueued, yield */
 			if (test_and_set_bit(0, &tctx->task_state))
 				break;

From 6aade587d329ebe88319dfdb8e8c7b6aede80417 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 24 Jun 2021 13:11:36 +0200
Subject: [PATCH 182/502] drm/amdgpu: Avoid printing of stack contents on
 firmware load error

In case when psp_init_asd_microcode() fails to load ASD microcode file,
psp_v12_0_init_microcode() tries to print the firmware filename that
failed to load before bailing out.

This is wrong because:

- the firmware filename it would want it print is an incorrect one as
  psp_init_asd_microcode() and psp_v12_0_init_microcode() are loading
  different filenames
- it tries to print fw_name, but that's not yet been initialized by that
  time, so it prints random stack contents, e.g.

    amdgpu 0000:04:00.0: Direct firmware load for amdgpu/renoir_asd.bin failed with error -2
    amdgpu 0000:04:00.0: amdgpu: fail to initialize asd microcode
    amdgpu 0000:04:00.0: amdgpu: psp v12.0: Failed to load firmware "\xfeTO\x8e\xff\xff"

Fix that by bailing out immediately, instead of priting the bogus error
message.

Reported-by: Vojtech Pavlik <vojtech@ucw.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index 618e5b6b85d9..536d41f327c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -67,7 +67,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
 
 	err = psp_init_asd_microcode(psp, chip_name);
 	if (err)
-		goto out;
+		return err;
 
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
 	err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
@@ -80,7 +80,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
 	} else {
 		err = amdgpu_ucode_validate(adev->psp.ta_fw);
 		if (err)
-			goto out2;
+			goto out;
 
 		ta_hdr = (const struct ta_firmware_header_v1_0 *)
 				 adev->psp.ta_fw->data;
@@ -105,10 +105,9 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
 
 	return 0;
 
-out2:
+out:
 	release_firmware(adev->psp.ta_fw);
 	adev->psp.ta_fw = NULL;
-out:
 	if (err) {
 		dev_err(adev->dev,
 			"psp v12.0: Failed to load firmware \"%s\"\n",

From 66291b6adb66dd3bc96b0f594d88c2ff1300d95f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 26 Jul 2021 08:26:56 +0200
Subject: [PATCH 183/502] ALSA: usb-audio: Fix superfluous autosuspend recovery

The change to restore the autosuspend from the disabled state uses a
wrong check: namely, it should have been the exact comparison of the
quirk_type instead of the bitwise and (&).  Otherwise it matches
wrongly with the other quirk types.

Although re-enabling the autosuspend for the already enabled device
shouldn't matter much, it's better to fix the unbalanced call.

Fixes: 9799110825db ("ALSA: usb-audio: Disable USB autosuspend properly in setup_disable_autosuspend()")
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/s5hr1flh9ov.wl-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/card.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/usb/card.c b/sound/usb/card.c
index 2f6a62416c05..a1f8c3a026f5 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -907,7 +907,7 @@ static void usb_audio_disconnect(struct usb_interface *intf)
 		}
 	}
 
-	if (chip->quirk_type & QUIRK_SETUP_DISABLE_AUTOSUSPEND)
+	if (chip->quirk_type == QUIRK_SETUP_DISABLE_AUTOSUSPEND)
 		usb_enable_autosuspend(interface_to_usbdev(intf));
 
 	chip->num_interfaces--;

From 53ca18acbe645656132fb5a329833db711067e54 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Mon, 26 Jul 2021 12:01:02 +0200
Subject: [PATCH 184/502] spi: imx: mx51-ecspi: Fix low-speed CONFIGREG delay
 calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The spi_imx->spi_bus_clk may be uninitialized and thus also zero in
mx51_ecspi_prepare_message(), which would lead to division by zero
in kernel. Since bitbang .setup_transfer callback which initializes
the spi_imx->spi_bus_clk is called after bitbang prepare_message
callback, iterate over all the transfers in spi_message, find the
one with lowest bus frequency, and use that bus frequency for the
delay calculation.

Note that it is not possible to move this CONFIGREG delay back into
the .setup_transfer callback, because that is invoked too late, after
the GPIO chipselects were already configured.

Fixes: 135cbd378eab ("spi: imx: mx51-ecspi: Reinstate low-speed CONFIGREG delay")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210726100102.5188-1-marex@denx.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-imx.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 4aee3db6d6df..2872993550bd 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -505,7 +505,9 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
 				      struct spi_message *msg)
 {
 	struct spi_device *spi = msg->spi;
+	struct spi_transfer *xfer;
 	u32 ctrl = MX51_ECSPI_CTRL_ENABLE;
+	u32 min_speed_hz = ~0U;
 	u32 testreg, delay;
 	u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG);
 
@@ -577,8 +579,20 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
 	 * be asserted before the SCLK polarity changes, which would disrupt
 	 * the SPI communication as the device on the other end would consider
 	 * the change of SCLK polarity as a clock tick already.
+	 *
+	 * Because spi_imx->spi_bus_clk is only set in bitbang prepare_message
+	 * callback, iterate over all the transfers in spi_message, find the
+	 * one with lowest bus frequency, and use that bus frequency for the
+	 * delay calculation. In case all transfers have speed_hz == 0, then
+	 * min_speed_hz is ~0 and the resulting delay is zero.
 	 */
-	delay = (2 * 1000000) / spi_imx->spi_bus_clk;
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		if (!xfer->speed_hz)
+			continue;
+		min_speed_hz = min(xfer->speed_hz, min_speed_hz);
+	}
+
+	delay = (2 * 1000000) / min_speed_hz;
 	if (likely(delay < 10))	/* SCLK is faster than 100 kHz */
 		udelay(delay);
 	else			/* SCLK is _very_ slow */

From 758684e49f4c7ea2a75e249e486659f0950cd63e Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Mon, 26 Jul 2021 14:52:48 -0400
Subject: [PATCH 185/502] bnxt_en: Fix static checker warning in
 bnxt_fw_reset_task()

Now that we return when bnxt_open() fails in bnxt_fw_reset_task(),
there is no need to check for 'rc' value again before invoking
bnxt_reenable_sriov().

Fixes: 3958b1da725a ("bnxt_en: fix error path of FW reset")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4db162cee911..89606587b156 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -12131,9 +12131,8 @@ static void bnxt_fw_reset_task(struct work_struct *work)
 		/* Make sure fw_reset_state is 0 before clearing the flag */
 		smp_mb__before_atomic();
 		clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
-		bnxt_ulp_start(bp, rc);
-		if (!rc)
-			bnxt_reenable_sriov(bp);
+		bnxt_ulp_start(bp, 0);
+		bnxt_reenable_sriov(bp);
 		bnxt_vf_reps_alloc(bp);
 		bnxt_vf_reps_open(bp);
 		bnxt_dl_health_recovery_done(bp);

From 24b5b1978cd5a80db58e2a19db2f9c36fe8d4f7a Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Sun, 25 Jul 2021 18:07:25 +0200
Subject: [PATCH 186/502] clk: stm32f4: fix post divisor setup for I2S/SAI PLLs

Enabling the framebuffer leads to a system hang. Running, as a debug
hack, the store_pan() function in drivers/video/fbdev/core/fbsysfs.c
without taking the console_lock, allows to see the crash backtrace on
the serial line.

~ # echo 0 0 > /sys/class/graphics/fb0/pan

[    9.719414] Unhandled exception: IPSR = 00000005 LR = fffffff1
[    9.726937] CPU: 0 PID: 49 Comm: sh Not tainted 5.13.0-rc5 #9
[    9.733008] Hardware name: STM32 (Device Tree Support)
[    9.738296] PC is at clk_gate_is_enabled+0x0/0x28
[    9.743426] LR is at stm32f4_pll_div_set_rate+0xf/0x38
[    9.748857] pc : [<0011e4be>]    lr : [<0011f9e3>]    psr: 0100000b
[    9.755373] sp : 00bc7be0  ip : 00000000  fp : 001f3ac4
[    9.760812] r10: 002610d0  r9 : 01efe920  r8 : 00540560
[    9.766269] r7 : 02e7ddb0  r6 : 0173eed8  r5 : 00000000  r4 : 004027c0
[    9.773081] r3 : 0011e4bf  r2 : 02e7ddb0  r1 : 0173eed8  r0 : 1d3267b8
[    9.779911] xPSR: 0100000b
[    9.782719] CPU: 0 PID: 49 Comm: sh Not tainted 5.13.0-rc5 #9
[    9.788791] Hardware name: STM32 (Device Tree Support)
[    9.794120] [<0000afa1>] (unwind_backtrace) from [<0000a33f>] (show_stack+0xb/0xc)
[    9.802421] [<0000a33f>] (show_stack) from [<0000a8df>] (__invalid_entry+0x4b/0x4c)

The `pll_num' field in the post_div_data configuration contained a wrong
value which also referenced an uninitialized hardware clock when
clk_register_pll_div() was called.

Fixes: 517633ef630e ("clk: stm32f4: Add post divisor for I2S & SAI PLLs")
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Reviewed-by: Gabriel Fernandez <gabriel.fernandez@st.com>
Link: https://lore.kernel.org/r/20210725160725.10788-1-dariobin@libero.it
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-stm32f4.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index 18117ce5ff85..5c75e3d906c2 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -526,7 +526,7 @@ struct stm32f4_pll {
 
 struct stm32f4_pll_post_div_data {
 	int idx;
-	u8 pll_num;
+	int pll_idx;
 	const char *name;
 	const char *parent;
 	u8 flag;
@@ -557,13 +557,13 @@ static const struct clk_div_table post_divr_table[] = {
 
 #define MAX_POST_DIV 3
 static const struct stm32f4_pll_post_div_data  post_div_data[MAX_POST_DIV] = {
-	{ CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q",
+	{ CLK_I2SQ_PDIV, PLL_VCO_I2S, "plli2s-q-div", "plli2s-q",
 		CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL},
 
-	{ CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q",
+	{ CLK_SAIQ_PDIV, PLL_VCO_SAI, "pllsai-q-div", "pllsai-q",
 		CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL },
 
-	{ NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT,
+	{ NO_IDX, PLL_VCO_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT,
 		STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table },
 };
 
@@ -1774,7 +1774,7 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 				post_div->width,
 				post_div->flag_div,
 				post_div->div_table,
-				clks[post_div->pll_num],
+				clks[post_div->pll_idx],
 				&stm32f4_clk_lock);
 
 		if (post_div->idx != NO_IDX)

From 953a92f0e55f370ec76e7f85e332906f1e898ef4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 16 Jul 2021 21:31:59 -0700
Subject: [PATCH 187/502] clk: hisilicon: hi3559a: select RESET_HISI

The clk-hi3559a driver uses functions from reset.c so it should
select RESET_HISI to avoid build errors.

Fixes these build errors:
aarch64-linux-ld: drivers/clk/hisilicon/clk-hi3559a.o: in function `hi3559av100_crg_remove':
clk-hi3559a.c:(.text+0x158): undefined reference to `hisi_reset_exit'
aarch64-linux-ld: drivers/clk/hisilicon/clk-hi3559a.o: in function `hi3559av100_crg_probe':
clk-hi3559a.c:(.text+0x1f4): undefined reference to `hisi_reset_init'
aarch64-linux-ld: clk-hi3559a.c:(.text+0x238): undefined reference to `hisi_reset_exit'

Fixes: 6c81966107dc ("clk: hisilicon: Add clock driver for hi3559A SoC")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Dongjiu Geng <gengdongjiu@huawei.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: stable@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: Michael Turquette <mturquette@baylibre.com>
Link: https://lore.kernel.org/r/20210717043159.12566-1-rdunlap@infradead.org
Reviewed-by: Dongjiu Geng <gengdongjiu1@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/hisilicon/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig
index 5ecc37aaa118..c1ec75aa4ccd 100644
--- a/drivers/clk/hisilicon/Kconfig
+++ b/drivers/clk/hisilicon/Kconfig
@@ -18,6 +18,7 @@ config COMMON_CLK_HI3519
 config COMMON_CLK_HI3559A
 	bool "Hi3559A Clock Driver"
 	depends on ARCH_HISI || COMPILE_TEST
+	select RESET_HISI
 	default ARCH_HISI
 	help
 	  Build the clock driver for hi3559a.

From f2a26a3cff27dfa456fef386fe5df56dcb4b47b6 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 23 Jul 2021 18:35:15 -0500
Subject: [PATCH 188/502] SMB3: fix readpage for large swap cache

readpage was calculating the offset of the page incorrectly
for the case of large swapcaches.

    loff_t offset = (loff_t)page->index << PAGE_SHIFT;

As pointed out by Matthew Wilcox, this needs to use
page_file_offset() to calculate the offset instead.
Pages coming from the swap cache have page->index set
to their index within the swapcache, not within the backing
file.  For a sufficiently large swapcache, we could have
overlapping values of page->index within the same backing file.

Suggested by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: <stable@vger.kernel.org> # v5.7+
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index cd108607a070..0a72840a88f1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -4619,7 +4619,7 @@ read_complete:
 
 static int cifs_readpage(struct file *file, struct page *page)
 {
-	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
+	loff_t offset = page_file_offset(page);
 	int rc = -EACCES;
 	unsigned int xid;
 

From 5ad4df56cd2158965f73416d41fce37906724822 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Mon, 26 Jul 2021 16:22:55 -0500
Subject: [PATCH 189/502] smb3: rc uninitialized in one fallocate path

Clang detected a problem with rc possibly being unitialized
(when length is zero) in a recently added fallocate code path.

Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2ops.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 23d6f4d71649..2dfd0d8297eb 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3617,7 +3617,8 @@ static int smb3_simple_fallocate_write_range(unsigned int xid,
 					     char *buf)
 {
 	struct cifs_io_parms io_parms = {0};
-	int rc, nbytes;
+	int nbytes;
+	int rc = 0;
 	struct kvec iov[2];
 
 	io_parms.netfid = cfile->fid.netfid;

From 35171fbfc0d94aa31b009bb475d156ad1941ab50 Mon Sep 17 00:00:00 2001
From: Nikos Liolios <liolios.nk@gmail.com>
Date: Tue, 27 Jul 2021 06:05:10 +0300
Subject: [PATCH 190/502] ALSA: hda/realtek: Fix headset mic for Acer SWIFT
 SF314-56 (ALC256)

The issue on Acer SWIFT SF314-56 is that headset microphone doesn't work.
The following quirk fixed headset microphone issue. The fixup was found by trial and error.
Note that the fixup of SF314-54/55 (ALC256_FIXUP_ACER_HEADSET_MIC) was not successful on my SF314-56.

Signed-off-by: Nikos Liolios <liolios.nk@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210727030510.36292-1-liolios.nk@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index caaf0e8aac11..14e1ab7c7954 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8274,6 +8274,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC),
+	SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),

From b070f9ca78680486927b799cf6126b128a7c2c1b Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Tue, 20 Jul 2021 11:47:10 -0700
Subject: [PATCH 191/502] ARM: omap2+: hwmod: fix potential NULL pointer access

omap_hwmod_get_pwrdm() may access a NULL clk_hw pointer in some failure
cases. Add a check for the case and bail out gracely if this happens.

Reported-by: Dan Murphy <dmurphy@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
Cc: stable@vger.kernel.org # v5.10+
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap_hwmod.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 65934b2924fb..12b26e04686f 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -3776,6 +3776,7 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh)
 	struct omap_hwmod_ocp_if *oi;
 	struct clockdomain *clkdm;
 	struct clk_hw_omap *clk;
+	struct clk_hw *hw;
 
 	if (!oh)
 		return NULL;
@@ -3792,7 +3793,14 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh)
 		c = oi->_clk;
 	}
 
-	clk = to_clk_hw_omap(__clk_get_hw(c));
+	hw = __clk_get_hw(c);
+	if (!hw)
+		return NULL;
+
+	clk = to_clk_hw_omap(hw);
+	if (!clk)
+		return NULL;
+
 	clkdm = clk->clkdm;
 	if (!clkdm)
 		return NULL;

From a6d90e9f22328f07343e49e08a4ca483ae8e8abb Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@baylibre.com>
Date: Tue, 20 Jul 2021 11:27:16 -0700
Subject: [PATCH 192/502] bus: ti-sysc: AM3: RNG is GP only

Make the RNG on AM3 GP only.

Based on this patch from TI v5.4 tree which is based on hwmod data
which are now removed:

| ARM: AM43xx: hwmod: Move RNG to a GP only links table
|
| On non-GP devices the RNG is controlled by the secure-side software,
| like in DRA7xx hwmod we should not control this IP when we are not
| a GP device.
|
| Signed-off-by: Andrew F. Davis <afd@ti.com>

Cc: stable@vger.kernel.org # v5.10+
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 38cb116ed433..2587ed43ee8a 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -2951,6 +2951,8 @@ static int sysc_init_soc(struct sysc *ddata)
 		case SOC_3430 ... SOC_3630:
 			sysc_add_disabled(0x48304000);	/* timer12 */
 			break;
+		case SOC_AM3:
+			sysc_add_disabled(0x48310000);  /* rng */
 		default:
 			break;
 		}

From 20a6b3fd8e2e2c063b25fbf2ee74d86b898e5087 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Fri, 16 Jul 2021 09:07:30 -0700
Subject: [PATCH 193/502] ARM: dts: am43x-epos-evm: Reduce i2c0 bus speed for
 tps65218

Based on the latest timing specifications for the TPS65218 from the data
sheet, http://www.ti.com/lit/ds/symlink/tps65218.pdf, document SLDS206
from November 2014, we must change the i2c bus speed to better fit within
the minimum high SCL time required for proper i2c transfer.

When running at 400khz, measurements show that SCL spends
0.8125 uS/1.666 uS high/low which violates the requirement for minimum
high period of SCL provided in datasheet Table 7.6 which is 1 uS.
Switching to 100khz gives us 5 uS/5 uS high/low which both fall above
the minimum given values for 100 khz, 4.0 uS/4.7 uS high/low.

Without this patch occasionally a voltage set operation from the kernel
will appear to have worked but the actual voltage reflected on the PMIC
will not have updated, causing problems especially with cpufreq that may
update to a higher OPP without actually raising the voltage on DCDC2,
leading to a hang.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am43x-epos-evm.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index aae0af10a5b1..2aa75abf85a9 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts
@@ -582,7 +582,7 @@
 	status = "okay";
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c0_pins>;
-	clock-frequency = <400000>;
+	clock-frequency = <100000>;
 
 	tps65218: tps65218@24 {
 		reg = <0x24>;

From 0162a9964365fd26e34575e121b17d021204c481 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Mon, 26 Jul 2021 15:15:25 +0200
Subject: [PATCH 194/502] ARM: dts: am437x-l4: fix typo in can@0 node

Replace clock-name with clock-names.

Fixes: 2a4117df9b43 ("ARM: dts: Fix dcan driver probe failed on am437x platform")
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am437x-l4.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi
index 40ef3973f2a9..ba58e6b0da1d 100644
--- a/arch/arm/boot/dts/am437x-l4.dtsi
+++ b/arch/arm/boot/dts/am437x-l4.dtsi
@@ -1595,7 +1595,7 @@
 				compatible = "ti,am4372-d_can", "ti,am3352-d_can";
 				reg = <0x0 0x2000>;
 				clocks = <&dcan1_fck>;
-				clock-name = "fck";
+				clock-names = "fck";
 				syscon-raminit = <&scm_conf 0x644 1>;
 				interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
 				status = "disabled";

From c68ef4ad180e09805fa46965d15e1dfadf09ffa5 Mon Sep 17 00:00:00 2001
From: "H. Nikolaus Schaller" <hns@goldelico.com>
Date: Thu, 1 Jul 2021 16:00:22 +0200
Subject: [PATCH 195/502] omap5-board-common: remove not physically existing
 vdds_1v8_main fixed-regulator

This device tree include file describes a fixed-regulator
connecting smps7_reg output (1.8V) to some 1.8V rail and
consumers (vdds_1v8_main).

This regulator does not physically exist.

I assume it was introduced as a wrapper around smps7_reg
to provide a speaking signal name "vdds_1v8_main" as label.

This fixed-regulator without real function was not an issue
in driver code until

  Commit 98e48cd9283d ("regulator: core: resolve supply for boot-on/always-on regulators")

introduced a new check for regulator initialization which
makes Palmas regulator registration fail:

[    5.407712] ldo1: supplied by vsys_cobra
[    5.412748] ldo2: supplied by vsys_cobra
[    5.417603] palmas-pmic 48070000.i2c:palmas@48:palmas_pmic: failed to register 48070000.i2c:palmas@48:palmas_pmic regulator

The reason is that the supply-chain of regulators is too
long and goes from ldo3 through the virtual vdds_1v8_main
regulator and then back to smps7. This adds a cross-dependency
of probing Palmas regulators and the fixed-regulator which
leads to probe deferral by the new check and is no longer
resolved.

Since we do not control what device tree files including this
one reference (either &vdds_1v8_main or &smps7_reg or both)
we keep both labels for smps7 for compatibility.

Fixes: 98e48cd9283d ("regulator: core: resolve supply for boot-on/always-on regulators")
Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/omap5-board-common.dtsi | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index 45435bb88c89..373984c130e0 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -30,14 +30,6 @@
 		regulator-max-microvolt = <5000000>;
 	};
 
-	vdds_1v8_main: fixedregulator-vdds_1v8_main {
-		compatible = "regulator-fixed";
-		regulator-name = "vdds_1v8_main";
-		vin-supply = <&smps7_reg>;
-		regulator-min-microvolt = <1800000>;
-		regulator-max-microvolt = <1800000>;
-	};
-
 	vmmcsd_fixed: fixedregulator-mmcsd {
 		compatible = "regulator-fixed";
 		regulator-name = "vmmcsd_fixed";
@@ -487,6 +479,7 @@
 					regulator-boot-on;
 				};
 
+				vdds_1v8_main:
 				smps7_reg: smps7 {
 					/* VDDS_1v8_OMAP over VDDS_1v8_MAIN */
 					regulator-name = "smps7";

From 9f59efcd51e332aad01e7fa2b3a97cd22d347ceb Mon Sep 17 00:00:00 2001
From: Michael Zaidman <michael.zaidman@gmail.com>
Date: Mon, 10 May 2021 19:34:28 +0300
Subject: [PATCH 196/502] HID: ft260: fix format type warning in
 ft260_word_show()

Fixes: 6a82582d9fa4 ("HID: ft260: add usb hid to i2c host bridge driver")

Fix warning reported by static analysis when built with W=1 for arm64 by
clang version 13.0.0

>> drivers/hid/hid-ft260.c:794:44: warning: format specifies type 'short' but
   the argument has type 'int' [-Wformat]
           return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field));
                                             ~~~     ^~~~~~~~~~~~~~~~~~~
                                             %i
   include/linux/byteorder/generic.h:91:21: note: expanded from
                                            macro 'le16_to_cpu'
   #define le16_to_cpu __le16_to_cpu
                       ^
   include/uapi/linux/byteorder/big_endian.h:36:26: note: expanded from
                                                    macro '__le16_to_cpu'
   #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
                            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/uapi/linux/swab.h:105:2: note: expanded from macro '__swab16'
           (__builtin_constant_p((__u16)(x)) ?     \
           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Any sprintf style use of %h or %hi for a sub-int sized value isn't useful
since integer promotion is done on the value anyway. So, use %d instead.

https://lore.kernel.org/lkml/CAHk-=wgoxnmsj8GEVFJSvTwdnWm8wVJthefNk2n6+4TC=20e0Q@mail.gmail.com/

Signed-off-by: Michael Zaidman <michael.zaidman@gmail.com>
Suggested-by: Joe Perches <joe@perches.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ft260.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c
index f43a8406cb9a..6f10df2042c4 100644
--- a/drivers/hid/hid-ft260.c
+++ b/drivers/hid/hid-ft260.c
@@ -785,7 +785,7 @@ static int ft260_byte_show(struct hid_device *hdev, int id, u8 *cfg, int len,
 	if (ret < 0)
 		return ret;
 
-	return scnprintf(buf, PAGE_SIZE, "%hi\n", *field);
+	return scnprintf(buf, PAGE_SIZE, "%d\n", *field);
 }
 
 static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len,
@@ -797,7 +797,7 @@ static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len,
 	if (ret < 0)
 		return ret;
 
-	return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field));
+	return scnprintf(buf, PAGE_SIZE, "%d\n", le16_to_cpu(*field));
 }
 
 #define FT260_ATTR_SHOW(name, reptype, id, type, func)			       \

From 4b0556b96e1fe7723629bd40e3813a30cd632faf Mon Sep 17 00:00:00 2001
From: Alexander Tsoy <alexander@tsoy.me>
Date: Tue, 27 Jul 2021 12:33:26 +0300
Subject: [PATCH 197/502] ALSA: usb-audio: Add registration quirk for JBL
 Quantum 600

Apparently JBL Quantum 600 has multiple hardware revisions. Apply
registration quirk to another device id as well.

Signed-off-by: Alexander Tsoy <alexander@tsoy.me>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210727093326.1153366-1-alexander@tsoy.me
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index e7accd87e063..326d1b0ea5e6 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1899,6 +1899,7 @@ static const struct registration_quirk registration_quirks[] = {
 	REG_QUIRK_ENTRY(0x0951, 0x16ea, 2),	/* Kingston HyperX Cloud Flight S */
 	REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2),	/* JBL Quantum 600 */
 	REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2),	/* JBL Quantum 400 */
+	REG_QUIRK_ENTRY(0x0ecb, 0x203c, 2),	/* JBL Quantum 600 */
 	REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2),	/* JBL Quantum 800 */
 	{ 0 }					/* terminator */
 };

From fcef709c2c4baf758950bd7395e4b10527b81e2c Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@marvell.com>
Date: Sun, 25 Jul 2021 18:54:52 +0530
Subject: [PATCH 198/502] octeontx2-af: Do NIX_RX_SW_SYNC twice

NIX_RX_SW_SYNC ensures all existing transactions are finished and
pkts are written to LLC/DRAM, queues should be teared down after
successful SW_SYNC. Due to a HW errata, in some rare scenarios
an existing transaction might end after SW_SYNC operation. To
ensure operation is fully done, do the SW_SYNC twice.

Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/octeontx2/af/rvu_nix.c   | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 30067668eda7..4bfbbdf38770 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -196,11 +196,22 @@ static void nix_rx_sync(struct rvu *rvu, int blkaddr)
 {
 	int err;
 
-	/*Sync all in flight RX packets to LLC/DRAM */
+	/* Sync all in flight RX packets to LLC/DRAM */
 	rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
 	err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
 	if (err)
-		dev_err(rvu->dev, "NIX RX software sync failed\n");
+		dev_err(rvu->dev, "SYNC1: NIX RX software sync failed\n");
+
+	/* SW_SYNC ensures all existing transactions are finished and pkts
+	 * are written to LLC/DRAM, queues should be teared down after
+	 * successful SW_SYNC. Due to a HW errata, in some rare scenarios
+	 * an existing transaction might end after SW_SYNC operation. To
+	 * ensure operation is fully done, do the SW_SYNC twice.
+	 */
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
+	err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
+	if (err)
+		dev_err(rvu->dev, "SYNC2: NIX RX software sync failed\n");
 }
 
 static bool is_valid_txschq(struct rvu *rvu, int blkaddr,

From c7c9d2102c9c098916ab9e0ab248006107d00d6c Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Sun, 25 Jul 2021 00:11:59 +0300
Subject: [PATCH 199/502] net: llc: fix skb_over_panic

Syzbot reported skb_over_panic() in llc_pdu_init_as_xid_cmd(). The
problem was in wrong LCC header manipulations.

Syzbot's reproducer tries to send XID packet. llc_ui_sendmsg() is
doing following steps:

	1. skb allocation with size = len + header size
		len is passed from userpace and header size
		is 3 since addr->sllc_xid is set.

	2. skb_reserve() for header_len = 3
	3. filling all other space with memcpy_from_msg()

Ok, at this moment we have fully loaded skb, only headers needs to be
filled.

Then code comes to llc_sap_action_send_xid_c(). This function pushes 3
bytes for LLC PDU header and initializes it. Then comes
llc_pdu_init_as_xid_cmd(). It initalizes next 3 bytes *AFTER* LLC PDU
header and call skb_push(skb, 3). This looks wrong for 2 reasons:

	1. Bytes rigth after LLC header are user data, so this function
	   was overwriting payload.

	2. skb_push(skb, 3) call can cause skb_over_panic() since
	   all free space was filled in llc_ui_sendmsg(). (This can
	   happen is user passed 686 len: 686 + 14 (eth header) + 3 (LLC
	   header) = 703. SKB_DATA_ALIGN(703) = 704)

So, in this patch I added 2 new private constansts: LLC_PDU_TYPE_U_XID
and LLC_PDU_LEN_U_XID. LLC_PDU_LEN_U_XID is used to correctly reserve
header size to handle LLC + XID case. LLC_PDU_TYPE_U_XID is used by
llc_pdu_header_init() function to push 6 bytes instead of 3. And finally
I removed skb_push() call from llc_pdu_init_as_xid_cmd().

This changes should not affect other parts of LLC, since after
all steps we just transmit buffer.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-and-tested-by: syzbot+5e5a981ad7cc54c4b2b4@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc_pdu.h | 31 +++++++++++++++++++++++--------
 net/llc/af_llc.c      | 10 +++++++++-
 net/llc/llc_s_ac.c    |  2 +-
 3 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index c0f0a13ed818..49aa79c7b278 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -15,9 +15,11 @@
 #include <linux/if_ether.h>
 
 /* Lengths of frame formats */
-#define LLC_PDU_LEN_I	4       /* header and 2 control bytes */
-#define LLC_PDU_LEN_S	4
-#define LLC_PDU_LEN_U	3       /* header and 1 control byte */
+#define LLC_PDU_LEN_I		4       /* header and 2 control bytes */
+#define LLC_PDU_LEN_S		4
+#define LLC_PDU_LEN_U		3       /* header and 1 control byte */
+/* header and 1 control byte and XID info */
+#define LLC_PDU_LEN_U_XID	(LLC_PDU_LEN_U + sizeof(struct llc_xid_info))
 /* Known SAP addresses */
 #define LLC_GLOBAL_SAP	0xFF
 #define LLC_NULL_SAP	0x00	/* not network-layer visible */
@@ -50,9 +52,10 @@
 #define LLC_PDU_TYPE_U_MASK    0x03	/* 8-bit control field */
 #define LLC_PDU_TYPE_MASK      0x03
 
-#define LLC_PDU_TYPE_I	0	/* first bit */
-#define LLC_PDU_TYPE_S	1	/* first two bits */
-#define LLC_PDU_TYPE_U	3	/* first two bits */
+#define LLC_PDU_TYPE_I		0	/* first bit */
+#define LLC_PDU_TYPE_S		1	/* first two bits */
+#define LLC_PDU_TYPE_U		3	/* first two bits */
+#define LLC_PDU_TYPE_U_XID	4	/* private type for detecting XID commands */
 
 #define LLC_PDU_TYPE_IS_I(pdu) \
 	((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0)
@@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb)
 static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
 				       u8 ssap, u8 dsap, u8 cr)
 {
-	const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4;
+	int hlen = 4; /* default value for I and S types */
 	struct llc_pdu_un *pdu;
 
+	switch (type) {
+	case LLC_PDU_TYPE_U:
+		hlen = 3;
+		break;
+	case LLC_PDU_TYPE_U_XID:
+		hlen = 6;
+		break;
+	}
+
 	skb_push(skb, hlen);
 	skb_reset_network_header(skb);
 	pdu = llc_pdu_un_hdr(skb);
@@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb,
 	xid_info->fmt_id = LLC_XID_FMT_ID;	/* 0x81 */
 	xid_info->type	 = svcs_supported;
 	xid_info->rw	 = rx_window << 1;	/* size of receive window */
-	skb_put(skb, sizeof(struct llc_xid_info));
+
+	/* no need to push/put since llc_pdu_header_init() has already
+	 * pushed 3 + 3 bytes
+	 */
 }
 
 /**
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 7180979114e4..ac5cadd02cfa 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
 {
 	u8 rc = LLC_PDU_LEN_U;
 
-	if (addr->sllc_test || addr->sllc_xid)
+	if (addr->sllc_test)
 		rc = LLC_PDU_LEN_U;
+	else if (addr->sllc_xid)
+		/* We need to expand header to sizeof(struct llc_xid_info)
+		 * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header
+		 * as XID PDU. In llc_ui_sendmsg() we reserved header size and then
+		 * filled all other space with user data. If we won't reserve this
+		 * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data
+		 */
+		rc = LLC_PDU_LEN_U_XID;
 	else if (sk->sk_type == SOCK_STREAM)
 		rc = LLC_PDU_LEN_I;
 	return rc;
diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
index b554f26c68ee..79d1cef8f15a 100644
--- a/net/llc/llc_s_ac.c
+++ b/net/llc/llc_s_ac.c
@@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
 	struct llc_sap_state_ev *ev = llc_sap_ev(skb);
 	int rc;
 
-	llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap,
 			    ev->daddr.lsap, LLC_PDU_CMD);
 	llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
 	rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);

From 4d1014c1816c0395eca5d1d480f196a4c63119d0 Mon Sep 17 00:00:00 2001
From: Filip Schauer <filip@mg6.at>
Date: Tue, 27 Jul 2021 13:23:11 +0200
Subject: [PATCH 200/502] drivers core: Fix oops when driver probe fails

dma_range_map is freed to early, which might cause an oops when
a driver probe fails.
 Call trace:
  is_free_buddy_page+0xe4/0x1d4
  __free_pages+0x2c/0x88
  dma_free_contiguous+0x64/0x80
  dma_direct_free+0x38/0xb4
  dma_free_attrs+0x88/0xa0
  dmam_release+0x28/0x34
  release_nodes+0x78/0x8c
  devres_release_all+0xa8/0x110
  really_probe+0x118/0x2d0
  __driver_probe_device+0xc8/0xe0
  driver_probe_device+0x54/0xec
  __driver_attach+0xe0/0xf0
  bus_for_each_dev+0x7c/0xc8
  driver_attach+0x30/0x3c
  bus_add_driver+0x17c/0x1c4
  driver_register+0xc0/0xf8
  __platform_driver_register+0x34/0x40
  ...

This issue is introduced by commit d0243bbd5dd3 ("drivers core:
Free dma_range_map when driver probe failed"). It frees
dma_range_map before the call to devres_release_all, which is too
early. The solution is to free dma_range_map only after
devres_release_all.

Fixes: d0243bbd5dd3 ("drivers core: Free dma_range_map when driver probe failed")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Filip Schauer <filip@mg6.at>
Link: https://lore.kernel.org/r/20210727112311.GA7645@DESKTOP-E8BN1B0.localdomain
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/dd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index daeb9b5763ae..437cd61343b2 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -653,8 +653,6 @@ dev_groups_failed:
 	else if (drv->remove)
 		drv->remove(dev);
 probe_failed:
-	kfree(dev->dma_range_map);
-	dev->dma_range_map = NULL;
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 					     BUS_NOTIFY_DRIVER_NOT_BOUND, dev);
@@ -662,6 +660,8 @@ pinctrl_bind_failed:
 	device_links_no_driver(dev);
 	devres_release_all(dev);
 	arch_teardown_dma_ops(dev);
+	kfree(dev->dma_range_map);
+	dev->dma_range_map = NULL;
 	driver_sysfs_remove(dev);
 	dev->driver = NULL;
 	dev_set_drvdata(dev, NULL);

From 55f24c27b6c1a840b62fe297616f1f9ea3576cb7 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Tue, 27 Jul 2021 14:47:32 +0900
Subject: [PATCH 201/502] dmaengine: uniphier-xdmac: Use
 readl_poll_timeout_atomic() in atomic state

The function uniphier_xdmac_chan_stop() is only called in atomic state.
Should use readl_poll_timeout_atomic() there instead of
readl_poll_timeout().

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: 667b9251440b ("dmaengine: uniphier-xdmac: Add UniPhier external DMA controller driver")
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Link: https://lore.kernel.org/r/1627364852-28432-1-git-send-email-hayashi.kunihiko@socionext.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/uniphier-xdmac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c
index 16b19654873d..d6b8a202474f 100644
--- a/drivers/dma/uniphier-xdmac.c
+++ b/drivers/dma/uniphier-xdmac.c
@@ -209,8 +209,8 @@ static int uniphier_xdmac_chan_stop(struct uniphier_xdmac_chan *xc)
 	writel(0, xc->reg_ch_base + XDMAC_TSS);
 
 	/* wait until transfer is stopped */
-	return readl_poll_timeout(xc->reg_ch_base + XDMAC_STAT, val,
-				  !(val & XDMAC_STAT_TENF), 100, 1000);
+	return readl_poll_timeout_atomic(xc->reg_ch_base + XDMAC_STAT, val,
+					 !(val & XDMAC_STAT_TENF), 100, 1000);
 }
 
 /* xc->vc.lock must be held by caller */

From 801e541c79bbc63af852ca21b713ba87cc97c6ad Mon Sep 17 00:00:00 2001
From: Tang Bin <tangbin@cmss.chinamobile.com>
Date: Tue, 27 Jul 2021 20:25:06 +0800
Subject: [PATCH 202/502] nfc: s3fwrn5: fix undefined parameter values in
 dev_err()

In the function s3fwrn5_fw_download(), the 'ret' is not assigned,
so the correct value should be given in dev_err function.

Fixes: a0302ff5906a ("nfc: s3fwrn5: remove unnecessary label")
Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Signed-off-by: Tang Bin <tangbin@cmss.chinamobile.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/s3fwrn5/firmware.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c
index eb5d7a5beac7..1340fab9565e 100644
--- a/drivers/nfc/s3fwrn5/firmware.c
+++ b/drivers/nfc/s3fwrn5/firmware.c
@@ -423,7 +423,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
 	if (IS_ERR(tfm)) {
 		ret = PTR_ERR(tfm);
 		dev_err(&fw_info->ndev->nfc_dev->dev,
-			"Cannot allocate shash (code=%d)\n", ret);
+			"Cannot allocate shash (code=%ld)\n", PTR_ERR(tfm));
 		goto out;
 	}
 

From 9be550ee43919b070bcd77f9228bdbbbc073245b Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Wed, 21 Jul 2021 22:34:36 +0300
Subject: [PATCH 203/502] staging: rtl8712: get rid of flush_scheduled_work

This patch is preparation for following patch for error handling
refactoring.

flush_scheduled_work() takes (wq_completion)events lock and
it can lead to deadlock when r871xu_dev_remove() is called from workqueue.
To avoid deadlock sutiation we can change flush_scheduled_work() call to
flush_work() call for all possibly scheduled works in this driver,
since next patch adds device_release_driver() in case of fw load failure.

Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/6e028b4c457eeb7156c76c6ea3cdb3cb0207c7e1.1626895918.git.paskripkin@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8712/rtl8712_led.c     | 8 ++++++++
 drivers/staging/rtl8712/rtl871x_led.h     | 1 +
 drivers/staging/rtl8712/rtl871x_pwrctrl.c | 8 ++++++++
 drivers/staging/rtl8712/rtl871x_pwrctrl.h | 1 +
 drivers/staging/rtl8712/usb_intf.c        | 3 ++-
 5 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8712/rtl8712_led.c b/drivers/staging/rtl8712/rtl8712_led.c
index 5901026949f2..d5fc9026b036 100644
--- a/drivers/staging/rtl8712/rtl8712_led.c
+++ b/drivers/staging/rtl8712/rtl8712_led.c
@@ -1820,3 +1820,11 @@ void LedControl871x(struct _adapter *padapter, enum LED_CTL_MODE LedAction)
 		break;
 	}
 }
+
+void r8712_flush_led_works(struct _adapter *padapter)
+{
+	struct led_priv *pledpriv = &padapter->ledpriv;
+
+	flush_work(&pledpriv->SwLed0.BlinkWorkItem);
+	flush_work(&pledpriv->SwLed1.BlinkWorkItem);
+}
diff --git a/drivers/staging/rtl8712/rtl871x_led.h b/drivers/staging/rtl8712/rtl871x_led.h
index ee19c873cf01..2f0768132ad8 100644
--- a/drivers/staging/rtl8712/rtl871x_led.h
+++ b/drivers/staging/rtl8712/rtl871x_led.h
@@ -112,6 +112,7 @@ struct led_priv {
 void r8712_InitSwLeds(struct _adapter *padapter);
 void r8712_DeInitSwLeds(struct _adapter *padapter);
 void LedControl871x(struct _adapter *padapter, enum LED_CTL_MODE LedAction);
+void r8712_flush_led_works(struct _adapter *padapter);
 
 #endif
 
diff --git a/drivers/staging/rtl8712/rtl871x_pwrctrl.c b/drivers/staging/rtl8712/rtl871x_pwrctrl.c
index 23cff43437e2..cd6d9ff0bebc 100644
--- a/drivers/staging/rtl8712/rtl871x_pwrctrl.c
+++ b/drivers/staging/rtl8712/rtl871x_pwrctrl.c
@@ -224,3 +224,11 @@ void r8712_unregister_cmd_alive(struct _adapter *padapter)
 	}
 	mutex_unlock(&pwrctrl->mutex_lock);
 }
+
+void r8712_flush_rwctrl_works(struct _adapter *padapter)
+{
+	struct pwrctrl_priv *pwrctrl = &padapter->pwrctrlpriv;
+
+	flush_work(&pwrctrl->SetPSModeWorkItem);
+	flush_work(&pwrctrl->rpwm_workitem);
+}
diff --git a/drivers/staging/rtl8712/rtl871x_pwrctrl.h b/drivers/staging/rtl8712/rtl871x_pwrctrl.h
index bf6623cfaf27..b35b9c7920eb 100644
--- a/drivers/staging/rtl8712/rtl871x_pwrctrl.h
+++ b/drivers/staging/rtl8712/rtl871x_pwrctrl.h
@@ -108,5 +108,6 @@ void r8712_cpwm_int_hdl(struct _adapter *padapter,
 void r8712_set_ps_mode(struct _adapter *padapter, uint ps_mode,
 			uint smart_ps);
 void r8712_set_rpwm(struct _adapter *padapter, u8 val8);
+void r8712_flush_rwctrl_works(struct _adapter *padapter);
 
 #endif  /* __RTL871X_PWRCTRL_H_ */
diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c
index 2434b13c8b12..643f21eb1128 100644
--- a/drivers/staging/rtl8712/usb_intf.c
+++ b/drivers/staging/rtl8712/usb_intf.c
@@ -606,7 +606,8 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf)
 			padapter->surprise_removed = true;
 		if (pnetdev->reg_state != NETREG_UNINITIALIZED)
 			unregister_netdev(pnetdev); /* will call netdev_close() */
-		flush_scheduled_work();
+		r8712_flush_rwctrl_works(padapter);
+		r8712_flush_led_works(padapter);
 		udelay(1);
 		/* Stop driver mlme relation timer */
 		r8712_stop_drv_timers(padapter);

From e9e6aa51b2735d83a67d9fa0119cf11abef80d99 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Wed, 21 Jul 2021 22:34:47 +0300
Subject: [PATCH 204/502] staging: rtl8712: error handling refactoring

There was strange error handling logic in case of fw load failure. For
some reason fw loader callback was doing clean up stuff when fw is not
available. I don't see any reason behind doing this. Since this driver
doesn't have EEPROM firmware let's just disconnect it in case of fw load
failure. Doing clean up stuff in 2 different place which can run
concurently is not good idea and syzbot found 2 bugs related to this
strange approach.

So, in this pacth I deleted all clean up code from fw callback and made
a call to device_release_driver() under device_lock(parent) in case of fw
load failure. This approach is more generic and it defend driver from UAF
bugs, since all clean up code is moved to one place.

Fixes: e02a3b945816 ("staging: rtl8712: fix memory leak in rtl871x_load_fw_cb")
Fixes: 8c213fa59199 ("staging: r8712u: Use asynchronous firmware loading")
Cc: stable <stable@vger.kernel.org>
Reported-and-tested-by: syzbot+5872a520e0ce0a7c7230@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+cc699626e48a6ebaf295@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/d49ecc56e97c4df181d7bd4d240b031f315eacc3.1626895918.git.paskripkin@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8712/hal_init.c | 30 ++++++++++++-------
 drivers/staging/rtl8712/usb_intf.c | 48 +++++++++++++-----------------
 2 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/drivers/staging/rtl8712/hal_init.c b/drivers/staging/rtl8712/hal_init.c
index 22974277afa0..4eff3fdecdb8 100644
--- a/drivers/staging/rtl8712/hal_init.c
+++ b/drivers/staging/rtl8712/hal_init.c
@@ -29,21 +29,31 @@
 #define FWBUFF_ALIGN_SZ 512
 #define MAX_DUMP_FWSZ (48 * 1024)
 
+static void rtl871x_load_fw_fail(struct _adapter *adapter)
+{
+	struct usb_device *udev = adapter->dvobjpriv.pusbdev;
+	struct device *dev = &udev->dev;
+	struct device *parent = dev->parent;
+
+	complete(&adapter->rtl8712_fw_ready);
+
+	dev_err(&udev->dev, "r8712u: Firmware request failed\n");
+
+	if (parent)
+		device_lock(parent);
+
+	device_release_driver(dev);
+
+	if (parent)
+		device_unlock(parent);
+}
+
 static void rtl871x_load_fw_cb(const struct firmware *firmware, void *context)
 {
 	struct _adapter *adapter = context;
 
 	if (!firmware) {
-		struct usb_device *udev = adapter->dvobjpriv.pusbdev;
-		struct usb_interface *usb_intf = adapter->pusb_intf;
-
-		dev_err(&udev->dev, "r8712u: Firmware request failed\n");
-		usb_put_dev(udev);
-		usb_set_intfdata(usb_intf, NULL);
-		r8712_free_drv_sw(adapter);
-		adapter->dvobj_deinit(adapter);
-		complete(&adapter->rtl8712_fw_ready);
-		free_netdev(adapter->pnetdev);
+		rtl871x_load_fw_fail(adapter);
 		return;
 	}
 	adapter->fw = firmware;
diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c
index 643f21eb1128..505ebeb643dc 100644
--- a/drivers/staging/rtl8712/usb_intf.c
+++ b/drivers/staging/rtl8712/usb_intf.c
@@ -591,36 +591,30 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf)
 {
 	struct net_device *pnetdev = usb_get_intfdata(pusb_intf);
 	struct usb_device *udev = interface_to_usbdev(pusb_intf);
+	struct _adapter *padapter = netdev_priv(pnetdev);
 
-	if (pnetdev) {
-		struct _adapter *padapter = netdev_priv(pnetdev);
+	/* never exit with a firmware callback pending */
+	wait_for_completion(&padapter->rtl8712_fw_ready);
+	usb_set_intfdata(pusb_intf, NULL);
+	release_firmware(padapter->fw);
+	if (drvpriv.drv_registered)
+		padapter->surprise_removed = true;
+	if (pnetdev->reg_state != NETREG_UNINITIALIZED)
+		unregister_netdev(pnetdev); /* will call netdev_close() */
+	r8712_flush_rwctrl_works(padapter);
+	r8712_flush_led_works(padapter);
+	udelay(1);
+	/* Stop driver mlme relation timer */
+	r8712_stop_drv_timers(padapter);
+	r871x_dev_unload(padapter);
+	r8712_free_drv_sw(padapter);
+	free_netdev(pnetdev);
 
-		/* never exit with a firmware callback pending */
-		wait_for_completion(&padapter->rtl8712_fw_ready);
-		pnetdev = usb_get_intfdata(pusb_intf);
-		usb_set_intfdata(pusb_intf, NULL);
-		if (!pnetdev)
-			goto firmware_load_fail;
-		release_firmware(padapter->fw);
-		if (drvpriv.drv_registered)
-			padapter->surprise_removed = true;
-		if (pnetdev->reg_state != NETREG_UNINITIALIZED)
-			unregister_netdev(pnetdev); /* will call netdev_close() */
-		r8712_flush_rwctrl_works(padapter);
-		r8712_flush_led_works(padapter);
-		udelay(1);
-		/* Stop driver mlme relation timer */
-		r8712_stop_drv_timers(padapter);
-		r871x_dev_unload(padapter);
-		r8712_free_drv_sw(padapter);
-		free_netdev(pnetdev);
+	/* decrease the reference count of the usb device structure
+	 * when disconnect
+	 */
+	usb_put_dev(udev);
 
-		/* decrease the reference count of the usb device structure
-		 * when disconnect
-		 */
-		usb_put_dev(udev);
-	}
-firmware_load_fail:
 	/* If we didn't unplug usb dongle and remove/insert module, driver
 	 * fails on sitesurvey for the first time when device is up.
 	 * Reset usb port for sitesurvey fail issue.

From c7b65650c7f41d3946c4e2f0bb56dfdb92cfe127 Mon Sep 17 00:00:00 2001
From: Sergio Paracuellos <sergio.paracuellos@gmail.com>
Date: Tue, 27 Jul 2021 07:40:58 +0200
Subject: [PATCH 205/502] staging: mt7621-pci: avoid to re-disable clock for
 those pcies not in use

Clock driver for this SoC is using some gates to properly enabling
and disabling the access to peripherals. Those gates that are not
in use are properly being automatically disabled by the kernel.
Pcie driver is explicitly doing a 'clk_disable_unprepare' call for
gates of those pcies that are not used. Since kernel has already
disabled them, the following warnings appear:

WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:952 clk_core_disable+0xe4/0x100
pcie2 already disabled
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0 #0
Stack : 81661680 80082d00 807c0000 00000004 00000000
        80a20000 80860000 80792380
        814503d4 80862e83 00000000 1431b70 81454360
        00000000 00000000 80792380
        81431a08 ffffefff fffffea 00000000 81431a14
        0000007b 80868820 ffffffff
        80792380 1431c70 803d7a24 00000009 807f3a74
        00000001 815df810 00000018 0000000 80a20000
...
Call Trace:
[<80007ed8>] show_stack+0x28/0xf0
[<80381e40>] dump_stack_lvl+0x60/0x80
[<8002cf90>] __warn+0xcc/0x140
[<8002d090>] warn_slowpath_fmt+0x8c/0xac
[<803d7a24>] clk_core_disable+0xe4/0x100
[<803da468>] clk_disable+0x38/0x58
[<804cb730>] mt7621_pci_probe+0x980/0xa50
[<8041e624>] platform_probe+0x50/0xbc
[<8041bfe4>] really_probe.part.0+0xa8/0x340
[<8041c3dc>] driver_probe_device+0x4c/0x154
[<8041cb88>] __driver_attach+0xb4/0x1b4
[<80419a38>] bus_for_each_dev+0x68/0xa4
[<8041b1e8>] bus_add_driver+0x134/0x214
[<8041d3bc>] driver_register+0x98/0x154
[<80001648>] do_one_initcall+0x50/0x1a8
[<808ea1fc>] kernel_init_freeable+0x270/0x30c
[<806dd9dc>] kernel_init+0x20/0x110
[<80002d98>] ret_from_kernel_thread+0x14/0x1c

WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:810 clk_core_unprepare+0xf4/0x194
pcie2 already unprepared
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Tainted: G        W 5.14.0 #0
Stack : 81661680 80082d00 807c0000 00000004 00000000
        00000000 81431bc4 80a20000
        80860000 80792380 814503d4 80862e83 00000000
        00000001 81431b70 81454360
        00000000 00000000 80792380 81431a08 ffffefff
        00000000 ffffffea 00000000
        81431a14 0000009b 80868820 ffffffff 80792380
        00000001 81431c70 803d7764
        00000009 807f3a74 00000001 815df810 00000018
        8040b36c 00000000 80a20000
...
Call Trace:
[<80007ed8>] show_stack+0x28/0xf0
[<80381e40>] dump_stack_lvl+0x60/0x80
[<8002cf90>] __warn+0xcc/0x140
[<8002d090>] warn_slowpath_fmt+0x8c/0xac
[<803d7764>] clk_core_unprepare+0xf4/0x194
[<803d97c4>] clk_unprepare+0x30/0x48
[<804cb738>] mt7621_pci_probe+0x988/0xa50
[<8041e624>] platform_probe+0x50/0xbc
[<8041bfe4>] really_probe.part.0+0xa8/0x340
[<8041c3dc>] driver_probe_device+0x4c/0x154
[<8041cb88>] __driver_attach+0xb4/0x1b4
[<80419a38>] bus_for_each_dev+0x68/0xa4
[<8041b1e8>] bus_add_driver+0x134/0x214
[<8041d3bc>] driver_register+0x98/0x154
[<80001648>] do_one_initcall+0x50/0x1a8
[<808ea1fc>] kernel_init_freeable+0x270/0x30c
[<806dd9dc>] kernel_init+0x20/0x110
[<80002d98>] ret_from_kernel_thread+0x14/0x1c

Avoid to explicitly disable already disabled pcie gates
fixes the problem.

Fixes: cc4e864a5ce4 ("staging: mt7621-pci: make use of kernel clock apis")
Reported-by: DENG Qingfang <dqfext@gmail.com>
Signed-off-by: Sergio Paracuellos <sergio.paracuellos@gmail.com>
Link: https://lore.kernel.org/r/20210727054058.10612-1-sergio.paracuellos@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/mt7621-pci/pci-mt7621.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/mt7621-pci/pci-mt7621.c b/drivers/staging/mt7621-pci/pci-mt7621.c
index 691030e1a5ed..f9bdf4e33134 100644
--- a/drivers/staging/mt7621-pci/pci-mt7621.c
+++ b/drivers/staging/mt7621-pci/pci-mt7621.c
@@ -422,7 +422,6 @@ static void mt7621_pcie_init_ports(struct mt7621_pcie *pcie)
 			dev_err(dev, "pcie%d no card, disable it (RST & CLK)\n",
 				slot);
 			mt7621_control_assert(port);
-			clk_disable_unprepare(port->clk);
 			port->enabled = false;
 
 			if (slot == 0) {

From 30fad76ce4e98263edfa8f885c81d5426c1bf169 Mon Sep 17 00:00:00 2001
From: "Qiang.zhang" <qiang.zhang@windriver.com>
Date: Fri, 23 Jul 2021 08:43:34 +0800
Subject: [PATCH 206/502] USB: usbtmc: Fix RCU stall warning

rcu: INFO: rcu_preempt self-detected stall on CPU
rcu:    1-...!: (2 ticks this GP) idle=d92/1/0x4000000000000000
        softirq=25390/25392 fqs=3
        (t=12164 jiffies g=31645 q=43226)
rcu: rcu_preempt kthread starved for 12162 jiffies! g31645 f0x0
     RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0
rcu:    Unless rcu_preempt kthread gets sufficient CPU time,
        OOM is now expected behavior.
rcu: RCU grace-period kthread stack dump:
task:rcu_preempt     state:R  running task
...........
usbtmc 3-1:0.0: unknown status received: -71
usbtmc 3-1:0.0: unknown status received: -71
usbtmc 3-1:0.0: unknown status received: -71
usbtmc 3-1:0.0: unknown status received: -71
usbtmc 3-1:0.0: unknown status received: -71
usbtmc 3-1:0.0: unknown status received: -71
usbtmc 3-1:0.0: unknown status received: -71
usbtmc 3-1:0.0: unknown status received: -71
usbtmc 3-1:0.0: usb_submit_urb failed: -19

The function usbtmc_interrupt() resubmits urbs when the error status
of an urb is -EPROTO. In systems using the dummy_hcd usb controller
this can result in endless interrupt loops when the usbtmc device is
disconnected from the host system.

Since host controller drivers already try to recover from transmission
errors, there is no need to resubmit the urb or try other solutions
to repair the error situation.

In case of errors the INT pipe just stops to wait for further packets.

Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation")
Cc: stable@vger.kernel.org
Reported-by: syzbot+e2eae5639e7203360018@syzkaller.appspotmail.com
Signed-off-by: Qiang.zhang <qiang.zhang@windriver.com>
Acked-by: Guido Kiener <guido.kiener@rohde-schwarz.com>
Link: https://lore.kernel.org/r/20210723004334.458930-1-qiang.zhang@windriver.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usbtmc.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 74d5a9c5238a..73f419adce61 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -2324,17 +2324,10 @@ static void usbtmc_interrupt(struct urb *urb)
 		dev_err(dev, "overflow with length %d, actual length is %d\n",
 			data->iin_wMaxPacketSize, urb->actual_length);
 		fallthrough;
-	case -ECONNRESET:
-	case -ENOENT:
-	case -ESHUTDOWN:
-	case -EILSEQ:
-	case -ETIME:
-	case -EPIPE:
+	default:
 		/* urb terminated, clean up */
 		dev_dbg(dev, "urb terminated, status: %d\n", status);
 		return;
-	default:
-		dev_err(dev, "unknown status received: %d\n", status);
 	}
 exit:
 	rv = usb_submit_urb(urb, GFP_ATOMIC);

From fa4a8dcfd51b911f101ebc461dfe22230b74dd64 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Tue, 27 Jul 2021 15:31:42 +0800
Subject: [PATCH 207/502] usb: gadget: remove leaked entry from udc driver list

The usb_add_gadget_udc will add a new gadget to the udc class
driver list. Not calling usb_del_gadget_udc in error branch
will result in residual gadget entry in the udc driver list.
We fix it by calling usb_del_gadget_udc to clean it when error
return.

Fixes: 48ba02b2e2b1 ("usb: gadget: add udc driver for max3420")
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Link: https://lore.kernel.org/r/20210727073142.84666-1-zhangqilong3@huawei.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/max3420_udc.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c
index 34f4db554977..d2a2b20cc1ad 100644
--- a/drivers/usb/gadget/udc/max3420_udc.c
+++ b/drivers/usb/gadget/udc/max3420_udc.c
@@ -1255,12 +1255,14 @@ static int max3420_probe(struct spi_device *spi)
 	err = devm_request_irq(&spi->dev, irq, max3420_irq_handler, 0,
 			       "max3420", udc);
 	if (err < 0)
-		return err;
+		goto del_gadget;
 
 	udc->thread_task = kthread_create(max3420_thread, udc,
 					  "max3420-thread");
-	if (IS_ERR(udc->thread_task))
-		return PTR_ERR(udc->thread_task);
+	if (IS_ERR(udc->thread_task)) {
+		err = PTR_ERR(udc->thread_task);
+		goto del_gadget;
+	}
 
 	irq = of_irq_get_byname(spi->dev.of_node, "vbus");
 	if (irq <= 0) { /* no vbus irq implies self-powered design */
@@ -1280,10 +1282,14 @@ static int max3420_probe(struct spi_device *spi)
 		err = devm_request_irq(&spi->dev, irq,
 				       max3420_vbus_handler, 0, "vbus", udc);
 		if (err < 0)
-			return err;
+			goto del_gadget;
 	}
 
 	return 0;
+
+del_gadget:
+	usb_del_gadget_udc(&udc->gadget);
+	return err;
 }
 
 static int max3420_remove(struct spi_device *spi)

From 2867652e4766360adf14dfda3832455e04964f2a Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.com>
Date: Fri, 23 Jul 2021 18:59:30 +0300
Subject: [PATCH 208/502] usb: gadget: f_hid: fixed NULL pointer dereference

Disconnecting and reconnecting the USB cable can lead to crashes
and a variety of kernel log spam.

The problem was found and reproduced on the Raspberry Pi [1]
and the original fix was created in Raspberry's own fork [2].

Link: https://github.com/raspberrypi/linux/issues/3870 [1]
Link: https://github.com/raspberrypi/linux/commit/a6e47d5f4efbd2ea6a0b6565cd2f9b7bb217ded5 [2]
Signed-off-by: Maxim Devaev <mdevaev@gmail.com>
Signed-off-by: Phil Elwell <phil@raspberrypi.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210723155928.210019-1-mdevaev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_hid.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 02683ac0719d..08e73e8127b1 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -338,6 +338,11 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer,
 
 	spin_lock_irqsave(&hidg->write_spinlock, flags);
 
+	if (!hidg->req) {
+		spin_unlock_irqrestore(&hidg->write_spinlock, flags);
+		return -ESHUTDOWN;
+	}
+
 #define WRITE_COND (!hidg->write_pending)
 try_again:
 	/* write queue */
@@ -358,8 +363,14 @@ try_again:
 	count  = min_t(unsigned, count, hidg->report_length);
 
 	spin_unlock_irqrestore(&hidg->write_spinlock, flags);
-	status = copy_from_user(req->buf, buffer, count);
 
+	if (!req) {
+		ERROR(hidg->func.config->cdev, "hidg->req is NULL\n");
+		status = -ESHUTDOWN;
+		goto release_write_pending;
+	}
+
+	status = copy_from_user(req->buf, buffer, count);
 	if (status != 0) {
 		ERROR(hidg->func.config->cdev,
 			"copy_from_user error\n");
@@ -387,15 +398,18 @@ try_again:
 
 	spin_unlock_irqrestore(&hidg->write_spinlock, flags);
 
-	status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC);
-	if (status < 0) {
-		ERROR(hidg->func.config->cdev,
-			"usb_ep_queue error on int endpoint %zd\n", status);
+	if (!hidg->in_ep->enabled) {
+		ERROR(hidg->func.config->cdev, "in_ep is disabled\n");
+		status = -ESHUTDOWN;
 		goto release_write_pending;
-	} else {
-		status = count;
 	}
 
+	status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC);
+	if (status < 0)
+		goto release_write_pending;
+	else
+		status = count;
+
 	return status;
 release_write_pending:
 	spin_lock_irqsave(&hidg->write_spinlock, flags);

From afcff6dc690e24d636a41fd4bee6057e7c70eebd Mon Sep 17 00:00:00 2001
From: Maxim Devaev <mdevaev@gmail.com>
Date: Wed, 21 Jul 2021 21:03:51 +0300
Subject: [PATCH 209/502] usb: gadget: f_hid: added GET_IDLE and SET_IDLE
 handlers

The USB HID standard declares mandatory support for GET_IDLE and SET_IDLE
requests for Boot Keyboard. Most hosts can handle their absence, but others
like some old/strange UEFIs and BIOSes consider this a critical error
and refuse to work with f_hid.

This primitive implementation of saving and returning idle is sufficient
to meet the requirements of the standard and these devices.

Acked-by: Felipe Balbi <balbi@kernel.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Maxim Devaev <mdevaev@gmail.com>
Link: https://lore.kernel.org/r/20210721180351.129450-1-mdevaev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_hid.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 08e73e8127b1..8d50c8b127fd 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -41,6 +41,7 @@ struct f_hidg {
 	unsigned char			bInterfaceSubClass;
 	unsigned char			bInterfaceProtocol;
 	unsigned char			protocol;
+	unsigned char			idle;
 	unsigned short			report_desc_length;
 	char				*report_desc;
 	unsigned short			report_length;
@@ -537,6 +538,14 @@ static int hidg_setup(struct usb_function *f,
 		goto respond;
 		break;
 
+	case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
+		  | HID_REQ_GET_IDLE):
+		VDBG(cdev, "get_idle\n");
+		length = min_t(unsigned int, length, 1);
+		((u8 *) req->buf)[0] = hidg->idle;
+		goto respond;
+		break;
+
 	case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
 		  | HID_REQ_SET_REPORT):
 		VDBG(cdev, "set_report | wLength=%d\n", ctrl->wLength);
@@ -560,6 +569,14 @@ static int hidg_setup(struct usb_function *f,
 		goto stall;
 		break;
 
+	case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
+		  | HID_REQ_SET_IDLE):
+		VDBG(cdev, "set_idle\n");
+		length = 0;
+		hidg->idle = value;
+		goto respond;
+		break;
+
 	case ((USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_INTERFACE) << 8
 		  | USB_REQ_GET_DESCRIPTOR):
 		switch (value >> 8) {
@@ -787,6 +804,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
 	hidg_interface_desc.bInterfaceSubClass = hidg->bInterfaceSubClass;
 	hidg_interface_desc.bInterfaceProtocol = hidg->bInterfaceProtocol;
 	hidg->protocol = HID_REPORT_PROTOCOL;
+	hidg->idle = 1;
 	hidg_ss_in_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length);
 	hidg_ss_in_comp_desc.wBytesPerInterval =
 				cpu_to_le16(hidg->report_length);

From 68d9f95d6fd5399d105eaf2308c243536c5d7664 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 27 Jul 2021 13:41:34 +0300
Subject: [PATCH 210/502] usb: musb: Fix suspend and resume issues for PHYs on
 I2C and SPI

As the USB PHYs typically are on I2C or SPI bus for the 2430 glue layer,
we need configure the PHYs early for suspend.

The musb glue layer we need to suspend only after musb_suspend() in
suspend_late.

Fixes: 62d472d8ad88 ("usb: musb: Add missing PM suspend and resume functions for 2430 glue")
Reported-by: Andreas Kemnade <andreas@kemnade.info>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Link: https://lore.kernel.org/r/20210727104134.52800-1-tony@atomide.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/omap2430.c | 43 ++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 640a46f0d118..f086960fe2b5 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -35,6 +35,7 @@ struct omap2430_glue {
 	struct device		*control_otghs;
 	unsigned int		is_runtime_suspended:1;
 	unsigned int		needs_resume:1;
+	unsigned int		phy_suspended:1;
 };
 #define glue_to_musb(g)		platform_get_drvdata(g->musb)
 
@@ -458,8 +459,10 @@ static int omap2430_runtime_suspend(struct device *dev)
 
 	omap2430_low_level_exit(musb);
 
-	phy_power_off(musb->phy);
-	phy_exit(musb->phy);
+	if (!glue->phy_suspended) {
+		phy_power_off(musb->phy);
+		phy_exit(musb->phy);
+	}
 
 	glue->is_runtime_suspended = 1;
 
@@ -474,8 +477,10 @@ static int omap2430_runtime_resume(struct device *dev)
 	if (!musb)
 		return 0;
 
-	phy_init(musb->phy);
-	phy_power_on(musb->phy);
+	if (!glue->phy_suspended) {
+		phy_init(musb->phy);
+		phy_power_on(musb->phy);
+	}
 
 	omap2430_low_level_init(musb);
 	musb_writel(musb->mregs, OTG_INTERFSEL,
@@ -489,7 +494,21 @@ static int omap2430_runtime_resume(struct device *dev)
 	return 0;
 }
 
+/* I2C and SPI PHYs need to be suspended before the glue layer */
 static int omap2430_suspend(struct device *dev)
+{
+	struct omap2430_glue *glue = dev_get_drvdata(dev);
+	struct musb *musb = glue_to_musb(glue);
+
+	phy_power_off(musb->phy);
+	phy_exit(musb->phy);
+	glue->phy_suspended = 1;
+
+	return 0;
+}
+
+/* Glue layer needs to be suspended after musb_suspend() */
+static int omap2430_suspend_late(struct device *dev)
 {
 	struct omap2430_glue *glue = dev_get_drvdata(dev);
 
@@ -501,7 +520,7 @@ static int omap2430_suspend(struct device *dev)
 	return omap2430_runtime_suspend(dev);
 }
 
-static int omap2430_resume(struct device *dev)
+static int omap2430_resume_early(struct device *dev)
 {
 	struct omap2430_glue *glue = dev_get_drvdata(dev);
 
@@ -513,10 +532,24 @@ static int omap2430_resume(struct device *dev)
 	return omap2430_runtime_resume(dev);
 }
 
+static int omap2430_resume(struct device *dev)
+{
+	struct omap2430_glue *glue = dev_get_drvdata(dev);
+	struct musb *musb = glue_to_musb(glue);
+
+	phy_init(musb->phy);
+	phy_power_on(musb->phy);
+	glue->phy_suspended = 0;
+
+	return 0;
+}
+
 static const struct dev_pm_ops omap2430_pm_ops = {
 	.runtime_suspend = omap2430_runtime_suspend,
 	.runtime_resume = omap2430_runtime_resume,
 	.suspend = omap2430_suspend,
+	.suspend_late = omap2430_suspend_late,
+	.resume_early = omap2430_resume_early,
 	.resume = omap2430_resume,
 };
 

From 00de6a572f30ee93cad7e0704ec4232e5e72bda8 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Wed, 21 Jul 2021 16:29:05 +0300
Subject: [PATCH 211/502] usb: host: ohci-at91: suspend/resume ports
 after/before OHCI accesses

On SAMA7G5 suspending ports will cut the access to OHCI registers and
any subsequent access to them will lead to CPU being blocked trying to
access that memory. Same thing happens on resume: if OHCI memory is
accessed before resuming ports the CPU will block on that access. The
OCHI memory is accessed on suspend/resume though
ohci_suspend()/ohci_resume().

Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210721132905.1970713-1-claudiu.beznea@microchip.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ohci-at91.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index 9bbd7ddd0003..a24aea3d2759 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -611,8 +611,6 @@ ohci_hcd_at91_drv_suspend(struct device *dev)
 	if (ohci_at91->wakeup)
 		enable_irq_wake(hcd->irq);
 
-	ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1);
-
 	ret = ohci_suspend(hcd, ohci_at91->wakeup);
 	if (ret) {
 		if (ohci_at91->wakeup)
@@ -632,7 +630,10 @@ ohci_hcd_at91_drv_suspend(struct device *dev)
 		/* flush the writes */
 		(void) ohci_readl (ohci, &ohci->regs->control);
 		msleep(1);
+		ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1);
 		at91_stop_clock(ohci_at91);
+	} else {
+		ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1);
 	}
 
 	return ret;
@@ -644,6 +645,8 @@ ohci_hcd_at91_drv_resume(struct device *dev)
 	struct usb_hcd	*hcd = dev_get_drvdata(dev);
 	struct ohci_at91_priv *ohci_at91 = hcd_to_ohci_at91_priv(hcd);
 
+	ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0);
+
 	if (ohci_at91->wakeup)
 		disable_irq_wake(hcd->irq);
 	else
@@ -651,8 +654,6 @@ ohci_hcd_at91_drv_resume(struct device *dev)
 
 	ohci_resume(hcd, false);
 
-	ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0);
-
 	return 0;
 }
 

From bf88fef0b6f1488abeca594d377991171c00e52a Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Sat, 17 Jul 2021 21:21:27 +0300
Subject: [PATCH 212/502] usb: otg-fsm: Fix hrtimer list corruption

The HNP work can be re-scheduled while it's still in-fly. This results in
re-initialization of the busy work, resetting the hrtimer's list node of
the work and crashing kernel with null dereference within kernel/timer
once work's timer is expired. It's very easy to trigger this problem by
re-plugging USB cable quickly. Initialize HNP work only once to fix this
trouble.

 Unable to handle kernel NULL pointer dereference at virtual address 00000126)
 ...
 PC is at __run_timers.part.0+0x150/0x228
 LR is at __next_timer_interrupt+0x51/0x9c
 ...
 (__run_timers.part.0) from [<c0187a2b>] (run_timer_softirq+0x2f/0x50)
 (run_timer_softirq) from [<c01013ad>] (__do_softirq+0xd5/0x2f0)
 (__do_softirq) from [<c012589b>] (irq_exit+0xab/0xb8)
 (irq_exit) from [<c0170341>] (handle_domain_irq+0x45/0x60)
 (handle_domain_irq) from [<c04c4a43>] (gic_handle_irq+0x6b/0x7c)
 (gic_handle_irq) from [<c0100b65>] (__irq_svc+0x65/0xac)

Cc: stable@vger.kernel.org
Acked-by: Peter Chen <peter.chen@kernel.org>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Link: https://lore.kernel.org/r/20210717182134.30262-6-digetx@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/common/usb-otg-fsm.c | 6 +++++-
 include/linux/usb/otg-fsm.h      | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c
index 3740cf95560e..0697fde51d00 100644
--- a/drivers/usb/common/usb-otg-fsm.c
+++ b/drivers/usb/common/usb-otg-fsm.c
@@ -193,7 +193,11 @@ static void otg_start_hnp_polling(struct otg_fsm *fsm)
 	if (!fsm->host_req_flag)
 		return;
 
-	INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work);
+	if (!fsm->hnp_work_inited) {
+		INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work);
+		fsm->hnp_work_inited = true;
+	}
+
 	schedule_delayed_work(&fsm->hnp_polling_work,
 					msecs_to_jiffies(T_HOST_REQ_POLL));
 }
diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h
index 3aee78dda16d..784659d4dc99 100644
--- a/include/linux/usb/otg-fsm.h
+++ b/include/linux/usb/otg-fsm.h
@@ -196,6 +196,7 @@ struct otg_fsm {
 	struct mutex lock;
 	u8 *host_req_flag;
 	struct delayed_work hnp_polling_work;
+	bool hnp_work_inited;
 	bool state_changed;
 };
 

From 4c4c1257b844ffe5d0933684e612f92c4b78e120 Mon Sep 17 00:00:00 2001
From: Shuo Liu <shuo.a.liu@intel.com>
Date: Thu, 22 Jul 2021 14:27:36 +0800
Subject: [PATCH 213/502] virt: acrn: Do hcall_destroy_vm() before resource
 release

The ACRN hypervisor has scenarios which could run a real-time guest VM.
The real-time guest VM occupies dedicated CPU cores, be assigned with
dedicated PCI devices. It can run without the Service VM after boot up.
hcall_destroy_vm() returns failure when a real-time guest VM refuses.
The clearing of flag ACRN_VM_FLAG_DESTROYED causes some kernel resource
double-freed in a later acrn_vm_destroy().

Do hcall_destroy_vm() before resource release to drop this chance to
destroy the VM if hypercall fails.

Fixes: 9c5137aedd11 ("virt: acrn: Introduce VM management interfaces")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Shuo Liu <shuo.a.liu@intel.com>
Signed-off-by: Fei Li <fei1.li@intel.com>
Link: https://lore.kernel.org/r/20210722062736.15050-1-fei1.li@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/virt/acrn/vm.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/virt/acrn/vm.c b/drivers/virt/acrn/vm.c
index 0d002a355a93..fbc9f1042000 100644
--- a/drivers/virt/acrn/vm.c
+++ b/drivers/virt/acrn/vm.c
@@ -64,6 +64,14 @@ int acrn_vm_destroy(struct acrn_vm *vm)
 	    test_and_set_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags))
 		return 0;
 
+	ret = hcall_destroy_vm(vm->vmid);
+	if (ret < 0) {
+		dev_err(acrn_dev.this_device,
+			"Failed to destroy VM %u\n", vm->vmid);
+		clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags);
+		return ret;
+	}
+
 	/* Remove from global VM list */
 	write_lock_bh(&acrn_vm_list_lock);
 	list_del_init(&vm->list);
@@ -78,14 +86,6 @@ int acrn_vm_destroy(struct acrn_vm *vm)
 		vm->monitor_page = NULL;
 	}
 
-	ret = hcall_destroy_vm(vm->vmid);
-	if (ret < 0) {
-		dev_err(acrn_dev.this_device,
-			"Failed to destroy VM %u\n", vm->vmid);
-		clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags);
-		return ret;
-	}
-
 	acrn_vm_all_ram_unmap(vm);
 
 	dev_dbg(acrn_dev.this_device, "VM %u destroyed.\n", vm->vmid);

From b910a0206b59eb90ea8ff76d146f4c3156da61e9 Mon Sep 17 00:00:00 2001
From: Robert Foss <robert.foss@linaro.org>
Date: Mon, 28 Jun 2021 10:50:33 +0200
Subject: [PATCH 214/502] drm/msm/dpu: Fix sm8250_mdp register length

The downstream dts lists this value as 0x494, and not
0x45c.

Fixes: af776a3e1c30 ("drm/msm/dpu: add SM8250 to hw catalog")
Signed-off-by: Robert Foss <robert.foss@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@somainline.org>
Link: https://lore.kernel.org/r/20210628085033.9905-1-robert.foss@linaro.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index d01c4c919504..704dace895cb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -296,7 +296,7 @@ static const struct dpu_mdp_cfg sc7180_mdp[] = {
 static const struct dpu_mdp_cfg sm8250_mdp[] = {
 	{
 	.name = "top_0", .id = MDP_TOP,
-	.base = 0x0, .len = 0x45C,
+	.base = 0x0, .len = 0x494,
 	.features = 0,
 	.highest_bank_bit = 0x3, /* TODO: 2 for LP_DDR4 */
 	.clk_ctrls[DPU_CLK_CTRL_VIG0] = {

From bceddc2cb581dffc94370517f7eedbd9aa16c74b Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Wed, 7 Jul 2021 11:01:13 -0700
Subject: [PATCH 215/502] drm/msm: Fix display fault handling

It turns out that when the display is enabled by the bootloader, we can
get some transient iommu faults from the display.  Which doesn't go over
too well when we install a fault handler that is gpu specific.  To avoid
this, defer installing the fault handler until we get around to setting
up per-process pgtables (which is adreno_smmu specific).  The arm-smmu
fallback error reporting is sufficient for reporting display related
faults (and in fact was all we had prior to f8f934c180f629bb927a04fd90d)

Reported-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reported-by: Yassine Oudjana <y.oudjana@protonmail.com>
Fixes: 2a574cc05d38 ("drm/msm: Improve the a6xx page fault handler")
Signed-off-by: Rob Clark <robdclark@chromium.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Tested-by: Yassine Oudjana <y.oudjana@protonmail.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210707180113.840741-1-robdclark@gmail.com
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_iommu.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index eed2a762e9dd..bcaddbba564d 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -142,6 +142,9 @@ static const struct iommu_flush_ops null_tlb_ops = {
 	.tlb_add_page = msm_iommu_tlb_add_page,
 };
 
+static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,
+		unsigned long iova, int flags, void *arg);
+
 struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
 {
 	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(parent->dev);
@@ -157,6 +160,13 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
 	if (!ttbr1_cfg)
 		return ERR_PTR(-ENODEV);
 
+	/*
+	 * Defer setting the fault handler until we have a valid adreno_smmu
+	 * to avoid accidentially installing a GPU specific fault handler for
+	 * the display's iommu
+	 */
+	iommu_set_fault_handler(iommu->domain, msm_fault_handler, iommu);
+
 	pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);
 	if (!pagetable)
 		return ERR_PTR(-ENOMEM);
@@ -300,7 +310,6 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
 
 	iommu->domain = domain;
 	msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU);
-	iommu_set_fault_handler(domain, msm_fault_handler, iommu);
 
 	atomic_set(&iommu->pagetables, 0);
 

From 7591c532b818ef4b8e3e635d842547c08b3a32b4 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <khsieh@codeaurora.org>
Date: Tue, 13 Jul 2021 08:54:01 -0700
Subject: [PATCH 216/502] drm/msm/dp: use dp_ctrl_off_link_stream during PHY
 compliance test run

DP cable should always connect to DPU during the entire PHY compliance
testing run. Since DP PHY compliance test is executed at irq_hpd event
context, dp_ctrl_off_link_stream() should be used instead of dp_ctrl_off().
dp_ctrl_off() is used for unplug event which is triggered when DP cable is
dis connected.

Changes in V2:
-- add fixes statement

Fixes: f21c8a276c2d ("drm/msm/dp: handle irq_hpd with sink_count = 0 correctly")

Signed-off-by: Kuogee Hsieh <khsieh@codeaurora.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/1626191647-13901-2-git-send-email-khsieh@codeaurora.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dp/dp_ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index ee221d835fa0..eaddfd739885 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -1526,7 +1526,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl)
 	 * running. Add the global reset just before disabling the
 	 * link clocks and core clocks.
 	 */
-	ret = dp_ctrl_off(&ctrl->dp_ctrl);
+	ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl);
 	if (ret) {
 		DRM_ERROR("failed to disable DP controller\n");
 		return ret;

From f9a39932fa54b6421e751ada7a285da809146421 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 21 Jul 2021 19:44:34 -0700
Subject: [PATCH 217/502] drm/msm/dp: Initialize the INTF_CONFIG register

Some bootloaders set the widebus enable bit in the INTF_CONFIG register,
but configuration of widebus isn't yet supported ensure that the
register has a known value, with widebus disabled.

Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support")
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20210722024434.3313167-1-bjorn.andersson@linaro.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dp/dp_catalog.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c
index ca96e3514790..c0423e76eed7 100644
--- a/drivers/gpu/drm/msm/dp/dp_catalog.c
+++ b/drivers/gpu/drm/msm/dp/dp_catalog.c
@@ -771,6 +771,7 @@ int dp_catalog_panel_timing_cfg(struct dp_catalog *dp_catalog)
 	dp_write_link(catalog, REG_DP_HSYNC_VSYNC_WIDTH_POLARITY,
 				dp_catalog->width_blanking);
 	dp_write_link(catalog, REG_DP_ACTIVE_HOR_VER, dp_catalog->dp_active);
+	dp_write_p0(catalog, MMSS_DP_INTF_CONFIG, 0);
 	return 0;
 }
 

From afc9b8b6bab8d3d3a9ae67e1d64093ad626c92a0 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <khsieh@codeaurora.org>
Date: Fri, 23 Jul 2021 09:55:39 -0700
Subject: [PATCH 218/502] drm/msm/dp: signal audio plugged change at
 dp_pm_resume

There is a scenario that dp cable is unplugged from DUT during system
suspended  will cause audio option state does not match real connection
state. Fix this problem by Signaling audio plugged change with realtime
connection status at dp_pm_resume() so that audio option will be in
correct state after system resumed.

Changes in V2:
-- correct Fixes tag commit id.

Fixes: f591dbb5fb8c ("drm/msm/dp: power off DP phy at suspend")
Signed-off-by: Kuogee Hsieh <khsieh@codeaurora.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/1627059339-12142-1-git-send-email-khsieh@codeaurora.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 051c1be1de7e..1d204a0fbdd2 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -1311,6 +1311,10 @@ static int dp_pm_resume(struct device *dev)
 	else
 		dp->dp_display.is_connected = false;
 
+	dp_display_handle_plugged_change(g_dp_display,
+				dp->dp_display.is_connected);
+
+
 	mutex_unlock(&dp->event_mutex);
 
 	return 0;

From fc71c9e6f41f9912d22a75dfa76bc10811af7e22 Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Wed, 14 Jul 2021 11:28:56 -0400
Subject: [PATCH 219/502] drm/msm/dp: Initialize dp->aux->drm_dev before
 registration

Avoids the following WARN:
[    3.009556] ------------[ cut here ]------------
[    3.014306] WARNING: CPU: 7 PID: 109 at
drivers/gpu/drm/drm_dp_helper.c:1796 drm_dp_aux_register+0xa4/0xac
[    3.024209] Modules linked in:
[    3.027351] CPU: 7 PID: 109 Comm: kworker/7:8 Not tainted 5.10.47 #69
[    3.033958] Hardware name: Google Lazor (rev1 - 2) (DT)
[    3.039323] Workqueue: events deferred_probe_work_func
[    3.044596] pstate: 60c00009 (nZCv daif +PAN +UAO -TCO BTYPE=--)
[    3.050761] pc : drm_dp_aux_register+0xa4/0xac
[    3.055329] lr : dp_aux_register+0x40/0x88
[    3.059538] sp : ffffffc010ad3920
[    3.062948] x29: ffffffc010ad3920 x28: ffffffa64196ac70
[    3.067239] mmc1: Command Queue Engine enabled
[    3.068406] x27: ffffffa64196ac68 x26: 0000000000000001
[    3.068407] x25: 0000000000000002 x24: 0000000000000060
[    3.068409] x23: ffffffa642ab3400 x22: ffffffe126c10e5b
[    3.068410] x21: ffffffa641dc3188 x20: ffffffa641963c10
[    3.068412] x19: ffffffa642aba910 x18: 00000000ffff0a00
[    3.068414] x17: 000000476f8e002a x16: 00000000000000b8
[    3.073008] mmc1: new HS400 Enhanced strobe MMC card at address 0001
[    3.078448] x15: ffffffffffffffff x14: ffffffffffffffff
[    3.078450] x13: 0000000000000030 x12: 0000000000000030
[    3.078452] x11: 0101010101010101 x10: ffffffe12647a914
[    3.078453] x9 : ffffffe12647a8cc x8 : 0000000000000000
[    3.084452] mmcblk1: mmc1:0001 DA4032 29.1 GiB
[    3.089372]
[    3.089372] x7 : 6c6064717372fefe x6 : ffffffa642b11494
[    3.089374] x5 : 0000000000000000 x4 : 6d006c657869ffff
[    3.089375] x3 : 000000006c657869 x2 : 000000000000000c
[    3.089376] x1 : ffffffe126c3ae3c x0 : ffffffa642aba910
[    3.089381] Call trace:
[    3.094931] mmcblk1boot0: mmc1:0001 DA4032 partition 1 4.00 MiB
[    3.100291]  drm_dp_aux_register+0xa4/0xac
[    3.100292]  dp_aux_register+0x40/0x88
[    3.100294]  dp_display_bind+0x64/0xcc
[    3.100295]  component_bind_all+0xdc/0x210
[    3.100298]  msm_drm_bind+0x1e8/0x5d4
[    3.100301]  try_to_bring_up_master+0x168/0x1b0
[    3.105861] mmcblk1boot1: mmc1:0001 DA4032 partition 2 4.00 MiB
[    3.112282]  __component_add+0xa0/0x158
[    3.112283]  component_add+0x1c/0x28
[    3.112284]  dp_display_probe+0x33c/0x380
[    3.112286]  platform_drv_probe+0x9c/0xbc
[    3.112287]  really_probe+0x140/0x35c
[    3.112289]  driver_probe_device+0x84/0xc0
[    3.112292]  __device_attach_driver+0x94/0xb0
[    3.117967] mmcblk1rpmb: mmc1:0001 DA4032 partition 3 16.0 MiB,
chardev (239:0)
[    3.123201]  bus_for_each_drv+0x8c/0xd8
[    3.123202]  __device_attach+0xc4/0x150
[    3.123204]  device_initial_probe+0x1c/0x28
[    3.123205]  bus_probe_device+0x3c/0x9c
[    3.123206]  deferred_probe_work_func+0x90/0xcc
[    3.123211]  process_one_work+0x218/0x3ec
[    3.131976]  mmcblk1: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 p11 p12
[    3.134123]  worker_thread+0x288/0x3e8
[    3.134124]  kthread+0x148/0x1b0
[    3.134127]  ret_from_fork+0x10/0x30
[    3.134128] ---[ end trace cfb9fce3f70f824d ]---

Signed-off-by: Sean Paul <seanpaul@chromium.org>
Reviewed-by: Abhinav Kumar <abhinavk@codeaurora.org>
Link: https://lore.kernel.org/r/20210714152910.55093-1-sean@poorly.run
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 1d204a0fbdd2..867388a399ad 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -219,6 +219,7 @@ static int dp_display_bind(struct device *dev, struct device *master,
 		goto end;
 	}
 
+	dp->aux->drm_dev = drm;
 	rc = dp_aux_register(dp->aux);
 	if (rc) {
 		DRM_ERROR("DRM DP AUX register failed\n");

From 48e8a7b5a551f956002b60d2095bdfb58db96e59 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 24 Jun 2021 17:43:03 +0100
Subject: [PATCH 220/502] perf cs-etm: Split --dump-raw-trace by AUX records

Currently --dump-raw-trace skips queueing and splitting buffers because
of an early exit condition in cs_etm__process_auxtrace_info(). Once
that is removed we can print the split data by using the queues
and searching for split buffers with the same reference as the
one that is currently being processed.

This keeps the same behaviour of dumping in file order when an AUXTRACE
event appears, rather than moving trace dump to where AUX records are in
the file.

There will be a newline and size printout for each fragment. For example
this buffer is comprised of two AUX records, but was printed as one:

  0 0 0x8098 [0x30]: PERF_RECORD_AUXTRACE size: 0xa0  offset: 0  ref: 0x491a4dfc52fc0e6e  idx: 0  t

  . ... CoreSight ETM Trace data: size 160 bytes
          Idx:0; ID:10;   I_ASYNC : Alignment Synchronisation.
          Idx:12; ID:10;  I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 }
          Idx:17; ID:10;  I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0x0000000000000000;
          Idx:80; ID:10;  I_ASYNC : Alignment Synchronisation.
          Idx:92; ID:10;  I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 }
          Idx:97; ID:10;  I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0xFFFFDE2AD3FD76D4;

But is now printed as two fragments:

  0 0 0x8098 [0x30]: PERF_RECORD_AUXTRACE size: 0xa0  offset: 0  ref: 0x491a4dfc52fc0e6e  idx: 0  t

  . ... CoreSight ETM Trace data: size 80 bytes
          Idx:0; ID:10;   I_ASYNC : Alignment Synchronisation.
          Idx:12; ID:10;  I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 }
          Idx:17; ID:10;  I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0x0000000000000000;

  . ... CoreSight ETM Trace data: size 80 bytes
          Idx:80; ID:10;  I_ASYNC : Alignment Synchronisation.
          Idx:92; ID:10;  I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 }
          Idx:97; ID:10;  I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0xFFFFDE2AD3FD76D4;

Decoding errors that appeared in problematic files are now not present,
for example:

        Idx:808; ID:1c; I_BAD_SEQUENCE : Invalid Sequence in packet.[I_ASYNC]
        ...
        PKTP_ETMV4I_0016 : 0x0014 (OCSD_ERR_INVALID_PCKT_HDR) [Invalid packet header]; TrcIdx=822

Signed-off-by: James Clark <james.clark@arm.com>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Tested-by: Leo Yan <leo.yan@linaro.org>
Cc: Al Grant <al.grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Branislav Rankov <branislav.rankov@arm.com>
Cc: Denis Nikitin <denik@chromium.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20210624164303.28632-3-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cs-etm.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 22f8326547eb..bc1f64873c8f 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -2434,6 +2434,22 @@ static int cs_etm__process_event(struct perf_session *session,
 	return 0;
 }
 
+static void dump_queued_data(struct cs_etm_auxtrace *etm,
+			     struct perf_record_auxtrace *event)
+{
+	struct auxtrace_buffer *buf;
+	unsigned int i;
+	/*
+	 * Find all buffers with same reference in the queues and dump them.
+	 * This is because the queues can contain multiple entries of the same
+	 * buffer that were split on aux records.
+	 */
+	for (i = 0; i < etm->queues.nr_queues; ++i)
+		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
+			if (buf->reference == event->reference)
+				cs_etm__dump_event(etm, buf);
+}
+
 static int cs_etm__process_auxtrace_event(struct perf_session *session,
 					  union perf_event *event,
 					  struct perf_tool *tool __maybe_unused)
@@ -2466,7 +2482,8 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
 				cs_etm__dump_event(etm, buffer);
 				auxtrace_buffer__put_data(buffer);
 			}
-	}
+	} else if (dump_trace)
+		dump_queued_data(etm, &event->auxtrace);
 
 	return 0;
 }
@@ -3042,7 +3059,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 
 	if (dump_trace) {
 		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
-		return 0;
 	}
 
 	err = cs_etm__synth_events(etm, session);

From 8e3341257e3b5774ec8cd3ef1ba0c0d3fada322b Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 27 Jul 2021 17:25:01 +0300
Subject: [PATCH 221/502] Revert "thunderbolt: Hide authorized attribute if
 router does not support PCIe tunnels"

This reverts commit 6f3badead6a078cf3c71f381f9d84ac922984a00.

It turns out bolt depends on having authorized attribute visible under
each device. Hiding it makes bolt crash as several people have reported
on various bug trackers. For this reason revert the commit.

Link: https://gitlab.freedesktop.org/bolt/bolt/-/issues/174
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1979765
Link: https://bugs.archlinux.org/task/71569
Cc: stable@vger.kernel.org
Cc: Christian Kellner <ckellner@redhat.com>
Fixes: 6f3badead6a0 ("thunderbolt: Hide authorized attribute if router does not support PCIe tunnels")
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Link: https://lore.kernel.org/r/20210727142501.27476-1-mika.westerberg@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thunderbolt/switch.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 83b1ef3d5d03..10d6b228cc94 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -1875,18 +1875,6 @@ static struct attribute *switch_attrs[] = {
 	NULL,
 };
 
-static bool has_port(const struct tb_switch *sw, enum tb_port_type type)
-{
-	const struct tb_port *port;
-
-	tb_switch_for_each_port(sw, port) {
-		if (!port->disabled && port->config.type == type)
-			return true;
-	}
-
-	return false;
-}
-
 static umode_t switch_attr_is_visible(struct kobject *kobj,
 				      struct attribute *attr, int n)
 {
@@ -1895,8 +1883,7 @@ static umode_t switch_attr_is_visible(struct kobject *kobj,
 
 	if (attr == &dev_attr_authorized.attr) {
 		if (sw->tb->security_level == TB_SECURITY_NOPCIE ||
-		    sw->tb->security_level == TB_SECURITY_DPONLY ||
-		    !has_port(sw, TB_TYPE_PCIE_UP))
+		    sw->tb->security_level == TB_SECURITY_DPONLY)
 			return 0;
 	} else if (attr == &dev_attr_device.attr) {
 		if (!sw->device)

From c07d5c9226980ca5ae21c6a2714baa95be2ce164 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 20 Jul 2021 23:10:19 +0800
Subject: [PATCH 222/502] perf pmu: Fix alias matching

Commit c47a5599eda324ba ("perf tools: Fix pattern matching for same
substring in different PMU type"), may have fixed some alias matching,
but has broken some others.

Firstly it cannot handle the simple scenario of PMU name in form
pmu_name{digits} - it can only handle pmu_name_{digits}.

Secondly it cannot handle more complex matching in the case where we
have multiple tokens. In this scenario, the code failed to realise that
we may examine multiple substrings in the PMU name.

Fix in two ways:

- Change perf_pmu__valid_suffix() to accept a PMU name without '_' in the
  suffix

- Only pay attention to perf_pmu__valid_suffix() for the final token

Also add const qualifiers as necessary to avoid casting.

Fixes: c47a5599eda324ba ("perf tools: Fix pattern matching for same substring in different PMU type")
Signed-off-by: John Garry <john.garry@huawei.com>
Tested-by: Jin Yao <yao.jin@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/1626793819-79090-1-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index a1bd7007a8b4..fc683bc41715 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -742,9 +742,13 @@ struct pmu_events_map *__weak pmu_events_map__find(void)
 	return perf_pmu__find_map(NULL);
 }
 
-static bool perf_pmu__valid_suffix(char *pmu_name, char *tok)
+/*
+ * Suffix must be in form tok_{digits}, or tok{digits}, or same as pmu_name
+ * to be valid.
+ */
+static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok)
 {
-	char *p;
+	const char *p;
 
 	if (strncmp(pmu_name, tok, strlen(tok)))
 		return false;
@@ -753,12 +757,16 @@ static bool perf_pmu__valid_suffix(char *pmu_name, char *tok)
 	if (*p == 0)
 		return true;
 
-	if (*p != '_')
-		return false;
+	if (*p == '_')
+		++p;
 
-	++p;
-	if (*p == 0 || !isdigit(*p))
-		return false;
+	/* Ensure we end in a number */
+	while (1) {
+		if (!isdigit(*p))
+			return false;
+		if (*(++p) == 0)
+			break;
+	}
 
 	return true;
 }
@@ -789,12 +797,19 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
 	 *	    match "socket" in "socketX_pmunameY" and then "pmuname" in
 	 *	    "pmunameY".
 	 */
-	for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) {
+	while (1) {
+		char *next_tok = strtok_r(NULL, ",", &tmp);
+
 		name = strstr(name, tok);
-		if (!name || !perf_pmu__valid_suffix((char *)name, tok)) {
+		if (!name ||
+		    (!next_tok && !perf_pmu__valid_suffix(name, tok))) {
 			res = false;
 			goto out;
 		}
+		if (!next_tok)
+			break;
+		tok = next_tok;
+		name += strlen(tok);
 	}
 
 	res = true;

From 91e273712ab8dd8c31924ac7714b21e011137e98 Mon Sep 17 00:00:00 2001
From: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Date: Fri, 23 Jul 2021 18:08:40 +0530
Subject: [PATCH 223/502] drm/amdgpu: Check pmops for desired suspend state

[Why]
User might change the suspend behaviour from OS.

[How]
Check with pm for target suspend state and set s0ix
flag only for s2idle state.

v2: User might change default suspend state, use target state
v3: squash in build fix

Suggested-by: Lijo Lazar <Lijo.Lazar@amd.com>
Signed-off-by: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 84a1b4bc9bb4..6cc0d4fa4d0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/power_supply.h>
 #include <linux/pm_runtime.h>
+#include <linux/suspend.h>
 #include <acpi/video.h>
 #include <acpi/actbl.h>
 
@@ -1042,7 +1043,7 @@ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
 #if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE)
 	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
 		if (adev->flags & AMD_IS_APU)
-			return true;
+			return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
 	}
 #endif
 	return false;

From c8f8e96805b54968b4d1d54850f87fc39128a532 Mon Sep 17 00:00:00 2001
From: Victor Lu <victorchengchi.lu@amd.com>
Date: Thu, 8 Jul 2021 14:50:48 -0400
Subject: [PATCH 224/502] drm/amd/display: Guard DST_Y_PREFETCH register
 overflow in DCN21

[why]
DST_Y_PREFETCH can overflow when DestinationLinesForPrefetch values are
too large due to the former being limited to 8 bits.

[how]
Set the maximum value of DestinationLinesForPrefetch to be 255 * refclk
period.

Reviewed-by: Laktyushkin Dmytro <dmytro.laktyushkin@amd.com>
Acked-by: Solomon Chiu <solomon.chiu@amd.com>
Signed-off-by: Victor Lu <victorchengchi.lu@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index d25a7d38d21f..6655bb99fdfd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -841,6 +841,9 @@ static bool CalculatePrefetchSchedule(
 	else
 		*DestinationLinesForPrefetch = dst_y_prefetch_equ;
 
+	// Limit to prevent overflow in DST_Y_PREFETCH register
+	*DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75);
+
 	dml_print("DML: VStartup: %d\n", VStartup);
 	dml_print("DML: TCalc: %f\n", TCalc);
 	dml_print("DML: TWait: %f\n", TWait);

From 8d177577cd9118c29960401a6de9dc4db00f2052 Mon Sep 17 00:00:00 2001
From: Victor Lu <victorchengchi.lu@amd.com>
Date: Tue, 6 Jul 2021 15:45:11 -0400
Subject: [PATCH 225/502] drm/amd/display: Add missing DCN21 IP parameter

[why]
IP parameter min_meta_chunk_size_bytes is read for bandwidth
calculations but it was never defined.

[how]
Define min_meta_chunk_size_bytes and initialize value to 256.

Reviewed-by: Laktyushkin Dmytro <dmytro.laktyushkin@amd.com>
Acked-by: Solomon Chiu <solomon.chiu@amd.com>
Signed-off-by: Victor Lu <victorchengchi.lu@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index f3d98e3ba624..bf0a198eae15 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -109,6 +109,7 @@ struct _vcs_dpi_ip_params_st dcn2_1_ip = {
 	.max_page_table_levels = 4,
 	.pte_chunk_size_kbytes = 2,
 	.meta_chunk_size_kbytes = 2,
+	.min_meta_chunk_size_bytes = 256,
 	.writeback_chunk_size_kbytes = 2,
 	.line_buffer_size_bits = 789504,
 	.is_line_buffer_bpp_fixed = 0,

From b53e041d8e4308f7324999398aec092dbcb130f5 Mon Sep 17 00:00:00 2001
From: Dale Zhao <dale.zhao@amd.com>
Date: Fri, 16 Jul 2021 09:38:17 +0800
Subject: [PATCH 226/502] drm/amd/display: ensure dentist display clock update
 finished in DCN20

[Why]
We don't check DENTIST_DISPCLK_CHG_DONE to ensure dentist
display clockis updated to target value. In some scenarios with large
display clock margin, it will deliver unfinished display clock and cause
issues like display black screen.

[How]
Checking DENTIST_DISPCLK_CHG_DONE to ensure display clock
has been update to target value before driver do other clock related
actions.

Reviewed-by: Cyr Aric <aric.cyr@amd.com>
Acked-by: Solomon Chiu <solomon.chiu@amd.com>
Signed-off-by: Dale Zhao <dale.zhao@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index 6e0c5c664fdc..a5331b96f551 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -197,7 +197,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
 
 	REG_UPDATE(DENTIST_DISPCLK_CNTL,
 			DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider);
-//	REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100);
+	REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 1000);
 	REG_UPDATE(DENTIST_DISPCLK_CNTL,
 			DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider);
 	REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100);

From f2ad3accefc63e72e9932e141c21875cc04beec8 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 21 Jul 2021 18:11:51 -0400
Subject: [PATCH 227/502] drm/amdgpu/display: only enable aux backlight control
 for OLED panels

We've gotten a number of reports about backlight control not
working on panels which indicate that they use aux backlight
control.  A recent patch:

commit 2d73eabe2984a435737498ab39bb1500a9ffe9a9
Author: Camille Cho <Camille.Cho@amd.com>
Date:   Thu Jul 8 18:28:37 2021 +0800

    drm/amd/display: Only set default brightness for OLED

    [Why]
    We used to unconditionally set backlight path as AUX for panels capable
    of backlight adjustment via DPCD in set default brightness.

    [How]
    This should be limited to OLED panel only since we control backlight via
    PWM path for SDR mode in LCD HDR panel.

    Reviewed-by: Krunoslav Kovac <krunoslav.kovac@amd.com>
    Acked-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
    Signed-off-by: Camille Cho <Camille.Cho@amd.com>
    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Changes some other code to only use aux for backlight control on
OLED panels.  The commit message seems to indicate that PWM should
be used for SDR mode on HDR panels.  Do something similar for
backlight control in general.  This may need to be revisited if and
when HDR started to get used.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1438
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=213715
Reviewed-by: Roman Li <Roman.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d3a2a5ff57e9..b53f49a23ddc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2429,9 +2429,9 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
 	max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
 	min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
 
-	if (caps->ext_caps->bits.oled == 1 ||
+	if (caps->ext_caps->bits.oled == 1 /*||
 	    caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
-	    caps->ext_caps->bits.hdr_aux_backlight_control == 1)
+	    caps->ext_caps->bits.hdr_aux_backlight_control == 1*/)
 		caps->aux_support = true;
 
 	if (amdgpu_backlight == 0)

From ec30ce41f03820b6289513344b4281ca3a1151f4 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Sun, 25 Jul 2021 16:49:01 +0000
Subject: [PATCH 228/502] maintainers: add bugs and chat URLs for amdgpu
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add links to the issue tracker and the IRC channel for the amdgpu
driver.

Signed-off-by: Simon Ser <contact@emersion.fr>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Pan Xinhui <Xinhui.Pan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 19135a9d778e..056966c9aac9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15468,6 +15468,8 @@ M:	Pan, Xinhui <Xinhui.Pan@amd.com>
 L:	amd-gfx@lists.freedesktop.org
 S:	Supported
 T:	git https://gitlab.freedesktop.org/agd5f/linux.git
+B:	https://gitlab.freedesktop.org/drm/amd/-/issues
+C:	irc://irc.oftc.net/radeon
 F:	drivers/gpu/drm/amd/
 F:	drivers/gpu/drm/radeon/
 F:	include/uapi/drm/amdgpu_drm.h

From 773af69121ecc6c53d192661af8d53bb3db028ae Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 Jul 2021 10:25:55 -0600
Subject: [PATCH 229/502] io_uring: always reissue from task_work context

As a safeguard, if we're going to queue async work, do it from task_work
from the original task. This ensures that we can always sanely create
threads, regards of what the reissue context may be.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a4331deb0427..6ba101cd4661 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2060,6 +2060,12 @@ static void io_req_task_queue(struct io_kiocb *req)
 	io_req_task_work_add(req);
 }
 
+static void io_req_task_queue_reissue(struct io_kiocb *req)
+{
+	req->io_task_work.func = io_queue_async_work;
+	io_req_task_work_add(req);
+}
+
 static inline void io_queue_next(struct io_kiocb *req)
 {
 	struct io_kiocb *nxt = io_req_find_next(req);
@@ -2248,7 +2254,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		    !(req->flags & REQ_F_DONT_REISSUE)) {
 			req->iopoll_completed = 0;
 			req_ref_get(req);
-			io_queue_async_work(req);
+			io_req_task_queue_reissue(req);
 			continue;
 		}
 
@@ -2771,7 +2777,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 		req->flags &= ~REQ_F_REISSUE;
 		if (io_resubmit_prep(req)) {
 			req_ref_get(req);
-			io_queue_async_work(req);
+			io_req_task_queue_reissue(req);
 		} else {
 			int cflags = 0;
 

From ec6446d5304b3c3dd692a1e244df7e40bbb5af36 Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Tue, 13 Jul 2021 13:12:16 +0530
Subject: [PATCH 230/502] fpga: dfl: fme: Fix cpu hotplug issue in performance
 reporting

The performance reporting driver added cpu hotplug
feature but it didn't add pmu migration call in cpu
offline function.
This can create an issue incase the current designated
cpu being used to collect fme pmu data got offline,
as based on current code we are not migrating fme pmu to
new target cpu. Because of that perf will still try to
fetch data from that offline cpu and hence we will not
get counter data.

Patch fixed this issue by adding pmu_migrate_context call
in fme_perf_offline_cpu function.

Fixes: 724142f8c42a ("fpga: dfl: fme: add performance reporting support")
Cc: stable@vger.kernel.org
Tested-by: Xu Yilun <yilun.xu@intel.com>
Acked-by: Wu Hao <hao.wu@intel.com>
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Moritz Fischer <mdf@kernel.org>
---
 drivers/fpga/dfl-fme-perf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/fpga/dfl-fme-perf.c b/drivers/fpga/dfl-fme-perf.c
index 4299145ef347..587c82be12f7 100644
--- a/drivers/fpga/dfl-fme-perf.c
+++ b/drivers/fpga/dfl-fme-perf.c
@@ -953,6 +953,8 @@ static int fme_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
 		return 0;
 
 	priv->cpu = target;
+	perf_pmu_migrate_context(&priv->pmu, cpu, target);
+
 	return 0;
 }
 

From 4ee107c514139960682cc0f3623a24e86fda1a13 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 27 Jul 2021 17:26:13 +0800
Subject: [PATCH 231/502] clk: qcom: smd-rpm: Fix MSM8936 RPM_SMD_PCNOC_A_CLK

Commit a0384ecfe2aa ("clk: qcom: smd-rpm: De-duplicate identical
entries") introduces the following regression on MSM8936/MSM8939, as
RPM_SMD_PCNOC_A_CLK gets pointed to pcnoc_clk by mistake.  Fix it by
correcting the clock to pcnoc_a_clk.

[    1.307363] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
[    1.313593] Mem abort info:
[    1.322512]   ESR = 0x96000004
[    1.325132]   EC = 0x25: DABT (current EL), IL = 32 bits
[    1.338872]   SET = 0, FnV = 0
[    1.355483]   EA = 0, S1PTW = 0
[    1.368702]   FSC = 0x04: level 0 translation fault
[    1.383294] Data abort info:
[    1.398292]   ISV = 0, ISS = 0x00000004
[    1.398297]   CM = 0, WnR = 0
[    1.398301] [0000000000000000] user address but active_mm is swapper
[    1.404193] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[    1.420596] Modules linked in:
[    1.420604] CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 5.14.0-rc3+ #198
[    1.441010] pc : __clk_register+0x48/0x780
[    1.446045] lr : __clk_register+0x3c/0x780
[    1.449953] sp : ffff800010063440
[    1.454031] x29: ffff800010063440 x28: 0000000000000004 x27: 0000000000000066
[    1.457423] x26: 0000000000000001 x25: 000000007fffffff x24: ffff800010f9f388
[    1.464540] x23: ffff00007fc12a90 x22: ffff0000034b2010 x21: 0000000000000000
[    1.471658] x20: ffff800010f9fff8 x19: ffff00000152a700 x18: 0000000000000001
[    1.478778] x17: ffff00007fbd40c8 x16: 0000000000000460 x15: 0000000000000465
[    1.485895] x14: ffffffffffffffff x13: 746e756f635f7265 x12: 696669746f6e5f6b
[    1.493013] x11: 0000000000000006 x10: 0000000000000000 x9 : 0000000000000000
[    1.500131] x8 : ffff00000152a800 x7 : 0000000000000000 x6 : 000000000000003f
[    1.507249] x5 : 0000000000000040 x4 : 0000000000000000 x3 : 0000000000000004
[    1.514367] x2 : 0000000000000000 x1 : 0000000000000cc0 x0 : ffff00000152a700
[    1.521486] Call trace:
[    1.528598]  __clk_register+0x48/0x780
[    1.530855]  clk_hw_register+0x20/0x60
[    1.534674]  devm_clk_hw_register+0x50/0xa8
[    1.538408]  rpm_smd_clk_probe+0x1a4/0x260
[    1.542488]  platform_probe+0x68/0xd8
[    1.546653]  really_probe+0x140/0x2f8
[    1.550386]  __driver_probe_device+0x78/0xe0
[    1.554033]  driver_probe_device+0x80/0x110
[    1.558373]  __device_attach_driver+0x90/0xe0
[    1.562280]  bus_for_each_drv+0x78/0xc8
[    1.566793]  __device_attach+0xf0/0x150
[    1.570438]  device_initial_probe+0x14/0x20
[    1.574259]  bus_probe_device+0x9c/0xa8
[    1.578425]  device_add+0x378/0x870
[    1.582243]  of_device_add+0x44/0x60
[    1.585716]  of_platform_device_create_pdata+0xc0/0x110
[    1.589538]  of_platform_bus_create+0x17c/0x388
[    1.594485]  of_platform_populate+0x50/0xf0
[    1.598998]  qcom_smd_rpm_probe+0xd4/0x128
[    1.603164]  rpmsg_dev_probe+0xbc/0x1a8
[    1.607330]  really_probe+0x140/0x2f8
[    1.611063]  __driver_probe_device+0x78/0xe0
[    1.614883]  driver_probe_device+0x80/0x110
[    1.619224]  __device_attach_driver+0x90/0xe0
[    1.623131]  bus_for_each_drv+0x78/0xc8
[    1.627643]  __device_attach+0xf0/0x150
[    1.631289]  device_initial_probe+0x14/0x20
[    1.635109]  bus_probe_device+0x9c/0xa8
[    1.639275]  device_add+0x378/0x870
[    1.643095]  device_register+0x20/0x30
[    1.646567]  rpmsg_register_device+0x54/0x90
[    1.650387]  qcom_channel_state_worker+0x168/0x288
[    1.654814]  process_one_work+0x1a0/0x328
[    1.659415]  worker_thread+0x4c/0x420
[    1.663494]  kthread+0x150/0x160
[    1.667138]  ret_from_fork+0x10/0x18
[    1.670442] Code: 97f56b92 b40034a0 aa0003f3 52819801 (f94002a0)
[    1.674004] ---[ end trace 412fa6f47384cdfe ]---

Fixes: a0384ecfe2aa ("clk: qcom: smd-rpm: De-duplicate identical entries")
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Link: https://lore.kernel.org/r/20210727092613.23056-1-shawn.guo@linaro.org
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/qcom/clk-smd-rpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c
index 800b2fef1887..b2c142f3a649 100644
--- a/drivers/clk/qcom/clk-smd-rpm.c
+++ b/drivers/clk/qcom/clk-smd-rpm.c
@@ -467,7 +467,7 @@ DEFINE_CLK_SMD_RPM(msm8936, sysmmnoc_clk, sysmmnoc_a_clk, QCOM_SMD_RPM_BUS_CLK,
 
 static struct clk_smd_rpm *msm8936_clks[] = {
 	[RPM_SMD_PCNOC_CLK]		= &msm8916_pcnoc_clk,
-	[RPM_SMD_PCNOC_A_CLK]		= &msm8916_pcnoc_clk,
+	[RPM_SMD_PCNOC_A_CLK]		= &msm8916_pcnoc_a_clk,
 	[RPM_SMD_SNOC_CLK]		= &msm8916_snoc_clk,
 	[RPM_SMD_SNOC_A_CLK]		= &msm8916_snoc_a_clk,
 	[RPM_SMD_BIMC_CLK]		= &msm8916_bimc_clk,

From bb7262b295472eb6858b5c49893954794027cd84 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 6 Dec 2020 22:40:07 +0100
Subject: [PATCH 232/502] timers: Move clearing of base::timer_running under
 base:: Lock

syzbot reported KCSAN data races vs. timer_base::timer_running being set to
NULL without holding base::lock in expire_timers().

This looks innocent and most reads are clearly not problematic, but
Frederic identified an issue which is:

 int data = 0;

 void timer_func(struct timer_list *t)
 {
    data = 1;
 }

 CPU 0                                            CPU 1
 ------------------------------                   --------------------------
 base = lock_timer_base(timer, &flags);           raw_spin_unlock(&base->lock);
 if (base->running_timer != timer)                call_timer_fn(timer, fn, baseclk);
   ret = detach_if_pending(timer, base, true);    base->running_timer = NULL;
 raw_spin_unlock_irqrestore(&base->lock, flags);  raw_spin_lock(&base->lock);

 x = data;

If the timer has previously executed on CPU 1 and then CPU 0 can observe
base->running_timer == NULL and returns, assuming the timer has completed,
but it's not guaranteed on all architectures. The comment for
del_timer_sync() makes that guarantee. Moving the assignment under
base->lock prevents this.

For non-RT kernel it's performance wise completely irrelevant whether the
store happens before or after taking the lock. For an RT kernel moving the
store under the lock requires an extra unlock/lock pair in the case that
there is a waiter for the timer, but that's not the end of the world.

Reported-by: syzbot+aa7c2385d46c5eba0b89@syzkaller.appspotmail.com
Reported-by: syzbot+abea4558531bae1ba9fe@syzkaller.appspotmail.com
Fixes: 030dcdd197d7 ("timers: Prepare support for PREEMPT_RT")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/r/87lfea7gw8.fsf@nanos.tec.linutronix.de
Cc: stable@vger.kernel.org
---
 kernel/time/timer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 9eb11c2209e5..e3d2c23c413d 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1265,8 +1265,10 @@ static inline void timer_base_unlock_expiry(struct timer_base *base)
 static void timer_sync_wait_running(struct timer_base *base)
 {
 	if (atomic_read(&base->timer_waiters)) {
+		raw_spin_unlock_irq(&base->lock);
 		spin_unlock(&base->expiry_lock);
 		spin_lock(&base->expiry_lock);
+		raw_spin_lock_irq(&base->lock);
 	}
 }
 
@@ -1457,14 +1459,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 		if (timer->flags & TIMER_IRQSAFE) {
 			raw_spin_unlock(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
-			base->running_timer = NULL;
 			raw_spin_lock(&base->lock);
+			base->running_timer = NULL;
 		} else {
 			raw_spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			raw_spin_lock_irq(&base->lock);
 			base->running_timer = NULL;
 			timer_sync_wait_running(base);
-			raw_spin_lock_irq(&base->lock);
 		}
 	}
 }

From 8373cd38a8888549ace7c7617163a2e826970a92 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Tue, 27 Jul 2021 22:03:50 +0800
Subject: [PATCH 233/502] net: hns3: change the method of obtaining default ptp
 cycle

The ptp cycle is related to the hardware, so it may cause compatibility
issues if a fixed value is used in driver. Therefore, the method of
obtaining this value is changed to read from the register rather than
use a fixed value in driver.

Fixes: 0bf5eb788512 ("net: hns3: add support for PTP")
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_ptp.c         | 36 +++++++++++++++----
 .../hisilicon/hns3/hns3pf/hclge_ptp.h         | 10 ++++--
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
index 3b1f84502e36..befa9bcc2f2f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
@@ -5,9 +5,27 @@
 #include "hclge_main.h"
 #include "hnae3.h"
 
+static int hclge_ptp_get_cycle(struct hclge_dev *hdev)
+{
+	struct hclge_ptp *ptp = hdev->ptp;
+
+	ptp->cycle.quo = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG) &
+			 HCLGE_PTP_CYCLE_QUO_MASK;
+	ptp->cycle.numer = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG);
+	ptp->cycle.den = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
+
+	if (ptp->cycle.den == 0) {
+		dev_err(&hdev->pdev->dev, "invalid ptp cycle denominator!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 {
 	struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp);
+	struct hclge_ptp_cycle *cycle = &hdev->ptp->cycle;
 	u64 adj_val, adj_base, diff;
 	unsigned long flags;
 	bool is_neg = false;
@@ -18,7 +36,7 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 		is_neg = true;
 	}
 
-	adj_base = HCLGE_PTP_CYCLE_ADJ_BASE * HCLGE_PTP_CYCLE_ADJ_UNIT;
+	adj_base = (u64)cycle->quo * (u64)cycle->den + (u64)cycle->numer;
 	adj_val = adj_base * ppb;
 	diff = div_u64(adj_val, 1000000000ULL);
 
@@ -29,16 +47,16 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 
 	/* This clock cycle is defined by three part: quotient, numerator
 	 * and denominator. For example, 2.5ns, the quotient is 2,
-	 * denominator is fixed to HCLGE_PTP_CYCLE_ADJ_UNIT, and numerator
-	 * is 0.5 * HCLGE_PTP_CYCLE_ADJ_UNIT.
+	 * denominator is fixed to ptp->cycle.den, and numerator
+	 * is 0.5 * ptp->cycle.den.
 	 */
-	quo = div_u64_rem(adj_val, HCLGE_PTP_CYCLE_ADJ_UNIT, &numerator);
+	quo = div_u64_rem(adj_val, cycle->den, &numerator);
 
 	spin_lock_irqsave(&hdev->ptp->lock, flags);
-	writel(quo, hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG);
+	writel(quo & HCLGE_PTP_CYCLE_QUO_MASK,
+	       hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG);
 	writel(numerator, hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG);
-	writel(HCLGE_PTP_CYCLE_ADJ_UNIT,
-	       hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
+	writel(cycle->den, hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
 	writel(HCLGE_PTP_CYCLE_ADJ_EN,
 	       hdev->ptp->io_base + HCLGE_PTP_CYCLE_CFG_REG);
 	spin_unlock_irqrestore(&hdev->ptp->lock, flags);
@@ -475,6 +493,10 @@ int hclge_ptp_init(struct hclge_dev *hdev)
 		ret = hclge_ptp_create_clock(hdev);
 		if (ret)
 			return ret;
+
+		ret = hclge_ptp_get_cycle(hdev);
+		if (ret)
+			return ret;
 	}
 
 	ret = hclge_ptp_int_en(hdev, true);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
index 5a202b775471..dbf5f4c08019 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
@@ -29,6 +29,7 @@
 #define HCLGE_PTP_TIME_ADJ_REG		0x60
 #define HCLGE_PTP_TIME_ADJ_EN		BIT(0)
 #define HCLGE_PTP_CYCLE_QUO_REG		0x64
+#define HCLGE_PTP_CYCLE_QUO_MASK	GENMASK(7, 0)
 #define HCLGE_PTP_CYCLE_DEN_REG		0x68
 #define HCLGE_PTP_CYCLE_NUM_REG		0x6C
 #define HCLGE_PTP_CYCLE_CFG_REG		0x70
@@ -37,9 +38,7 @@
 #define HCLGE_PTP_CUR_TIME_SEC_L_REG	0x78
 #define HCLGE_PTP_CUR_TIME_NSEC_REG	0x7C
 
-#define HCLGE_PTP_CYCLE_ADJ_BASE	2
 #define HCLGE_PTP_CYCLE_ADJ_MAX		500000000
-#define HCLGE_PTP_CYCLE_ADJ_UNIT	100000000
 #define HCLGE_PTP_SEC_H_OFFSET		32u
 #define HCLGE_PTP_SEC_L_MASK		GENMASK(31, 0)
 
@@ -47,6 +46,12 @@
 #define HCLGE_PTP_FLAG_TX_EN		1
 #define HCLGE_PTP_FLAG_RX_EN		2
 
+struct hclge_ptp_cycle {
+	u32 quo;
+	u32 numer;
+	u32 den;
+};
+
 struct hclge_ptp {
 	struct hclge_dev *hdev;
 	struct ptp_clock *clock;
@@ -58,6 +63,7 @@ struct hclge_ptp {
 	spinlock_t lock;	/* protects ptp registers */
 	u32 ptp_cfg;
 	u32 last_tx_seqid;
+	struct hclge_ptp_cycle cycle;
 	unsigned long tx_start;
 	unsigned long tx_cnt;
 	unsigned long tx_skipped;

From 76b4f357d0e7d8f6f0013c733e6cba1773c266d3 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 1 Jul 2021 17:41:00 +0200
Subject: [PATCH 234/502] x86/kvm: fix vcpu-id indexed array sizes

KVM_MAX_VCPU_ID is the maximum vcpu-id of a guest, and not the number
of vcpu-ids. Fix array indexed by vcpu-id to have KVM_MAX_VCPU_ID+1
elements.

Note that this is currently no real problem, as KVM_MAX_VCPU_ID is
an odd number, resulting in always enough padding being available at
the end of those arrays.

Nevertheless this should be fixed in order to avoid rare problems in
case someone is using an even number for KVM_MAX_VCPU_ID.

Signed-off-by: Juergen Gross <jgross@suse.com>
Message-Id: <20210701154105.23215-2-jgross@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/ioapic.c | 2 +-
 arch/x86/kvm/ioapic.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 698969e18fe3..ff005fe738a4 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
 {
 	ioapic->rtc_status.pending_eoi = 0;
-	bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+	bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1);
 }
 
 static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 660401700075..11e4065e1617 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -43,13 +43,13 @@ struct kvm_vcpu;
 
 struct dest_map {
 	/* vcpu bitmap where IRQ has been sent */
-	DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
+	DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1);
 
 	/*
 	 * Vector sent to a given vcpu, only valid when
 	 * the vcpu's bit in map is set
 	 */
-	u8 vectors[KVM_MAX_VCPU_ID];
+	u8 vectors[KVM_MAX_VCPU_ID + 1];
 };
 
 

From 15b7b737deb30e1f8f116a08e723173b55ebd2f3 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Tue, 13 Jul 2021 22:09:56 +0000
Subject: [PATCH 235/502] KVM: selftests: Fix missing break in
 dirty_log_perf_test arg parsing

There is a missing break statement which causes a fallthrough to the
next statement where optarg will be null and a segmentation fault will
be generated.

Fixes: 9e965bb75aae ("KVM: selftests: Add backing src parameter to dirty_log_perf_test")
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210713220957.3493520-6-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 04a2641261be..80cbd3a748c0 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -312,6 +312,7 @@ int main(int argc, char *argv[])
 			break;
 		case 'o':
 			p.partition_vcpu_memory_access = false;
+			break;
 		case 's':
 			p.backing_src = parse_backing_src_type(optarg);
 			break;

From c33e05d9b067433252b1008d2f37bf64e11151f1 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Tue, 13 Jul 2021 22:09:57 +0000
Subject: [PATCH 236/502] KVM: selftests: Introduce access_tracking_perf_test

This test measures the performance effects of KVM's access tracking.
Access tracking is driven by the MMU notifiers test_young, clear_young,
and clear_flush_young. These notifiers do not have a direct userspace
API, however the clear_young notifier can be triggered by marking a
pages as idle in /sys/kernel/mm/page_idle/bitmap. This test leverages
that mechanism to enable access tracking on guest memory.

To measure performance this test runs a VM with a configurable number of
vCPUs that each touch every page in disjoint regions of memory.
Performance is measured in the time it takes all vCPUs to finish
touching their predefined region.

Example invocation:

  $ ./access_tracking_perf_test -v 8
  Testing guest mode: PA-bits:ANY, VA-bits:48,  4K pages
  guest physical test memory offset: 0xffdfffff000

  Populating memory             : 1.337752570s
  Writing to populated memory   : 0.010177640s
  Reading from populated memory : 0.009548239s
  Mark memory idle              : 23.973131748s
  Writing to idle memory        : 0.063584496s
  Mark memory idle              : 24.924652964s
  Reading from idle memory      : 0.062042814s

Breaking down the results:

 * "Populating memory": The time it takes for all vCPUs to perform the
   first write to every page in their region.

 * "Writing to populated memory" / "Reading from populated memory": The
   time it takes for all vCPUs to write and read to every page in their
   region after it has been populated. This serves as a control for the
   later results.

 * "Mark memory idle": The time it takes for every vCPU to mark every
   page in their region as idle through page_idle.

 * "Writing to idle memory" / "Reading from idle memory": The time it
   takes for all vCPUs to write and read to every page in their region
   after it has been marked idle.

This test should be portable across architectures but it is only enabled
for x86_64 since that's all I have tested.

Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210713220957.3493520-7-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/access_tracking_perf_test.c | 429 ++++++++++++++++++
 3 files changed, 431 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/access_tracking_perf_test.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 06a351b4f93b..0709af0144c8 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -38,6 +38,7 @@
 /x86_64/xen_vmcall_test
 /x86_64/xss_msr_test
 /x86_64/vmx_pmu_msrs_test
+/access_tracking_perf_test
 /demand_paging_test
 /dirty_log_test
 /dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index b853be2ae3c6..5832f510a16c 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
+TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
new file mode 100644
index 000000000000..e2baa187a21e
--- /dev/null
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * access_tracking_perf_test
+ *
+ * Copyright (C) 2021, Google, Inc.
+ *
+ * This test measures the performance effects of KVM's access tracking.
+ * Access tracking is driven by the MMU notifiers test_young, clear_young, and
+ * clear_flush_young. These notifiers do not have a direct userspace API,
+ * however the clear_young notifier can be triggered by marking a pages as idle
+ * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
+ * enable access tracking on guest memory.
+ *
+ * To measure performance this test runs a VM with a configurable number of
+ * vCPUs that each touch every page in disjoint regions of memory. Performance
+ * is measured in the time it takes all vCPUs to finish touching their
+ * predefined region.
+ *
+ * Note that a deterministic correctness test of access tracking is not possible
+ * by using page_idle as it exists today. This is for a few reasons:
+ *
+ * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
+ *    means subsequent guest accesses are not guaranteed to see page table
+ *    updates made by KVM until some time in the future.
+ *
+ * 2. page_idle only operates on LRU pages. Newly allocated pages are not
+ *    immediately allocated to LRU lists. Instead they are held in a "pagevec",
+ *    which is drained to LRU lists some time in the future. There is no
+ *    userspace API to force this drain to occur.
+ *
+ * These limitations are worked around in this test by using a large enough
+ * region of memory for each vCPU such that the number of translations cached in
+ * the TLB and the number of pages held in pagevecs are a small fraction of the
+ * overall workload. And if either of those conditions are not true this test
+ * will fail rather than silently passing.
+ */
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
+
+/* Global variable used to synchronize all of the vCPU threads. */
+static int iteration = -1;
+
+/* Defines what vCPU threads should do during a given iteration. */
+static enum {
+	/* Run the vCPU to access all its memory. */
+	ITERATION_ACCESS_MEMORY,
+	/* Mark the vCPU's memory idle in page_idle. */
+	ITERATION_MARK_IDLE,
+} iteration_work;
+
+/* Set to true when vCPU threads should exit. */
+static bool done;
+
+/* The iteration that was last completed by each vCPU. */
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+/* Whether to overlap the regions of memory vCPUs access. */
+static bool overlap_memory_access;
+
+struct test_params {
+	/* The backing source for the region of memory. */
+	enum vm_mem_backing_src_type backing_src;
+
+	/* The amount of memory to allocate for each vCPU. */
+	uint64_t vcpu_memory_bytes;
+
+	/* The number of vCPUs to create in the VM. */
+	int vcpus;
+};
+
+static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
+{
+	uint64_t value;
+	off_t offset = index * sizeof(value);
+
+	TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value),
+		    "pread from %s offset 0x%" PRIx64 " failed!",
+		    filename, offset);
+
+	return value;
+
+}
+
+#define PAGEMAP_PRESENT (1ULL << 63)
+#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1)
+
+static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
+{
+	uint64_t hva = (uint64_t) addr_gva2hva(vm, gva);
+	uint64_t entry;
+	uint64_t pfn;
+
+	entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize());
+	if (!(entry & PAGEMAP_PRESENT))
+		return 0;
+
+	pfn = entry & PAGEMAP_PFN_MASK;
+	if (!pfn) {
+		print_skip("Looking up PFNs requires CAP_SYS_ADMIN");
+		exit(KSFT_SKIP);
+	}
+
+	return pfn;
+}
+
+static bool is_page_idle(int page_idle_fd, uint64_t pfn)
+{
+	uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64);
+
+	return !!((bits >> (pfn % 64)) & 1);
+}
+
+static void mark_page_idle(int page_idle_fd, uint64_t pfn)
+{
+	uint64_t bits = 1ULL << (pfn % 64);
+
+	TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8,
+		    "Set page_idle bits for PFN 0x%" PRIx64, pfn);
+}
+
+static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id)
+{
+	uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva;
+	uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages;
+	uint64_t page;
+	uint64_t still_idle = 0;
+	uint64_t no_pfn = 0;
+	int page_idle_fd;
+	int pagemap_fd;
+
+	/* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */
+	if (overlap_memory_access && vcpu_id)
+		return;
+
+	page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+	TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle.");
+
+	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+	TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
+
+	for (page = 0; page < pages; page++) {
+		uint64_t gva = base_gva + page * perf_test_args.guest_page_size;
+		uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
+
+		if (!pfn) {
+			no_pfn++;
+			continue;
+		}
+
+		if (is_page_idle(page_idle_fd, pfn)) {
+			still_idle++;
+			continue;
+		}
+
+		mark_page_idle(page_idle_fd, pfn);
+	}
+
+	/*
+	 * Assumption: Less than 1% of pages are going to be swapped out from
+	 * under us during this test.
+	 */
+	TEST_ASSERT(no_pfn < pages / 100,
+		    "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.",
+		    vcpu_id, no_pfn, pages);
+
+	/*
+	 * Test that at least 90% of memory has been marked idle (the rest might
+	 * not be marked idle because the pages have not yet made it to an LRU
+	 * list or the translations are still cached in the TLB). 90% is
+	 * arbitrary; high enough that we ensure most memory access went through
+	 * access tracking but low enough as to not make the test too brittle
+	 * over time and across architectures.
+	 */
+	TEST_ASSERT(still_idle < pages / 10,
+		    "vCPU%d: Too many pages still idle (%"PRIu64 " out of %"
+		    PRIu64 ").\n",
+		    vcpu_id, still_idle, pages);
+
+	close(page_idle_fd);
+	close(pagemap_fd);
+}
+
+static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id,
+			 uint64_t expected_ucall)
+{
+	struct ucall uc;
+	uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc);
+
+	TEST_ASSERT(expected_ucall == actual_ucall,
+		    "Guest exited unexpectedly (expected ucall %" PRIu64
+		    ", got %" PRIu64 ")",
+		    expected_ucall, actual_ucall);
+}
+
+static bool spin_wait_for_next_iteration(int *current_iteration)
+{
+	int last_iteration = *current_iteration;
+
+	do {
+		if (READ_ONCE(done))
+			return false;
+
+		*current_iteration = READ_ONCE(iteration);
+	} while (last_iteration == *current_iteration);
+
+	return true;
+}
+
+static void *vcpu_thread_main(void *arg)
+{
+	struct perf_test_vcpu_args *vcpu_args = arg;
+	struct kvm_vm *vm = perf_test_args.vm;
+	int vcpu_id = vcpu_args->vcpu_id;
+	int current_iteration = -1;
+
+	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
+	while (spin_wait_for_next_iteration(&current_iteration)) {
+		switch (READ_ONCE(iteration_work)) {
+		case ITERATION_ACCESS_MEMORY:
+			vcpu_run(vm, vcpu_id);
+			assert_ucall(vm, vcpu_id, UCALL_SYNC);
+			break;
+		case ITERATION_MARK_IDLE:
+			mark_vcpu_memory_idle(vm, vcpu_id);
+			break;
+		};
+
+		vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+	}
+
+	return NULL;
+}
+
+static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
+{
+	while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+	       target_iteration) {
+		continue;
+	}
+}
+
+/* The type of memory accesses to perform in the VM. */
+enum access_type {
+	ACCESS_READ,
+	ACCESS_WRITE,
+};
+
+static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
+{
+	struct timespec ts_start;
+	struct timespec ts_elapsed;
+	int next_iteration;
+	int vcpu_id;
+
+	/* Kick off the vCPUs by incrementing iteration. */
+	next_iteration = ++iteration;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+	/* Wait for all vCPUs to finish the iteration. */
+	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
+		spin_wait_for_vcpu(vcpu_id, next_iteration);
+
+	ts_elapsed = timespec_elapsed(ts_start);
+	pr_info("%-30s: %ld.%09lds\n",
+		description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
+}
+
+static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
+			  const char *description)
+{
+	perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1;
+	sync_global_to_guest(vm, perf_test_args);
+	iteration_work = ITERATION_ACCESS_MEMORY;
+	run_iteration(vm, vcpus, description);
+}
+
+static void mark_memory_idle(struct kvm_vm *vm, int vcpus)
+{
+	/*
+	 * Even though this parallelizes the work across vCPUs, this is still a
+	 * very slow operation because page_idle forces the test to mark one pfn
+	 * at a time and the clear_young notifier serializes on the KVM MMU
+	 * lock.
+	 */
+	pr_debug("Marking VM memory idle (slow)...\n");
+	iteration_work = ITERATION_MARK_IDLE;
+	run_iteration(vm, vcpus, "Mark memory idle");
+}
+
+static pthread_t *create_vcpu_threads(int vcpus)
+{
+	pthread_t *vcpu_threads;
+	int i;
+
+	vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0]));
+	TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads.");
+
+	for (i = 0; i < vcpus; i++) {
+		vcpu_last_completed_iteration[i] = iteration;
+		pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main,
+			       &perf_test_args.vcpu_args[i]);
+	}
+
+	return vcpu_threads;
+}
+
+static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus)
+{
+	int i;
+
+	/* Set done to signal the vCPU threads to exit */
+	done = true;
+
+	for (i = 0; i < vcpus; i++)
+		pthread_join(vcpu_threads[i], NULL);
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+	struct test_params *params = arg;
+	struct kvm_vm *vm;
+	pthread_t *vcpu_threads;
+	int vcpus = params->vcpus;
+
+	vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes,
+				 params->backing_src);
+
+	perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes,
+			      !overlap_memory_access);
+
+	vcpu_threads = create_vcpu_threads(vcpus);
+
+	pr_info("\n");
+	access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory");
+
+	/* As a control, read and write to the populated memory first. */
+	access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory");
+	access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory");
+
+	/* Repeat on memory that has been marked as idle. */
+	mark_memory_idle(vm, vcpus);
+	access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory");
+	mark_memory_idle(vm, vcpus);
+	access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory");
+
+	terminate_vcpu_threads(vcpu_threads, vcpus);
+	free(vcpu_threads);
+	perf_test_destroy_vm(vm);
+}
+
+static void help(char *name)
+{
+	puts("");
+	printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o]  [-s mem_type]\n",
+	       name);
+	puts("");
+	printf(" -h: Display this help message.");
+	guest_modes_help();
+	printf(" -b: specify the size of the memory region which should be\n"
+	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
+	       "     (default: 1G)\n");
+	printf(" -v: specify the number of vCPUs to run.\n");
+	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+	       "     them into a separate region of memory for each vCPU.\n");
+	printf(" -s: specify the type of memory that should be used to\n"
+	       "     back the guest data region.\n\n");
+	backing_src_help();
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	struct test_params params = {
+		.backing_src = VM_MEM_SRC_ANONYMOUS,
+		.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
+		.vcpus = 1,
+	};
+	int page_idle_fd;
+	int opt;
+
+	guest_modes_append_default();
+
+	while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+		switch (opt) {
+		case 'm':
+			guest_modes_cmdline(optarg);
+			break;
+		case 'b':
+			params.vcpu_memory_bytes = parse_size(optarg);
+			break;
+		case 'v':
+			params.vcpus = atoi(optarg);
+			break;
+		case 'o':
+			overlap_memory_access = true;
+			break;
+		case 's':
+			params.backing_src = parse_backing_src_type(optarg);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+	if (page_idle_fd < 0) {
+		print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled");
+		exit(KSFT_SKIP);
+	}
+	close(page_idle_fd);
+
+	for_each_guest_mode(run_test, &params);
+
+	return 0;
+}

From 3fa5e8fd0a0e4ccc03c91df225be2e9b7100800c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 26 Jul 2021 12:39:01 -0400
Subject: [PATCH 237/502] KVM: SVM: delay svm_vcpu_init_msrpm after svm->vmcb
 is initialized

Right now, svm_hv_vmcb_dirty_nested_enlightenments has an incorrect
dereference of vmcb->control.reserved_sw before the vmcb is checked
for being non-NULL.  The compiler is usually sinking the dereference
after the check; instead of doing this ourselves in the source,
ensure that svm_hv_vmcb_dirty_nested_enlightenments is only called
with a non-NULL VMCB.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Vineeth Pillai <viremana@linux.microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[Untested for now due to issues with my AMD machine. - Paolo]
---
 arch/x86/kvm/svm/svm.c          | 4 ++--
 arch/x86/kvm/svm/svm_onhyperv.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9a6987549e1b..4bcb95bb8ed7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1406,8 +1406,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 		goto error_free_vmsa_page;
 	}
 
-	svm_vcpu_init_msrpm(vcpu, svm->msrpm);
-
 	svm->vmcb01.ptr = page_address(vmcb01_page);
 	svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
 
@@ -1419,6 +1417,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	svm_switch_vmcb(svm, &svm->vmcb01);
 	init_vmcb(vcpu);
 
+	svm_vcpu_init_msrpm(vcpu, svm->msrpm);
+
 	svm_init_osvw(vcpu);
 	vcpu->arch.microcode_version = 0x01000065;
 
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index 9b9a55abc29f..c53b8bf8d013 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
 	 * as we mark it dirty unconditionally towards end of vcpu
 	 * init phase.
 	 */
-	if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+	if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
 	    hve->hv_enlightenments_control.msr_bitmap)
 		vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
 }

From bb000f640e76c4c2402990d0613d4269e9c6dd29 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 26 Jul 2021 17:01:08 +0200
Subject: [PATCH 238/502] KVM: s390: restore old debugfs names

commit bc9e9e672df9 ("KVM: debugfs: Reuse binary stats descriptors")
did replace the old definitions with the binary ones. While doing that
it missed that some files are names different than the counters. This
is especially important for kvm_stat which does have special handling
for counters named instruction_*.

Fixes: commit bc9e9e672df9 ("KVM: debugfs: Reuse binary stats descriptors")
CC: Jing Zhang <jingzhangos@google.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20210726150108.5603-1-borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/s390/include/asm/kvm_host.h | 18 +++++++++---------
 arch/s390/kvm/diag.c             | 18 +++++++++---------
 arch/s390/kvm/kvm-s390.c         | 18 +++++++++---------
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 9b4473f76e56..161a9e12bfb8 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -445,15 +445,15 @@ struct kvm_vcpu_stat {
 	u64 instruction_sigp_init_cpu_reset;
 	u64 instruction_sigp_cpu_reset;
 	u64 instruction_sigp_unknown;
-	u64 diagnose_10;
-	u64 diagnose_44;
-	u64 diagnose_9c;
-	u64 diagnose_9c_ignored;
-	u64 diagnose_9c_forward;
-	u64 diagnose_258;
-	u64 diagnose_308;
-	u64 diagnose_500;
-	u64 diagnose_other;
+	u64 instruction_diagnose_10;
+	u64 instruction_diagnose_44;
+	u64 instruction_diagnose_9c;
+	u64 diag_9c_ignored;
+	u64 diag_9c_forward;
+	u64 instruction_diagnose_258;
+	u64 instruction_diagnose_308;
+	u64 instruction_diagnose_500;
+	u64 instruction_diagnose_other;
 	u64 pfault_sync;
 };
 
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 02c146f9e5cd..807fa9da1e72 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -24,7 +24,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
 	start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
-	vcpu->stat.diagnose_10++;
+	vcpu->stat.instruction_diagnose_10++;
 
 	if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
 	    || start < 2 * PAGE_SIZE)
@@ -74,7 +74,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 
 	VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
 		   vcpu->run->s.regs.gprs[rx]);
-	vcpu->stat.diagnose_258++;
+	vcpu->stat.instruction_diagnose_258++;
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
@@ -145,7 +145,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
-	vcpu->stat.diagnose_44++;
+	vcpu->stat.instruction_diagnose_44++;
 	kvm_vcpu_on_spin(vcpu, true);
 	return 0;
 }
@@ -169,7 +169,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 	int tid;
 
 	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
-	vcpu->stat.diagnose_9c++;
+	vcpu->stat.instruction_diagnose_9c++;
 
 	/* yield to self */
 	if (tid == vcpu->vcpu_id)
@@ -192,7 +192,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 		VCPU_EVENT(vcpu, 5,
 			   "diag time slice end directed to %d: yield forwarded",
 			   tid);
-		vcpu->stat.diagnose_9c_forward++;
+		vcpu->stat.diag_9c_forward++;
 		return 0;
 	}
 
@@ -203,7 +203,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 	return 0;
 no_yield:
 	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
-	vcpu->stat.diagnose_9c_ignored++;
+	vcpu->stat.diag_9c_ignored++;
 	return 0;
 }
 
@@ -213,7 +213,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 	unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
 
 	VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
-	vcpu->stat.diagnose_308++;
+	vcpu->stat.instruction_diagnose_308++;
 	switch (subcode) {
 	case 3:
 		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
@@ -245,7 +245,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 {
 	int ret;
 
-	vcpu->stat.diagnose_500++;
+	vcpu->stat.instruction_diagnose_500++;
 	/* No virtio-ccw notification? Get out quickly. */
 	if (!vcpu->kvm->arch.css_support ||
 	    (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
@@ -299,7 +299,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 	case 0x500:
 		return __diag_virtio_hypercall(vcpu);
 	default:
-		vcpu->stat.diagnose_other++;
+		vcpu->stat.instruction_diagnose_other++;
 		return -EOPNOTSUPP;
 	}
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b655a7d82bf0..4527ac7b5961 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -163,15 +163,15 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
 	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
-	STATS_DESC_COUNTER(VCPU, diagnose_10),
-	STATS_DESC_COUNTER(VCPU, diagnose_44),
-	STATS_DESC_COUNTER(VCPU, diagnose_9c),
-	STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored),
-	STATS_DESC_COUNTER(VCPU, diagnose_9c_forward),
-	STATS_DESC_COUNTER(VCPU, diagnose_258),
-	STATS_DESC_COUNTER(VCPU, diagnose_308),
-	STATS_DESC_COUNTER(VCPU, diagnose_500),
-	STATS_DESC_COUNTER(VCPU, diagnose_other),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
+	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
+	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
 	STATS_DESC_COUNTER(VCPU, pfault_sync)
 };
 static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==

From f1577ab21442476a1015d09e861c08ca76262c06 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 13 Jul 2021 17:20:16 +0300
Subject: [PATCH 239/502] KVM: SVM: svm_set_vintr don't warn if AVIC is active
 but is about to be deactivated

It is possible for AVIC inhibit and AVIC active state to be mismatched.
Currently we disable AVIC right away on vCPU which started the AVIC inhibit
request thus this warning doesn't trigger but at least in theory,
if svm_set_vintr is called at the same time on multiple vCPUs,
the warning can happen.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210713142023.106183-2-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4bcb95bb8ed7..e8ccab50ebf6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1568,8 +1568,11 @@ static void svm_set_vintr(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control;
 
-	/* The following fields are ignored when AVIC is enabled */
-	WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+	/*
+	 * The following fields are ignored when AVIC is enabled
+	 */
+	WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+
 	svm_set_intercept(svm, INTERCEPT_VINTR);
 
 	/*

From feea01360cb1925dd31a3d38514eb86f61d69468 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 13 Jul 2021 17:20:17 +0300
Subject: [PATCH 240/502] KVM: SVM: tweak warning about enabled AVIC on nested
 entry

It is possible that AVIC was requested to be disabled but
not yet disabled, e.g if the nested entry is done right
after svm_vcpu_after_set_cpuid.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210713142023.106183-3-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1c2a0414a88d..61738ff8ef33 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -515,7 +515,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	 * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
 	 * avic_physical_id.
 	 */
-	WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);
+	WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
 
 	/* Copied from vmcb01.  msrpm_base can be overwritten later.  */
 	svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;

From 5868b8225ecef4ba3f5b17e65984d60bc5fd6254 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 13 Jul 2021 17:20:18 +0300
Subject: [PATCH 241/502] KVM: SVM: use vmcb01 in svm_refresh_apicv_exec_ctrl

Currently when SVM is enabled in guest CPUID, AVIC is inhibited as soon
as the guest CPUID is set.

AVIC happens to be fully disabled on all vCPUs by the time any guest
entry starts (if after migration the entry can be nested).

The reason is that currently we disable avic right away on vCPU from which
the kvm_request_apicv_update was called and for this case, it happens to be
called on all vCPUs (by svm_vcpu_after_set_cpuid).

After we stop doing this, AVIC will end up being disabled only when
KVM_REQ_APICV_UPDATE is processed which is after we done switching to the
nested guest.

Fix this by just using vmcb01 in svm_refresh_apicv_exec_ctrl for avic
(which is a right thing to do anyway).

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210713142023.106183-4-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/avic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1d01da64c333..a8ad78a2faa1 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -646,7 +646,7 @@ out:
 void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct vmcb *vmcb = svm->vmcb;
+	struct vmcb *vmcb = svm->vmcb01.ptr;
 	bool activated = kvm_vcpu_apicv_active(vcpu);
 
 	if (!enable_apicv)

From 74775654332b2682a5580d6f954e5a9ac81e7477 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Tue, 27 Jul 2021 19:12:47 +0800
Subject: [PATCH 242/502] KVM: use cpu_relax when halt polling

SMT siblings share caches and other hardware, and busy halt polling
will degrade its sibling performance if its sibling is working

Sean Christopherson suggested as below:

"Rather than disallowing halt-polling entirely, on x86 it should be
sufficient to simply have the hardware thread yield to its sibling(s)
via PAUSE.  It probably won't get back all performance, but I would
expect it to be close.
This compiles on all KVM architectures, and AFAICT the intended usage
of cpu_relax() is identical for all architectures."

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Message-Id: <20210727111247.55510-1-lirongqing@baidu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 986959833d70..0d732813fa80 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3110,6 +3110,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 					++vcpu->stat.generic.halt_poll_invalid;
 				goto out;
 			}
+			cpu_relax();
 			poll_end = cur = ktime_get();
 		} while (kvm_vcpu_can_poll(cur, stop));
 	}

From 8750f9bbda115f3f79bfe43be85551ee5e12b6ff Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 27 Jul 2021 08:43:10 -0400
Subject: [PATCH 243/502] KVM: add missing compat KVM_CLEAR_DIRTY_LOG

The arguments to the KVM_CLEAR_DIRTY_LOG ioctl include a pointer,
therefore it needs a compat ioctl implementation.  Otherwise,
32-bit userspace fails to invoke it on 64-bit kernels; for x86
it might work fine by chance if the padding is zero, but not
on big-endian architectures.

Reported-by: Thomas Sattler
Cc: stable@vger.kernel.org
Fixes: 2a31b9db1535 ("kvm: introduce manual dirty log reprotect")
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0d732813fa80..d20fba0fc290 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4391,6 +4391,16 @@ struct compat_kvm_dirty_log {
 	};
 };
 
+struct compat_kvm_clear_dirty_log {
+	__u32 slot;
+	__u32 num_pages;
+	__u64 first_page;
+	union {
+		compat_uptr_t dirty_bitmap; /* one bit per page */
+		__u64 padding2;
+	};
+};
+
 static long kvm_vm_compat_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -4400,6 +4410,24 @@ static long kvm_vm_compat_ioctl(struct file *filp,
 	if (kvm->mm != current->mm)
 		return -EIO;
 	switch (ioctl) {
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	case KVM_CLEAR_DIRTY_LOG: {
+		struct compat_kvm_clear_dirty_log compat_log;
+		struct kvm_clear_dirty_log log;
+
+		if (copy_from_user(&compat_log, (void __user *)arg,
+				   sizeof(compat_log)))
+			return -EFAULT;
+		log.slot	 = compat_log.slot;
+		log.num_pages	 = compat_log.num_pages;
+		log.first_page	 = compat_log.first_page;
+		log.padding2	 = compat_log.padding2;
+		log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
+
+		r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
+		break;
+	}
+#endif
 	case KVM_GET_DIRTY_LOG: {
 		struct compat_kvm_dirty_log compat_log;
 		struct kvm_dirty_log log;

From 2bcc025ab9bbd029b1730cde71cb4e4f0ed35d0f Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Sat, 17 Jul 2021 14:27:42 +0300
Subject: [PATCH 244/502] clk: tegra: Implement disable_unused() of
 tegra_clk_sdmmc_mux_ops

Implement disable_unused() callback of tegra_clk_sdmmc_mux_ops to fix
imbalanced disabling of the unused MMC clock on Tegra210 Jetson Nano.

Fixes: c592c8a28f58 ("clk: tegra: Fix refcounting of gate clocks")
Reported-by: Jon Hunter <jonathanh@nvidia.com> # T210 Nano
Tested-by: Jon Hunter <jonathanh@nvidia.com> # T210 Nano
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Link: https://lore.kernel.org/r/20210717112742.7196-1-digetx@gmail.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/tegra/clk-sdmmc-mux.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/clk/tegra/clk-sdmmc-mux.c b/drivers/clk/tegra/clk-sdmmc-mux.c
index 316912d3b1a4..4f2c3309eea4 100644
--- a/drivers/clk/tegra/clk-sdmmc-mux.c
+++ b/drivers/clk/tegra/clk-sdmmc-mux.c
@@ -194,6 +194,15 @@ static void clk_sdmmc_mux_disable(struct clk_hw *hw)
 	gate_ops->disable(gate_hw);
 }
 
+static void clk_sdmmc_mux_disable_unused(struct clk_hw *hw)
+{
+	struct tegra_sdmmc_mux *sdmmc_mux = to_clk_sdmmc_mux(hw);
+	const struct clk_ops *gate_ops = sdmmc_mux->gate_ops;
+	struct clk_hw *gate_hw = &sdmmc_mux->gate.hw;
+
+	gate_ops->disable_unused(gate_hw);
+}
+
 static void clk_sdmmc_mux_restore_context(struct clk_hw *hw)
 {
 	struct clk_hw *parent = clk_hw_get_parent(hw);
@@ -218,6 +227,7 @@ static const struct clk_ops tegra_clk_sdmmc_mux_ops = {
 	.is_enabled = clk_sdmmc_mux_is_enabled,
 	.enable = clk_sdmmc_mux_enable,
 	.disable = clk_sdmmc_mux_disable,
+	.disable_unused = clk_sdmmc_mux_disable_unused,
 	.restore_context = clk_sdmmc_mux_restore_context,
 };
 

From 343597d558e79fe704ba8846b5b2ed24056b89c2 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 27 Jul 2021 09:04:58 -0700
Subject: [PATCH 245/502] bpf, sockmap: Zap ingress queues after stopping
 strparser

We don't want strparser to run and pass skbs into skmsg handlers when
the psock is null. We just sk_drop them in this case. When removing
a live socket from map it means extra drops that we do not need to
incur. Move the zap below strparser close to avoid this condition.

This way we stop the stream parser first stopping it from processing
packets and then delete the psock.

Fixes: a136678c0bdbb ("bpf: sk_msg, zap ingress queue on psock down")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210727160500.1713554-2-john.fastabend@gmail.com
---
 net/core/skmsg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 15d71288e741..28115ef742e8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -773,8 +773,6 @@ static void sk_psock_destroy(struct work_struct *work)
 
 void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 {
-	sk_psock_stop(psock, false);
-
 	write_lock_bh(&sk->sk_callback_lock);
 	sk_psock_restore_proto(sk, psock);
 	rcu_assign_sk_user_data(sk, NULL);
@@ -784,6 +782,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 		sk_psock_stop_verdict(sk, psock);
 	write_unlock_bh(&sk->sk_callback_lock);
 
+	sk_psock_stop(psock, false);
+
 	INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
 	queue_rcu_work(system_wq, &psock->rwork);
 }

From 476d98018f32e68e7c5d4e8456940cf2b6d66f10 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 27 Jul 2021 09:04:59 -0700
Subject: [PATCH 246/502] bpf, sockmap: On cleanup we additionally need to
 remove cached skb

Its possible if a socket is closed and the receive thread is under memory
pressure it may have cached a skb. We need to ensure these skbs are
free'd along with the normal ingress_skb queue.

Before 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") tear
down and backlog processing both had sock_lock for the common case of
socket close or unhash. So it was not possible to have both running in
parrallel so all we would need is the kfree in those kernels.

But, latest kernels include the commit 799aa7f98d5e and this requires a
bit more work. Without the ingress_lock guarding reading/writing the
state->skb case its possible the tear down could run before the state
update causing it to leak memory or worse when the backlog reads the state
it could potentially run interleaved with the tear down and we might end up
free'ing the state->skb from tear down side but already have the reference
from backlog side. To resolve such races we wrap accesses in ingress_lock
on both sides serializing tear down and backlog case. In both cases this
only happens after an EAGAIN error case so having an extra lock in place
is likely fine. The normal path will skip the locks.

Note, we check state->skb before grabbing lock. This works because
we can only enqueue with the mutex we hold already. Avoiding a race
on adding state->skb after the check. And if tear down path is running
that is also fine if the tear down path then removes state->skb we
will simply set skb=NULL and the subsequent goto is skipped. This
slight complication avoids locking in normal case.

With this fix we no longer see this warning splat from tcp side on
socket close when we hit the above case with redirect to ingress self.

[224913.935822] WARNING: CPU: 3 PID: 32100 at net/core/stream.c:208 sk_stream_kill_queues+0x212/0x220
[224913.935841] Modules linked in: fuse overlay bpf_preload x86_pkg_temp_thermal intel_uncore wmi_bmof squashfs sch_fq_codel efivarfs ip_tables x_tables uas xhci_pci ixgbe mdio xfrm_algo xhci_hcd wmi
[224913.935897] CPU: 3 PID: 32100 Comm: fgs-bench Tainted: G          I       5.14.0-rc1alu+ #181
[224913.935908] Hardware name: Dell Inc. Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019
[224913.935914] RIP: 0010:sk_stream_kill_queues+0x212/0x220
[224913.935923] Code: 8b 83 20 02 00 00 85 c0 75 20 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 df e8 2b 11 fe ff eb c3 0f 0b e9 7c ff ff ff 0f 0b eb ce <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 90 0f 1f 44 00 00 41 57 41
[224913.935932] RSP: 0018:ffff88816271fd38 EFLAGS: 00010206
[224913.935941] RAX: 0000000000000ae8 RBX: ffff88815acd5240 RCX: dffffc0000000000
[224913.935948] RDX: 0000000000000003 RSI: 0000000000000ae8 RDI: ffff88815acd5460
[224913.935954] RBP: ffff88815acd5460 R08: ffffffff955c0ae8 R09: fffffbfff2e6f543
[224913.935961] R10: ffffffff9737aa17 R11: fffffbfff2e6f542 R12: ffff88815acd5390
[224913.935967] R13: ffff88815acd5480 R14: ffffffff98d0c080 R15: ffffffff96267500
[224913.935974] FS:  00007f86e6bd1700(0000) GS:ffff888451cc0000(0000) knlGS:0000000000000000
[224913.935981] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[224913.935988] CR2: 000000c0008eb000 CR3: 00000001020e0005 CR4: 00000000003706e0
[224913.935994] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[224913.936000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[224913.936007] Call Trace:
[224913.936016]  inet_csk_destroy_sock+0xba/0x1f0
[224913.936033]  __tcp_close+0x620/0x790
[224913.936047]  tcp_close+0x20/0x80
[224913.936056]  inet_release+0x8f/0xf0
[224913.936070]  __sock_release+0x72/0x120
[224913.936083]  sock_close+0x14/0x20

Fixes: a136678c0bdbb ("bpf: sk_msg, zap ingress queue on psock down")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210727160500.1713554-3-john.fastabend@gmail.com
---
 net/core/skmsg.c | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 28115ef742e8..036cdb33a94a 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -590,23 +590,42 @@ static void sock_drop(struct sock *sk, struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
+static void sk_psock_skb_state(struct sk_psock *psock,
+			       struct sk_psock_work_state *state,
+			       struct sk_buff *skb,
+			       int len, int off)
+{
+	spin_lock_bh(&psock->ingress_lock);
+	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+		state->skb = skb;
+		state->len = len;
+		state->off = off;
+	} else {
+		sock_drop(psock->sk, skb);
+	}
+	spin_unlock_bh(&psock->ingress_lock);
+}
+
 static void sk_psock_backlog(struct work_struct *work)
 {
 	struct sk_psock *psock = container_of(work, struct sk_psock, work);
 	struct sk_psock_work_state *state = &psock->work_state;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	bool ingress;
 	u32 len, off;
 	int ret;
 
 	mutex_lock(&psock->work_mutex);
-	if (state->skb) {
+	if (unlikely(state->skb)) {
+		spin_lock_bh(&psock->ingress_lock);
 		skb = state->skb;
 		len = state->len;
 		off = state->off;
 		state->skb = NULL;
-		goto start;
+		spin_unlock_bh(&psock->ingress_lock);
 	}
+	if (skb)
+		goto start;
 
 	while ((skb = skb_dequeue(&psock->ingress_skb))) {
 		len = skb->len;
@@ -621,9 +640,8 @@ start:
 							  len, ingress);
 			if (ret <= 0) {
 				if (ret == -EAGAIN) {
-					state->skb = skb;
-					state->len = len;
-					state->off = off;
+					sk_psock_skb_state(psock, state, skb,
+							   len, off);
 					goto end;
 				}
 				/* Hard errors break pipe and stop xmit. */
@@ -722,6 +740,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
 		skb_bpf_redirect_clear(skb);
 		sock_drop(psock->sk, skb);
 	}
+	kfree_skb(psock->work_state.skb);
+	/* We null the skb here to ensure that calls to sk_psock_backlog
+	 * do not pick up the free'd skb.
+	 */
+	psock->work_state.skb = NULL;
 	__sk_psock_purge_ingress_msg(psock);
 }
 

From 9635720b7c88592214562cb72605bdab6708006c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 27 Jul 2021 09:05:00 -0700
Subject: [PATCH 247/502] bpf, sockmap: Fix memleak on ingress msg enqueue

If backlog handler is running during a tear down operation we may enqueue
data on the ingress msg queue while tear down is trying to free it.

 sk_psock_backlog()
   sk_psock_handle_skb()
     skb_psock_skb_ingress()
       sk_psock_skb_ingress_enqueue()
         sk_psock_queue_msg(psock,msg)
                                           spin_lock(ingress_lock)
                                            sk_psock_zap_ingress()
                                             _sk_psock_purge_ingerss_msg()
                                              _sk_psock_purge_ingress_msg()
                                            -- free ingress_msg list --
                                           spin_unlock(ingress_lock)
           spin_lock(ingress_lock)
           list_add_tail(msg,ingress_msg) <- entry on list with no one
                                             left to free it.
           spin_unlock(ingress_lock)

To fix we only enqueue from backlog if the ENABLED bit is set. The tear
down logic clears the bit with ingress_lock set so we wont enqueue the
msg in the last step.

Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210727160500.1713554-4-john.fastabend@gmail.com
---
 include/linux/skmsg.h | 54 ++++++++++++++++++++++++++++---------------
 net/core/skmsg.c      |  6 -----
 2 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 96f319099744..14ab0c0bc924 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
+static inline void sk_psock_set_state(struct sk_psock *psock,
+				      enum sk_psock_state_bits bit)
+{
+	set_bit(bit, &psock->state);
+}
+
+static inline void sk_psock_clear_state(struct sk_psock *psock,
+					enum sk_psock_state_bits bit)
+{
+	clear_bit(bit, &psock->state);
+}
+
+static inline bool sk_psock_test_state(const struct sk_psock *psock,
+				       enum sk_psock_state_bits bit)
+{
+	return test_bit(bit, &psock->state);
+}
+
+static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
+{
+	sk_drops_add(sk, skb);
+	kfree_skb(skb);
+}
+
+static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
+{
+	if (msg->skb)
+		sock_drop(psock->sk, msg->skb);
+	kfree(msg);
+}
+
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
 	spin_lock_bh(&psock->ingress_lock);
-	list_add_tail(&msg->list, &psock->ingress_msg);
+	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+		list_add_tail(&msg->list, &psock->ingress_msg);
+	else
+		drop_sk_msg(psock, msg);
 	spin_unlock_bh(&psock->ingress_lock);
 }
 
@@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk,
 		psock->psock_update_sk_prot(sk, psock, true);
 }
 
-static inline void sk_psock_set_state(struct sk_psock *psock,
-				      enum sk_psock_state_bits bit)
-{
-	set_bit(bit, &psock->state);
-}
-
-static inline void sk_psock_clear_state(struct sk_psock *psock,
-					enum sk_psock_state_bits bit)
-{
-	clear_bit(bit, &psock->state);
-}
-
-static inline bool sk_psock_test_state(const struct sk_psock *psock,
-				       enum sk_psock_state_bits bit)
-{
-	return test_bit(bit, &psock->state);
-}
-
 static inline struct sk_psock *sk_psock_get(struct sock *sk)
 {
 	struct sk_psock *psock;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 036cdb33a94a..2d6249b28928 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -584,12 +584,6 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 	return sk_psock_skb_ingress(psock, skb);
 }
 
-static void sock_drop(struct sock *sk, struct sk_buff *skb)
-{
-	sk_drops_add(sk, skb);
-	kfree_skb(skb);
-}
-
 static void sk_psock_skb_state(struct sk_psock *psock,
 			       struct sk_psock_work_state *state,
 			       struct sk_buff *skb,

From b93af3055d6f32d3b0361cfdb110c9399c1241ba Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 27 Jul 2021 17:32:53 +0800
Subject: [PATCH 248/502] blk-mq-sched: Fix blk_mq_sched_alloc_tags() error
 handling

If the blk_mq_sched_alloc_tags() -> blk_mq_alloc_rqs() call fails, then we
call blk_mq_sched_free_tags() -> blk_mq_free_rqs().

It is incorrect to do so, as any rqs would have already been freed in the
blk_mq_alloc_rqs() call.

Fix by calling blk_mq_free_rq_map() only directly.

Fixes: 6917ff0b5bd41 ("blk-mq-sched: refactor scheduler initialization")
Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/1627378373-148090-1-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c838d81ac058..0f006cabfd91 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
 	percpu_ref_put(&q->q_usage_counter);
 }
 
-static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
-				   struct blk_mq_hw_ctx *hctx,
-				   unsigned int hctx_idx)
-{
-	if (hctx->sched_tags) {
-		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
-		blk_mq_free_rq_map(hctx->sched_tags, set->flags);
-		hctx->sched_tags = NULL;
-	}
-}
-
 static int blk_mq_sched_alloc_tags(struct request_queue *q,
 				   struct blk_mq_hw_ctx *hctx,
 				   unsigned int hctx_idx)
@@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
 		return -ENOMEM;
 
 	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
-	if (ret)
-		blk_mq_sched_free_tags(set, hctx, hctx_idx);
+	if (ret) {
+		blk_mq_free_rq_map(hctx->sched_tags, set->flags);
+		hctx->sched_tags = NULL;
+	}
 
 	return ret;
 }

From 8b54874ef1617185048029a3083d510569e93751 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Mon, 26 Jul 2021 09:20:14 +0300
Subject: [PATCH 249/502] net/mlx5: Fix flow table chaining

Fix a bug when flow table is created in priority that already
has other flow tables as shown in the below diagram.
If the new flow table (FT-B) has the lowest level in the priority,
we need to connect the flow tables from the previous priority (p0)
to this new table. In addition when this flow table is destroyed
(FT-B), we need to connect the flow tables from the previous
priority (p0) to the next level flow table (FT-C) in the same
priority of the destroyed table (if exists).

                       ---------
                       |root_ns|
                       ---------
                            |
            --------------------------------
            |               |              |
       ----------      ----------      ---------
       |p(prio)-x|     |   p-y  |      |   p-n |
       ----------      ----------      ---------
            |               |
     ----------------  ------------------
     |ns(e.g bypass)|  |ns(e.g. kernel) |
     ----------------  ------------------
            |            |           |
	-------	       ------       ----
        |  p0 |        | p1 |       |p2|
        -------        ------       ----
           |             |    \
        --------       ------- ------
        | FT-A |       |FT-B | |FT-C|
        --------       ------- ------

Fixes: f90edfd279f3 ("net/mlx5_core: Connect flow tables")
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index d7bf0a3e4a52..c0697e1b7118 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1024,17 +1024,19 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
 static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
 			      struct fs_prio *prio)
 {
-	struct mlx5_flow_table *next_ft;
+	struct mlx5_flow_table *next_ft, *first_ft;
 	int err = 0;
 
 	/* Connect_prev_fts and update_root_ft_create are mutually exclusive */
 
-	if (list_empty(&prio->node.children)) {
+	first_ft = list_first_entry_or_null(&prio->node.children,
+					    struct mlx5_flow_table, node.list);
+	if (!first_ft || first_ft->level > ft->level) {
 		err = connect_prev_fts(dev, ft, prio);
 		if (err)
 			return err;
 
-		next_ft = find_next_chained_ft(prio);
+		next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
 		err = connect_fwd_rules(dev, ft, next_ft);
 		if (err)
 			return err;
@@ -2120,7 +2122,7 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft)
 				node.list) == ft))
 		return 0;
 
-	next_ft = find_next_chained_ft(prio);
+	next_ft = find_next_ft(ft);
 	err = connect_fwd_rules(dev, next_ft, ft);
 	if (err)
 		return err;

From 90b22b9bcd242a3ba238f2c6f7eab771799001f8 Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Thu, 8 Jul 2021 15:24:58 +0300
Subject: [PATCH 250/502] net/mlx5e: Disable Rx ntuple offload for uplink
 representor

Rx ntuple offload is not supported in switchdev mode.
Tryng to enable it cause kernel panic.

 BUG: kernel NULL pointer dereference, address: 0000000000000008
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 80000001065a5067 P4D 80000001065a5067 PUD 106594067 PMD 0
 Oops: 0000 [#1] SMP PTI
 CPU: 7 PID: 1089 Comm: ethtool Not tainted 5.13.0-rc7_for_upstream_min_debug_2021_06_23_16_44 #1
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:mlx5e_arfs_enable+0x70/0xd0 [mlx5_core]
 Code: 44 24 10 00 00 00 00 48 c7 44 24 18 00 00 00 00 49 63 c4 48 89 e2 44 89 e6 48 69 c0 20 08 00 00 48 89 ef 48 03 85 68 ac 00 00 <48> 8b 40 08 48 89 44 24 08 e8 d2 aa fd ff 48 83 05 82 96 18 00 01
 RSP: 0018:ffff8881047679e0 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: 0000004000000000 RCX: 0000004000000000
 RDX: ffff8881047679e0 RSI: 0000000000000000 RDI: ffff888115100880
 RBP: ffff888115100880 R08: ffffffffa00f6cb0 R09: ffff888104767a18
 R10: ffff8881151000a0 R11: ffff888109479540 R12: 0000000000000000
 R13: ffff888104767bb8 R14: ffff888115100000 R15: ffff8881151000a0
 FS:  00007f41a64ab740(0000) GS:ffff8882f5dc0000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000008 CR3: 0000000104cbc005 CR4: 0000000000370ea0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  set_feature_arfs+0x1e/0x40 [mlx5_core]
  mlx5e_handle_feature+0x43/0xa0 [mlx5_core]
  mlx5e_set_features+0x139/0x1b0 [mlx5_core]
  __netdev_update_features+0x2b3/0xaf0
  ethnl_set_features+0x176/0x3a0
  ? __nla_parse+0x22/0x30
  genl_family_rcv_msg_doit+0xe2/0x140
  genl_rcv_msg+0xde/0x1d0
  ? features_reply_size+0xe0/0xe0
  ? genl_get_cmd+0xd0/0xd0
  netlink_rcv_skb+0x4e/0xf0
  genl_rcv+0x24/0x40
  netlink_unicast+0x1f6/0x2b0
  netlink_sendmsg+0x225/0x450
  sock_sendmsg+0x33/0x40
  __sys_sendto+0xd4/0x120
  ? __sys_recvmsg+0x4e/0x90
  ? exc_page_fault+0x219/0x740
  __x64_sys_sendto+0x25/0x30
  do_syscall_64+0x3f/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae
 RIP: 0033:0x7f41a65b0cba
 Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c
 RSP: 002b:00007ffd8d688358 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 00000000010f42a0 RCX: 00007f41a65b0cba
 RDX: 0000000000000058 RSI: 00000000010f43b0 RDI: 0000000000000003
 RBP: 000000000047ae60 R08: 00007f41a667c000 R09: 000000000000000c
 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000010f4340
 R13: 00000000010f4350 R14: 00007ffd8d688400 R15: 00000000010f42a0
 Modules linked in: mlx5_vdpa vhost_iotlb vdpa xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core overlay mlx5_core ptp pps_core fuse
 CR2: 0000000000000008
 ---[ end trace c66523f2aba94b43 ]---

Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode")
Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 29 +++++++++++++------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d09e65557e75..c6f99fc77411 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3829,6 +3829,24 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
 	return 0;
 }
 
+static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev,
+						       netdev_features_t features)
+{
+	features &= ~NETIF_F_HW_TLS_RX;
+	if (netdev->features & NETIF_F_HW_TLS_RX)
+		netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
+
+	features &= ~NETIF_F_HW_TLS_TX;
+	if (netdev->features & NETIF_F_HW_TLS_TX)
+		netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
+
+	features &= ~NETIF_F_NTUPLE;
+	if (netdev->features & NETIF_F_NTUPLE)
+		netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
+
+	return features;
+}
+
 static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
 					    netdev_features_t features)
 {
@@ -3860,15 +3878,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
 			netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
 	}
 
-	if (mlx5e_is_uplink_rep(priv)) {
-		features &= ~NETIF_F_HW_TLS_RX;
-		if (netdev->features & NETIF_F_HW_TLS_RX)
-			netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
-
-		features &= ~NETIF_F_HW_TLS_TX;
-		if (netdev->features & NETIF_F_HW_TLS_TX)
-			netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
-	}
+	if (mlx5e_is_uplink_rep(priv))
+		features = mlx5e_fix_uplink_rep_features(netdev, features);
 
 	mutex_unlock(&priv->state_lock);
 

From c671972534c6f7fce789ac8156a2bc3bd146f806 Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Tue, 22 Jun 2021 17:07:02 +0300
Subject: [PATCH 251/502] net/mlx5: E-Switch, Set destination vport vhca id
 only when merged eswitch is supported

Destination vport vhca id is valid flag is set only merged eswitch isn't supported.
Change destination vport vhca id value to be set also only when merged eswitch
is supported.

Fixes: e4ad91f23f10 ("net/mlx5e: Split offloaded eswitch TC rules for port mirroring")
Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 7579f3402776..b0a2ca9037ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -382,10 +382,11 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
 {
 	dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport;
-	dest[dest_idx].vport.vhca_id =
-		MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
-	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+	if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+		dest[dest_idx].vport.vhca_id =
+			MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
 		dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+	}
 	if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
 		if (pkt_reformat) {
 			flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;

From dd3fddb82780bfa24124834edd90bbc63bd689cc Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 2 Jun 2021 14:17:07 +0300
Subject: [PATCH 252/502] net/mlx5: E-Switch, handle devcom events only for
 ports on the same device

This is the same check as LAG mode checks if to enable lag.
This will fix adding peer miss rules if lag is not supported
and even an incorrect rules in socket direct mode.

Also fix the incorrect comment on mlx5_get_next_phys_dev() as flow #1
doesn't exists.

Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c              | 5 +----
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index ceebfc20f65e..def2156e50ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -500,10 +500,7 @@ static int next_phys_dev(struct device *dev, const void *data)
 	return 1;
 }
 
-/* This function is called with two flows:
- * 1. During initialization of mlx5_core_dev and we don't need to lock it.
- * 2. During LAG configure stage and caller holds &mlx5_intf_mutex.
- */
+/* Must be called with intf_mutex held */
 struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
 {
 	struct auxiliary_device *adev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index b0a2ca9037ac..011e766e4f67 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2368,6 +2368,9 @@ static int mlx5_esw_offloads_devcom_event(int event,
 
 	switch (event) {
 	case ESW_OFFLOADS_DEVCOM_PAIR:
+		if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev)
+			break;
+
 		if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
 		    mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
 			break;

From e2351e517068718724f1d3b4010e2a41ec91fa76 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 30 Jun 2021 13:45:05 +0300
Subject: [PATCH 253/502] net/mlx5e: RX, Avoid possible data corruption when
 relaxed ordering and LRO combined

When HW aggregates packets for an LRO session, it writes the payload
of two consecutive packets of a flow contiguously, so that they usually
share a cacheline.

The first byte of a packet's payload is written immediately after
the last byte of the preceding packet.
In this flow, there are two consecutive write requests to the shared
cacheline:
1. Regular write for the earlier packet.
2. Read-modify-write for the following packet.

In case of relaxed-ordering on, these two writes might be re-ordered.
Using the end padding optimization (to avoid partial write for the last
cacheline of a packet) becomes problematic if the two writes occur
out-of-order, as the padding would overwrite payload that belongs to
the following packet, causing data corruption.

Avoid this by disabling the end padding optimization when both
LRO and relaxed-ordering are enabled.

Fixes: 17347d5430c4 ("net/mlx5e: Add support for PCI relaxed ordering")
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 150c8e82c738..2cbf18c967f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -471,6 +471,15 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
 	param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
 }
 
+static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+	bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+		MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+
+	return ro && params->lro_en ?
+		MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
+}
+
 int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
 			 struct mlx5e_params *params,
 			 struct mlx5e_xsk_param *xsk,
@@ -508,7 +517,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
 	}
 
 	MLX5_SET(wq, wq, wq_type,          params->rq_wq_type);
-	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+	MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params));
 	MLX5_SET(wq, wq, log_wq_stride,
 		 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
 	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.hw_objs.pdn);

From 9841d58f3550d11c6181424427e8ad8c9c80f1b6 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Wed, 30 Jun 2021 13:33:31 +0300
Subject: [PATCH 254/502] net/mlx5e: Add NETIF_F_HW_TC to hw_features when HTB
 offload is available

If a feature flag is only present in features, but not in hw_features,
the user can't reset it. Although hw_features may contain NETIF_F_HW_TC
by the point where the driver checks whether HTB offload is supported,
this flag is controlled by another condition that may not hold. Set it
explicitly to make sure the user can disable it.

Fixes: 214baf22870c ("net/mlx5e: Support HTB offload")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c6f99fc77411..c5a2e3e6fe4b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4870,6 +4870,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	if (MLX5_CAP_ETH(mdev, scatter_fcs))
 		netdev->hw_features |= NETIF_F_RXFCS;
 
+	if (mlx5_qos_is_supported(mdev))
+		netdev->hw_features |= NETIF_F_HW_TC;
+
 	netdev->features          = netdev->hw_features;
 
 	/* Defaults */
@@ -4890,8 +4893,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 		netdev->hw_features	 |= NETIF_F_NTUPLE;
 #endif
 	}
-	if (mlx5_qos_is_supported(mdev))
-		netdev->features |= NETIF_F_HW_TC;
 
 	netdev->features         |= NETIF_F_HIGHDMA;
 	netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;

From a759f845d1f78634b54744db0fa48524ef6d0e14 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 30 Jun 2021 11:17:12 +0300
Subject: [PATCH 255/502] net/mlx5e: Consider PTP-RQ when setting RX VLAN
 stripping

Add PTP-RQ to the loop when setting rx-vlan-offload feature via ethtool.
On PTP-RQ's creation, set rx-vlan-offload into its parameters.

Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c  | 5 ++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 778e229310a9..07b429b94d93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -482,8 +482,11 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
 		params->log_sq_size = orig->log_sq_size;
 		mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
 	}
-	if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+	/* RQ */
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+		params->vlan_strip_disable = orig->vlan_strip_disable;
 		mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams);
+	}
 }
 
 static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c5a2e3e6fe4b..37c440837945 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3384,7 +3384,7 @@ static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool en
 
 static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
 {
-	int err = 0;
+	int err;
 	int i;
 
 	for (i = 0; i < chs->num; i++) {
@@ -3392,6 +3392,8 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
 		if (err)
 			return err;
 	}
+	if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
+		return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
 
 	return 0;
 }

From 497008e783452a2ec45c7ec5835cfe6950dcb097 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 21 Jun 2021 18:04:07 +0300
Subject: [PATCH 256/502] net/mlx5e: Fix page allocation failure for trap-RQ
 over SF

Set the correct device pointer to the trap-RQ, to allow access to
dma_mask and avoid allocation request with the wrong pci-dev.

WARNING: CPU: 1 PID: 12005 at kernel/dma/mapping.c:151 dma_map_page_attrs+0x139/0x1c0
...
all Trace:
<IRQ>
? __page_pool_alloc_pages_slow+0x5a/0x210
mlx5e_post_rx_wqes+0x258/0x400 [mlx5_core]
mlx5e_trap_napi_poll+0x44/0xc0 [mlx5_core]
__napi_poll+0x24/0x150
net_rx_action+0x22b/0x280
__do_softirq+0xc7/0x27e
do_softirq+0x61/0x80
</IRQ>
__local_bh_enable_ip+0x4b/0x50
mlx5e_handle_action_trap+0x2dd/0x4d0 [mlx5_core]
blocking_notifier_call_chain+0x5a/0x80
mlx5_devlink_trap_action_set+0x8b/0x100 [mlx5_core]

Fixes: 5543e989fe5e ("net/mlx5e: Add trap entity to ETH driver")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 86ab4e864fe6..7f94508594fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -37,7 +37,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params
 	struct mlx5e_priv *priv = t->priv;
 
 	rq->wq_type      = params->rq_wq_type;
-	rq->pdev         = mdev->device;
+	rq->pdev         = t->pdev;
 	rq->netdev       = priv->netdev;
 	rq->priv         = priv;
 	rq->clock        = &mdev->clock;

From 678b1ae1af4aef488fcc42baa663e737b9a531ba Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Tue, 22 Jun 2021 10:24:17 +0300
Subject: [PATCH 257/502] net/mlx5e: Fix page allocation failure for ptp-RQ
 over SF

Set the correct pci-device pointer to the ptp-RQ. This allows access to
dma_mask and avoids allocation request with wrong pci-device.

Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 07b429b94d93..efef4adce086 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -497,7 +497,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
 	int err;
 
 	rq->wq_type      = params->rq_wq_type;
-	rq->pdev         = mdev->device;
+	rq->pdev         = c->pdev;
 	rq->netdev       = priv->netdev;
 	rq->priv         = priv;
 	rq->clock        = &mdev->clock;

From 7f331bf0f060c2727e36d64f9b098b4ee5f3dfad Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 16 Jun 2021 19:11:03 +0300
Subject: [PATCH 258/502] net/mlx5: Unload device upon firmware fatal error

When fw_fatal reporter reports an error, the firmware in not responding.
Unload the device to ensure that the driver closes all its resources,
even if recovery is not due (user disabled auto-recovery or reporter is
in grace period). On successful recovery the device is loaded back up.

Fixes: b3bd076f7501 ("net/mlx5: Report devlink health on FW fatal issues")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 9ff163c5bcde..9abeb80ffa31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -626,8 +626,16 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
 	}
 	fw_reporter_ctx.err_synd = health->synd;
 	fw_reporter_ctx.miss_counter = health->miss_counter;
-	devlink_health_report(health->fw_fatal_reporter,
-			      "FW fatal error reported", &fw_reporter_ctx);
+	if (devlink_health_report(health->fw_fatal_reporter,
+				  "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
+		/* If recovery wasn't performed, due to grace period,
+		 * unload the driver. This ensures that the driver
+		 * closes all its resources and it is not subjected to
+		 * requests from the kernel.
+		 */
+		mlx5_core_err(dev, "Driver is in error state. Unloading\n");
+		mlx5_unload_one(dev);
+	}
 }
 
 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {

From b1c2f6312c5005c928a72e668bf305a589d828d4 Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Mon, 26 Apr 2021 15:16:26 +0300
Subject: [PATCH 259/502] net/mlx5e: Fix nullptr in mlx5e_hairpin_get_mdev()

The result of __dev_get_by_index() is not checked for NULL and then gets
dereferenced immediately.

Also, __dev_get_by_index() must be called while holding either RTNL lock
or @dev_base_lock, which isn't satisfied by mlx5e_hairpin_get_mdev() or
its callers. This makes the underlying hlist_for_each_entry() loop not
safe, and can have adverse effects in itself.

Fix by using dev_get_by_index() and handling nullptr return value when
ifindex device is not found. Update mlx5e_hairpin_get_mdev() callers to
check for possible PTR_ERR() result.

Fixes: 77ab67b7f0f9 ("net/mlx5e: Basic setup of hairpin object")
Addresses-Coverity: ("Dereference null return value")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 33 +++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 629a61e8022f..d273758255c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -452,12 +452,32 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
 static
 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
 {
+	struct mlx5_core_dev *mdev;
 	struct net_device *netdev;
 	struct mlx5e_priv *priv;
 
-	netdev = __dev_get_by_index(net, ifindex);
+	netdev = dev_get_by_index(net, ifindex);
+	if (!netdev)
+		return ERR_PTR(-ENODEV);
+
 	priv = netdev_priv(netdev);
-	return priv->mdev;
+	mdev = priv->mdev;
+	dev_put(netdev);
+
+	/* Mirred tc action holds a refcount on the ifindex net_device (see
+	 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
+	 * after dev_put(netdev), while we're in the context of adding a tc flow.
+	 *
+	 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
+	 * stored in a hairpin object, which exists until all flows, that refer to it, get
+	 * removed.
+	 *
+	 * On the other hand, after a hairpin object has been created, the peer net_device may
+	 * be removed/unbound while there are still some hairpin flows that are using it. This
+	 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
+	 * NETDEV_UNREGISTER event of the peer net_device.
+	 */
+	return mdev;
 }
 
 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
@@ -666,6 +686,10 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params
 
 	func_mdev = priv->mdev;
 	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+	if (IS_ERR(peer_mdev)) {
+		err = PTR_ERR(peer_mdev);
+		goto create_pair_err;
+	}
 
 	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
 	if (IS_ERR(pair)) {
@@ -804,6 +828,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 	int err;
 
 	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+	if (IS_ERR(peer_mdev)) {
+		NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
+		return PTR_ERR(peer_mdev);
+	}
+
 	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
 		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
 		return -EOPNOTSUPP;

From 740452e09cf5fc489ce60831cf11abef117b5d26 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Mon, 26 Apr 2021 11:06:37 +0800
Subject: [PATCH 260/502] net/mlx5: Fix mlx5_vport_tbl_attr chain from u16 to
 u32

The offending refactor commit uses u16 chain wrongly. Actually, it
should be u32.

Fixes: c620b772152b ("net/mlx5: Refactor tc flow attributes structure")
CC: Ariel Levkovich <lariel@nvidia.com>
Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 48cac5bf606d..d562edf5b0bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -636,7 +636,7 @@ struct esw_vport_tbl_namespace {
 };
 
 struct mlx5_vport_tbl_attr {
-	u16 chain;
+	u32 chain;
 	u16 prio;
 	u16 vport;
 	const struct esw_vport_tbl_namespace *vport_ns;

From 5ab189cf3abbc9994bae3be524c5b88589ed56e2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 27 Jul 2021 14:38:09 -1000
Subject: [PATCH 261/502] blk-iocost: fix operation ordering in iocg_wake_fn()

iocg_wake_fn() open-codes wait_queue_entry removal and wakeup because it
wants the wq_entry to be always removed whether it ended up waking the
task or not. finish_wait() tests whether wq_entry needs removal without
grabbing the wait_queue lock and expects the waker to use
list_del_init_careful() after all waking operations are complete, which
iocg_wake_fn() didn't do. The operation order was wrong and the regular
list_del_init() was used.

The result is that if a waiter wakes up racing the waker, it can free pop
the wq_entry off stack before the waker is still looking at it, which can
lead to a backtrace like the following.

  [7312084.588951] general protection fault, probably for non-canonical address 0x586bf4005b2b88: 0000 [#1] SMP
  ...
  [7312084.647079] RIP: 0010:queued_spin_lock_slowpath+0x171/0x1b0
  ...
  [7312084.858314] Call Trace:
  [7312084.863548]  _raw_spin_lock_irqsave+0x22/0x30
  [7312084.872605]  try_to_wake_up+0x4c/0x4f0
  [7312084.880444]  iocg_wake_fn+0x71/0x80
  [7312084.887763]  __wake_up_common+0x71/0x140
  [7312084.895951]  iocg_kick_waitq+0xe8/0x2b0
  [7312084.903964]  ioc_rqos_throttle+0x275/0x650
  [7312084.922423]  __rq_qos_throttle+0x20/0x30
  [7312084.930608]  blk_mq_make_request+0x120/0x650
  [7312084.939490]  generic_make_request+0xca/0x310
  [7312084.957600]  submit_bio+0x173/0x200
  [7312084.981806]  swap_readpage+0x15c/0x240
  [7312084.989646]  read_swap_cache_async+0x58/0x60
  [7312084.998527]  swap_cluster_readahead+0x201/0x320
  [7312085.023432]  swapin_readahead+0x2df/0x450
  [7312085.040672]  do_swap_page+0x52f/0x820
  [7312085.058259]  handle_mm_fault+0xa16/0x1420
  [7312085.066620]  do_page_fault+0x2c6/0x5c0
  [7312085.074459]  page_fault+0x2f/0x40

Fix it by switching to list_del_init_careful() and putting it at the end.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Rik van Riel <riel@surriel.com>
Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
Cc: stable@vger.kernel.org # v5.4+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-iocost.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index c2d6bc88d3f1..5fac3757e6e0 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
 		return -1;
 
 	iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
+	wait->committed = true;
 
 	/*
 	 * autoremove_wake_function() removes the wait entry only when it
-	 * actually changed the task state.  We want the wait always
-	 * removed.  Remove explicitly and use default_wake_function().
+	 * actually changed the task state. We want the wait always removed.
+	 * Remove explicitly and use default_wake_function(). Note that the
+	 * order of operations is important as finish_wait() tests whether
+	 * @wq_entry is removed without grabbing the lock.
 	 */
-	list_del_init(&wq_entry->entry);
-	wait->committed = true;
-
 	default_wake_function(wq_entry, mode, flags, key);
+	list_del_init_careful(&wq_entry->entry);
 	return 0;
 }
 

From 340e84573878b2b9d63210482af46883366361b9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 22 Jul 2021 09:53:54 +0200
Subject: [PATCH 262/502] block: delay freeing the gendisk

blkdev_get_no_open acquires a reference to the block_device through
the block device inode and then tries to acquire a device model
reference to the gendisk.  But at this point the disk migh already
be freed (although the race is free).  Fix this by only freeing the
gendisk from the whole device bdevs ->free_inode callback as well.

Fixes: 22ae8ce8b892 ("block: simplify bdev/disk lookup in blkdev_get")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210722075402.983367-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c  | 3 +--
 fs/block_dev.c | 2 ++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index af4d2ab4a633..298ee78c1bda 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev)
 	disk_release_events(disk);
 	kfree(disk->random);
 	xa_destroy(&disk->part_tbl);
-	bdput(disk->part0);
 	if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
 		blk_put_queue(disk->queue);
-	kfree(disk);
+	bdput(disk->part0);	/* frees the disk */
 }
 struct class block_class = {
 	.name		= "block",
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ca8bf1869ca8..a38b0f33211c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode)
 	free_percpu(bdev->bd_stats);
 	kfree(bdev->bd_meta_info);
 
+	if (!bdev_is_partition(bdev))
+		kfree(bdev->bd_disk);
 	kmem_cache_free(bdev_cachep, BDEV_I(inode));
 }
 

From fa20bada3f934e3b3e4af4c77e5b518cd5a282e5 Mon Sep 17 00:00:00 2001
From: Maxim Devaev <mdevaev@gmail.com>
Date: Tue, 27 Jul 2021 21:58:00 +0300
Subject: [PATCH 263/502] usb: gadget: f_hid: idle uses the highest byte for
 duration

SET_IDLE value must be shifted 8 bits to the right to get duration.
This confirmed by USBCV test.

Fixes: afcff6dc690e ("usb: gadget: f_hid: added GET_IDLE and SET_IDLE handlers")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Maxim Devaev <mdevaev@gmail.com>
Link: https://lore.kernel.org/r/20210727185800.43796-1-mdevaev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_hid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 8d50c8b127fd..bb476e121eae 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -573,7 +573,7 @@ static int hidg_setup(struct usb_function *f,
 		  | HID_REQ_SET_IDLE):
 		VDBG(cdev, "set_idle\n");
 		length = 0;
-		hidg->idle = value;
+		hidg->idle = value >> 8;
 		goto respond;
 		break;
 

From d54db74ad6e0dea8c253fb68c689b836657ab914 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Mon, 7 Jun 2021 14:46:38 +0800
Subject: [PATCH 264/502] dmaengine: stm32-dma: Fix PM usage counter imbalance
 in stm32 dma ops

pm_runtime_get_sync will increment pm usage counter
even it failed. Forgetting to putting operation will
result in reference leak here. We fix it by replacing
it with pm_runtime_resume_and_get to keep usage counter
balanced.

Fixes: 48bc73ba14bcd ("dmaengine: stm32-dma: Add PM Runtime support")
Fixes: 05f8740a0e6fc ("dmaengine: stm32-dma: add suspend/resume power management support")
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Link: https://lore.kernel.org/r/20210607064640.121394-2-zhangqilong3@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/stm32-dma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index f54ecb123a52..7dd1d3d0bf06 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -1200,7 +1200,7 @@ static int stm32_dma_alloc_chan_resources(struct dma_chan *c)
 
 	chan->config_init = false;
 
-	ret = pm_runtime_get_sync(dmadev->ddev.dev);
+	ret = pm_runtime_resume_and_get(dmadev->ddev.dev);
 	if (ret < 0)
 		return ret;
 
@@ -1470,7 +1470,7 @@ static int stm32_dma_suspend(struct device *dev)
 	struct stm32_dma_device *dmadev = dev_get_drvdata(dev);
 	int id, ret, scr;
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
 		return ret;
 

From baa16371c9525f24d508508e4d296c031e1de29c Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Mon, 7 Jun 2021 14:46:39 +0800
Subject: [PATCH 265/502] dmaengine: stm32-dmamux: Fix PM usage counter
 unbalance in stm32 dmamux ops

pm_runtime_get_sync will increment pm usage counter
even it failed. Forgetting to putting operation will
result in reference leak here. We fix it by replacing
it with pm_runtime_resume_and_get to keep usage counter
balanced.

Fixes: 4f3ceca254e0f ("dmaengine: stm32-dmamux: Add PM Runtime support")
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Link: https://lore.kernel.org/r/20210607064640.121394-3-zhangqilong3@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/stm32-dmamux.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
index ef0d0555103d..a42164389ebc 100644
--- a/drivers/dma/stm32-dmamux.c
+++ b/drivers/dma/stm32-dmamux.c
@@ -137,7 +137,7 @@ static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec,
 
 	/* Set dma request */
 	spin_lock_irqsave(&dmamux->lock, flags);
-	ret = pm_runtime_get_sync(&pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
 	if (ret < 0) {
 		spin_unlock_irqrestore(&dmamux->lock, flags);
 		goto error;
@@ -336,7 +336,7 @@ static int stm32_dmamux_suspend(struct device *dev)
 	struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev);
 	int i, ret;
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
 		return ret;
 
@@ -361,7 +361,7 @@ static int stm32_dmamux_resume(struct device *dev)
 	if (ret < 0)
 		return ret;
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
 		return ret;
 

From eda97cb095f2958bbad55684a6ca3e7d7af0176a Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Date: Sat, 17 Jul 2021 22:00:21 +0300
Subject: [PATCH 266/502] dmaengine: of-dma: router_xlate to return
 -EPROBE_DEFER if controller is not yet available

If the router_xlate can not find the controller in the available DMA
devices then it should return with -EPORBE_DEFER in a same way as the
of_dma_request_slave_channel() does.

The issue can be reproduced if the event router is registered before the
DMA controller itself and a driver would request for a channel before the
controller is registered.
In of_dma_request_slave_channel():
1. of_dma_find_controller() would find the dma_router
2. ofdma->of_dma_xlate() would fail and returned NULL
3. -ENODEV is returned as error code

with this patch we would return in this case the correct -EPROBE_DEFER and
the client can try to request the channel later.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Link: https://lore.kernel.org/r/20210717190021.21897-1-peter.ujfalusi@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/of-dma.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index ec00b20ae8e4..ac61ecda2926 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -67,8 +67,12 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec,
 		return NULL;
 
 	ofdma_target = of_dma_find_controller(&dma_spec_target);
-	if (!ofdma_target)
-		return NULL;
+	if (!ofdma_target) {
+		ofdma->dma_router->route_free(ofdma->dma_router->dev,
+					      route_data);
+		chan = ERR_PTR(-EPROBE_DEFER);
+		goto err;
+	}
 
 	chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target);
 	if (IS_ERR_OR_NULL(chan)) {
@@ -89,6 +93,7 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec,
 		}
 	}
 
+err:
 	/*
 	 * Need to put the node back since the ofdma->of_dma_route_allocate
 	 * has taken it for generating the new, translated dma_spec

From 46573e3ab08fb041d5ba7bf7bf3215a1e724c78c Mon Sep 17 00:00:00 2001
From: Tang Bin <tangbin@cmss.chinamobile.com>
Date: Wed, 28 Jul 2021 09:49:25 +0800
Subject: [PATCH 267/502] nfc: s3fwrn5: fix undefined parameter values in
 dev_err()

In the function s3fwrn5_fw_download(), the 'ret' is not assigned,
so the correct value should be given in dev_err function.

Fixes: a0302ff5906a ("nfc: s3fwrn5: remove unnecessary label")
Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Signed-off-by: Tang Bin <tangbin@cmss.chinamobile.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/s3fwrn5/firmware.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c
index 1340fab9565e..e3e72b8a29e3 100644
--- a/drivers/nfc/s3fwrn5/firmware.c
+++ b/drivers/nfc/s3fwrn5/firmware.c
@@ -423,7 +423,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
 	if (IS_ERR(tfm)) {
 		ret = PTR_ERR(tfm);
 		dev_err(&fw_info->ndev->nfc_dev->dev,
-			"Cannot allocate shash (code=%ld)\n", PTR_ERR(tfm));
+			"Cannot allocate shash (code=%pe)\n", tfm);
 		goto out;
 	}
 

From 557fb5862c9272ad9b21407afe1da8acfd9b53eb Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Tue, 27 Jul 2021 23:40:54 -0300
Subject: [PATCH 268/502] sctp: fix return value check in
 __sctp_rcv_asconf_lookup

As Ben Hutchings noticed, this check should have been inverted: the call
returns true in case of success.

Reported-by: Ben Hutchings <ben@decadent.org.uk>
Fixes: 0c5dc070ff3d ("sctp: validate from_addr_param return")
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index eb3c2a34a31c..5ef86fdb1176 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1203,7 +1203,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
 	if (unlikely(!af))
 		return NULL;
 
-	if (af->from_addr_param(&paddr, param, peer_port, 0))
+	if (!af->from_addr_param(&paddr, param, peer_port, 0))
 		return NULL;
 
 	return __sctp_lookup_association(net, laddr, &paddr, transportp);

From 76a16be07b209a3f507c72abe823bd3af1c8661a Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 28 Jul 2021 15:43:13 +0800
Subject: [PATCH 269/502] tulip: windbond-840: Fix missing pci_disable_device()
 in probe and remove

Replace pci_enable_device() with pcim_enable_device(),
pci_disable_device() and pci_release_regions() will be
called in release automatically.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dec/tulip/winbond-840.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index f6ff1f76eacb..1876f15dd827 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -357,7 +357,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 	int i, option = find_cnt < MAX_UNITS ? options[find_cnt] : 0;
 	void __iomem *ioaddr;
 
-	i = pci_enable_device(pdev);
+	i = pcim_enable_device(pdev);
 	if (i) return i;
 
 	pci_set_master(pdev);
@@ -379,7 +379,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size);
 	if (!ioaddr)
-		goto err_out_free_res;
+		goto err_out_netdev;
 
 	for (i = 0; i < 3; i++)
 		((__le16 *)dev->dev_addr)[i] = cpu_to_le16(eeprom_read(ioaddr, i));
@@ -458,8 +458,6 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 err_out_cleardev:
 	pci_iounmap(pdev, ioaddr);
-err_out_free_res:
-	pci_release_regions(pdev);
 err_out_netdev:
 	free_netdev (dev);
 	return -ENODEV;
@@ -1526,7 +1524,6 @@ static void w840_remove1(struct pci_dev *pdev)
 	if (dev) {
 		struct netdev_private *np = netdev_priv(dev);
 		unregister_netdev(dev);
-		pci_release_regions(pdev);
 		pci_iounmap(pdev, np->base_addr);
 		free_netdev(dev);
 	}

From 5e7b30d24a5b8cb691c173b45b50e3ca0191be19 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 28 Jul 2021 08:49:09 +0200
Subject: [PATCH 270/502] nfc: nfcsim: fix use after free during module unload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a use after free memory corruption during module exit:
 - nfcsim_exit()
  - nfcsim_device_free(dev0)
    - nfc_digital_unregister_device()
      This iterates over command queue and frees all commands,
    - dev->up = false
    - nfcsim_link_shutdown()
      - nfcsim_link_recv_wake()
        This wakes the sleeping thread nfcsim_link_recv_skb().

 - nfcsim_link_recv_skb()
   Wake from wait_event_interruptible_timeout(),
   call directly the deb->cb callback even though (dev->up == false),
   - digital_send_cmd_complete()
     Dereference of "struct digital_cmd" cmd which was freed earlier by
     nfc_digital_unregister_device().

This causes memory corruption shortly after (with unrelated stack
trace):

  nfc nfc0: NFC: nfcsim_recv_wq: Device is down
  llcp: nfc_llcp_recv: err -19
  nfc nfc1: NFC: nfcsim_recv_wq: Device is down
  BUG: unable to handle page fault for address: ffffffffffffffed
  Call Trace:
   fsnotify+0x54b/0x5c0
   __fsnotify_parent+0x1fe/0x300
   ? vfs_write+0x27c/0x390
   vfs_write+0x27c/0x390
   ksys_write+0x63/0xe0
   do_syscall_64+0x3b/0x90
   entry_SYSCALL_64_after_hwframe+0x44/0xae

KASAN report:

  BUG: KASAN: use-after-free in digital_send_cmd_complete+0x16/0x50
  Write of size 8 at addr ffff88800a05f720 by task kworker/0:2/71
  Workqueue: events nfcsim_recv_wq [nfcsim]
  Call Trace:
   dump_stack_lvl+0x45/0x59
   print_address_description.constprop.0+0x21/0x140
   ? digital_send_cmd_complete+0x16/0x50
   ? digital_send_cmd_complete+0x16/0x50
   kasan_report.cold+0x7f/0x11b
   ? digital_send_cmd_complete+0x16/0x50
   ? digital_dep_link_down+0x60/0x60
   digital_send_cmd_complete+0x16/0x50
   nfcsim_recv_wq+0x38f/0x3d5 [nfcsim]
   ? nfcsim_in_send_cmd+0x4a/0x4a [nfcsim]
   ? lock_is_held_type+0x98/0x110
   ? finish_wait+0x110/0x110
   ? rcu_read_lock_sched_held+0x9c/0xd0
   ? rcu_read_lock_bh_held+0xb0/0xb0
   ? lockdep_hardirqs_on_prepare+0x12e/0x1f0

This flow of calling digital_send_cmd_complete() callback on driver exit
is specific to nfcsim which implements reading and sending work queues.
Since the NFC digital device was unregistered, the callback should not
be called.

Fixes: 204bddcb508f ("NFC: nfcsim: Make use of the Digital layer")
Cc: <stable@vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/nfcsim.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c
index a9864fcdfba6..dd27c85190d3 100644
--- a/drivers/nfc/nfcsim.c
+++ b/drivers/nfc/nfcsim.c
@@ -192,8 +192,7 @@ static void nfcsim_recv_wq(struct work_struct *work)
 
 		if (!IS_ERR(skb))
 			dev_kfree_skb(skb);
-
-		skb = ERR_PTR(-ENODEV);
+		return;
 	}
 
 	dev->cb(dev->nfc_digital_dev, dev->arg, skb);

From e9c6729acb38bcf027e40a5b50b2e1b0aa4bc170 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 23 Jul 2021 17:08:40 +0200
Subject: [PATCH 271/502] HID: fix typo in Kconfig

There is a missing space in "relyingon".
Add it.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 160554903ef9..76937f716fbe 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -576,7 +576,7 @@ config HID_LOGITECH_HIDPP
 	depends on HID_LOGITECH
 	select POWER_SUPPLY
 	help
-	Support for Logitech devices relyingon the HID++ Logitech specification
+	Support for Logitech devices relying on the HID++ Logitech specification
 
 	Say Y if you want support for Logitech devices relying on the HID++
 	specification. Such devices are the various Logitech Touchpads (T650,

From ebe0b42a4252333aa4af60fd4d11b69405aa6068 Mon Sep 17 00:00:00 2001
From: Haochen Tong <i@hexchain.org>
Date: Sun, 18 Jul 2021 01:04:31 +0800
Subject: [PATCH 272/502] HID: apple: Add support for Keychron K1 wireless
 keyboard

The Keychron K1 wireless keyboard has a set of Apple-like function keys
and an Fn key that works like on an Apple bluetooth keyboard. It
identifies as an Apple Alu RevB ANSI keyboard (05ac:024f) over USB and
BT. Use hid-apple for it so the Fn key and function keys work correctly.

Signed-off-by: Haochen Tong <i@hexchain.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-apple.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index 6b8f0d004d34..dc6bd4299c54 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -501,6 +501,8 @@ static const struct hid_device_id apple_devices[] = {
 			APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
 		.driver_data = APPLE_HAS_FN },
+	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),

From 0818ec1f508fc3b8e957f6c7f77b988c5bc24da7 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 19 Jul 2021 11:27:31 +0100
Subject: [PATCH 273/502] HID: Kconfig: Fix spelling mistake "Uninterruptable"
 -> "Uninterruptible"

There is a spelling mistake in the Kconfig text. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig
index dcf3a235870f..7c2032f7f44d 100644
--- a/drivers/hid/usbhid/Kconfig
+++ b/drivers/hid/usbhid/Kconfig
@@ -38,7 +38,7 @@ config USB_HIDDEV
 	help
 	  Say Y here if you want to support HID devices (from the USB
 	  specification standpoint) that aren't strictly user interface
-	  devices, like monitor controls and Uninterruptable Power Supplies.
+	  devices, like monitor controls and Uninterruptible Power Supplies.
 
 	  This module supports these devices separately using a separate
 	  event interface on /dev/usb/hiddevX (char 180:96 to 180:111).

From 6ca2350e11f09d5d3e53777d1eff8ff6d300ed93 Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Mon, 19 Jul 2021 13:55:28 -0700
Subject: [PATCH 274/502] HID: wacom: Re-enable touch by default for Cintiq
 24HDT / 27QHDT

Commit 670e90924bfe ("HID: wacom: support named keys on older devices")
added support for sending named events from the soft buttons on the
24HDT and 27QHDT. In the process, however, it inadvertantly disabled the
touchscreen of the 24HDT and 27QHDT by default. The
`wacom_set_shared_values` function would normally enable touch by default
but because it checks the state of the non-shared `has_mute_touch_switch`
flag and `wacom_setup_touch_input_capabilities` sets the state of the
/shared/ version, touch ends up being disabled by default.

This patch sets the non-shared flag, letting `wacom_set_shared_values`
take care of copying the value over to the shared version and setting
the default touch state to "on".

Fixes: 670e90924bfe ("HID: wacom: support named keys on older devices")
CC: stable@vger.kernel.org # 5.4+
Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Reviewed-by: Ping Cheng <ping.cheng@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 81d7d12bcf34..496a000ef862 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -3831,7 +3831,7 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev,
 		    wacom_wac->shared->touch->product == 0xF6) {
 			input_dev->evbit[0] |= BIT_MASK(EV_SW);
 			__set_bit(SW_MUTE_DEVICE, input_dev->swbit);
-			wacom_wac->shared->has_mute_touch_switch = true;
+			wacom_wac->has_mute_touch_switch = true;
 		}
 		fallthrough;
 

From 7cc8524f65ce1a350042836c7cf837046aaa6e21 Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Mon, 19 Jul 2021 13:55:31 -0700
Subject: [PATCH 275/502] HID: wacom: Skip processing of touches with negative
 slot values

The `input_mt_get_slot_by_key` function may return a negative value
if an error occurs (e.g. running out of slots). If this occurs we
should really avoid reporting any data for the slot.

Signed-off-by: Ping Cheng <ping.cheng@wacom.com>
Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 496a000ef862..81ba642adcb7 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2548,6 +2548,9 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac,
 		int slot;
 
 		slot = input_mt_get_slot_by_key(input, hid_data->id);
+		if (slot < 0)
+			return;
+
 		input_mt_slot(input, slot);
 		input_mt_report_slot_state(input, MT_TOOL_FINGER, prox);
 	}

From a59c7b6c6ff6d5437f293709e766f939d7107266 Mon Sep 17 00:00:00 2001
From: Ping Bao <ping.a.bao@intel.com>
Date: Wed, 21 Jul 2021 15:56:15 -0700
Subject: [PATCH 276/502] platform/x86: intel-hid: add Alder Lake ACPI device
 ID

Alder Lake has a new ACPI ID for Intel HID event filter device.

Signed-off-by: Ping Bao <ping.a.bao@intel.com>
Link: https://lore.kernel.org/r/20210721225615.20575-1-ping.a.bao@intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/intel-hid.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index 078648a9201b..e5fbe017f8e1 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -25,6 +25,7 @@ static const struct acpi_device_id intel_hid_ids[] = {
 	{"INT33D5", 0},
 	{"INTC1051", 0},
 	{"INTC1054", 0},
+	{"INTC1070", 0},
 	{"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, intel_hid_ids);

From 2b2c66f607d00d17f879c0d946d44340bfbdc501 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Mon, 26 Jul 2021 17:36:30 +0200
Subject: [PATCH 277/502] platform/x86: gigabyte-wmi: add support for B550
 Aorus Elite V2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported as working here:
https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/1#issuecomment-879398883

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20210726153630.65213-1-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/gigabyte-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 5529d7b0abea..fbb224a82e34 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -141,6 +141,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
 
 static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),

From 1e60cebf82948cfdc9497ea4553bab125587593c Mon Sep 17 00:00:00 2001
From: zhang kai <zhangkaiheb@126.com>
Date: Wed, 28 Jul 2021 18:54:18 +0800
Subject: [PATCH 278/502] net: let flow have same hash in two directions

using same source and destination ip/port for flow hash calculation
within the two directions.

Signed-off-by: zhang kai <zhangkaiheb@126.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2aadbfc5193b..4b2415d34873 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1504,7 +1504,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
 }
 EXPORT_SYMBOL(flow_get_u32_dst);
 
-/* Sort the source and destination IP (and the ports if the IP are the same),
+/* Sort the source and destination IP and the ports,
  * to have consistent hash within the two directions
  */
 static inline void __flow_hash_consistentify(struct flow_keys *keys)
@@ -1515,11 +1515,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
 	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
 		addr_diff = (__force u32)keys->addrs.v4addrs.dst -
 			    (__force u32)keys->addrs.v4addrs.src;
-		if ((addr_diff < 0) ||
-		    (addr_diff == 0 &&
-		     ((__force u16)keys->ports.dst <
-		      (__force u16)keys->ports.src))) {
+		if (addr_diff < 0)
 			swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+
+		if ((__force u16)keys->ports.dst <
+		    (__force u16)keys->ports.src) {
 			swap(keys->ports.src, keys->ports.dst);
 		}
 		break;
@@ -1527,13 +1527,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
 		addr_diff = memcmp(&keys->addrs.v6addrs.dst,
 				   &keys->addrs.v6addrs.src,
 				   sizeof(keys->addrs.v6addrs.dst));
-		if ((addr_diff < 0) ||
-		    (addr_diff == 0 &&
-		     ((__force u16)keys->ports.dst <
-		      (__force u16)keys->ports.src))) {
+		if (addr_diff < 0) {
 			for (i = 0; i < 4; i++)
 				swap(keys->addrs.v6addrs.src.s6_addr32[i],
 				     keys->addrs.v6addrs.dst.s6_addr32[i]);
+		}
+		if ((__force u16)keys->ports.dst <
+		    (__force u16)keys->ports.src) {
 			swap(keys->ports.src, keys->ports.dst);
 		}
 		break;

From 89fb62fde3b226f99b7015280cf132e2a7438edf Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 28 Jul 2021 20:11:07 +0800
Subject: [PATCH 279/502] sis900: Fix missing pci_disable_device() in probe and
 remove

Replace pci_enable_device() with pcim_enable_device(),
pci_disable_device() and pci_release_regions() will be
called in release automatically.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sis/sis900.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index ca9c00b7f588..cff87de9178a 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -443,7 +443,7 @@ static int sis900_probe(struct pci_dev *pci_dev,
 #endif
 
 	/* setup various bits in PCI command register */
-	ret = pci_enable_device(pci_dev);
+	ret = pcim_enable_device(pci_dev);
 	if(ret) return ret;
 
 	i = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32));
@@ -469,7 +469,7 @@ static int sis900_probe(struct pci_dev *pci_dev,
 	ioaddr = pci_iomap(pci_dev, 0, 0);
 	if (!ioaddr) {
 		ret = -ENOMEM;
-		goto err_out_cleardev;
+		goto err_out;
 	}
 
 	sis_priv = netdev_priv(net_dev);
@@ -581,8 +581,6 @@ err_unmap_tx:
 			  sis_priv->tx_ring_dma);
 err_out_unmap:
 	pci_iounmap(pci_dev, ioaddr);
-err_out_cleardev:
-	pci_release_regions(pci_dev);
  err_out:
 	free_netdev(net_dev);
 	return ret;
@@ -2499,7 +2497,6 @@ static void sis900_remove(struct pci_dev *pci_dev)
 			  sis_priv->tx_ring_dma);
 	pci_iounmap(pci_dev, sis_priv->ioaddr);
 	free_netdev(net_dev);
-	pci_release_regions(pci_dev);
 }
 
 static int __maybe_unused sis900_suspend(struct device *dev)

From ef04688871f3386b6d40ade8f5c664290420f819 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 Jul 2021 10:50:31 -0600
Subject: [PATCH 280/502] io_uring: don't block level reissue off completion
 path

Some setups, like SCSI, can throw spurious -EAGAIN off the softirq
completion path. Normally we expect this to happen inline as part
of submission, but apparently SCSI has a weird corner case where it
can happen as part of normal completions.

This should be solved by having the -EAGAIN bubble back up the stack
as part of submission, but previous attempts at this failed and we're
not just quite there yet. Instead we currently use REQ_F_REISSUE to
handle this case.

For now, catch it in io_rw_should_reissue() and prevent a reissue
from a bogus path.

Cc: stable@vger.kernel.org
Reported-by: Fabian Ebner <f.ebner@proxmox.com>
Tested-by: Fabian Ebner <f.ebner@proxmox.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6ba101cd4661..83f67d33bf67 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2447,6 +2447,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
 	 */
 	if (percpu_ref_is_dying(&ctx->refs))
 		return false;
+	/*
+	 * Play it safe and assume not safe to re-import and reissue if we're
+	 * not in the original thread group (or in task context).
+	 */
+	if (!same_thread_group(req->task, current) || !in_task())
+		return false;
 	return true;
 }
 #else

From a890d01e4ee016978776e45340e521b3bbbdf41f Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Wed, 28 Jul 2021 11:03:22 +0800
Subject: [PATCH 281/502] io_uring: fix poll requests leaking second poll
 entries

For pure poll requests, it doesn't remove the second poll wait entry
when it's done, neither after vfs_poll() or in the poll completion
handler. We should remove the second poll wait entry.
And we use io_poll_remove_double() rather than io_poll_remove_waitqs()
since the latter has some redundant logic.

Fixes: 88e41cf928a6 ("io_uring: add multishot mode for IORING_OP_POLL_ADD")
Cc: stable@vger.kernel.org # 5.13+
Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Link: https://lore.kernel.org/r/20210728030322.12307-1-haoxu@linux.alibaba.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 83f67d33bf67..bf548af0426c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4939,7 +4939,6 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
 	if (req->poll.events & EPOLLONESHOT)
 		flags = 0;
 	if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
-		io_poll_remove_waitqs(req);
 		req->poll.done = true;
 		flags = 0;
 	}
@@ -4962,6 +4961,7 @@ static void io_poll_task_func(struct io_kiocb *req)
 
 		done = io_poll_complete(req, req->result);
 		if (done) {
+			io_poll_remove_double(req);
 			hash_del(&req->hash_node);
 		} else {
 			req->result = 0;
@@ -5149,7 +5149,7 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
 		ipt->error = -EINVAL;
 
 	spin_lock_irq(&ctx->completion_lock);
-	if (ipt->error)
+	if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
 		io_poll_remove_double(req);
 	if (likely(poll->head)) {
 		spin_lock(&poll->head->lock);
@@ -5221,7 +5221,6 @@ static int io_arm_poll_handler(struct io_kiocb *req)
 	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
 					io_async_wake);
 	if (ret || ipt.error) {
-		io_poll_remove_double(req);
 		spin_unlock_irq(&ctx->completion_lock);
 		if (ret)
 			return IO_APOLL_READY;

From 36c2530ea963884eeb0097169f853fdc36f16ad7 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Tue, 27 Jul 2021 18:04:28 +0200
Subject: [PATCH 282/502] spi: imx: mx51-ecspi: Fix CONFIGREG delay comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For (2 * 1000000) / min_speed_hz < 10 to be true in naturals with zero,
the min_speed_hz must be above 200000 (i.e. 200001 rounds down to 9, so
the condition triggers). Update the comment. No functional change.

Fixes: 6fd8b8503a0dc ("spi: spi-imx: Fix out-of-order CS/SCLK operation at low speeds")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Mark Brown <broonie@kernel.org>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20210727160428.7673-1-marex@denx.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-imx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 2872993550bd..fa68e9817929 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -593,7 +593,7 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
 	}
 
 	delay = (2 * 1000000) / min_speed_hz;
-	if (likely(delay < 10))	/* SCLK is faster than 100 kHz */
+	if (likely(delay < 10))	/* SCLK is faster than 200 kHz */
 		udelay(delay);
 	else			/* SCLK is _very_ slow */
 		usleep_range(delay, delay + 10);

From e0eef3690dc66b3ecc6e0f1267f332403eb22bea Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Wed, 28 Jul 2021 23:19:58 +0800
Subject: [PATCH 283/502] Revert "ACPI: resources: Add checks for ACPI IRQ
 override"

The commit 0ec4e55e9f57 ("ACPI: resources: Add checks for ACPI IRQ
override") introduces regression on some platforms, at least it makes
the UART can't get correct irq setting on two different platforms,
and it makes the kernel can't bootup on these two platforms.

This reverts commit 0ec4e55e9f571f08970ed115ec0addc691eda613.

Regression-discuss: https://bugzilla.kernel.org/show_bug.cgi?id=213031
Reported-by: PGNd <pgnet.dev@gmail.com>
Cc: 5.4+ <stable@vger.kernel.org> # 5.4+
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/resource.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index dc01fb550b28..ee78a210c606 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -423,13 +423,6 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
 	}
 }
 
-static bool irq_is_legacy(struct acpi_resource_irq *irq)
-{
-	return irq->triggering == ACPI_EDGE_SENSITIVE &&
-		irq->polarity == ACPI_ACTIVE_HIGH &&
-		irq->shareable == ACPI_EXCLUSIVE;
-}
-
 /**
  * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information.
  * @ares: Input ACPI resource object.
@@ -468,7 +461,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
 		}
 		acpi_dev_get_irqresource(res, irq->interrupts[index],
 					 irq->triggering, irq->polarity,
-					 irq->shareable, irq_is_legacy(irq));
+					 irq->shareable, true);
 		break;
 	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
 		ext_irq = &ares->data.extended_irq;

From 41a8457f3f6f829be1f8f8fa7577a46b9b7223ef Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 27 Jul 2021 09:18:24 -0700
Subject: [PATCH 284/502] ACPI: DPTF: Fix reading of attributes

The current assumption that methods to read PCH FIVR attributes will
return integer, is not correct. There is no good way to return integer
as negative numbers are also valid.

These read methods return a package of integers. The first integer returns
status, which is 0 on success and any other value for failure. When the
returned status is zero, then the second integer returns the actual value.

This change fixes this issue by replacing acpi_evaluate_integer() with
acpi_evaluate_object() and use acpi_extract_package() to extract results.

Fixes: 2ce6324eadb01 ("ACPI: DPTF: Add PCH FIVR participant driver")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: 5.10+ <stable@vger.kernel.org> # 5.10+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/dptf/dptf_pch_fivr.c | 51 ++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c
index 5fca18296bf6..550b9081fcbc 100644
--- a/drivers/acpi/dptf/dptf_pch_fivr.c
+++ b/drivers/acpi/dptf/dptf_pch_fivr.c
@@ -9,6 +9,42 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
+struct pch_fivr_resp {
+	u64 status;
+	u64 result;
+};
+
+static int pch_fivr_read(acpi_handle handle, char *method, struct pch_fivr_resp *fivr_resp)
+{
+	struct acpi_buffer resp = { sizeof(struct pch_fivr_resp), fivr_resp};
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_buffer format = { sizeof("NN"), "NN" };
+	union acpi_object *obj;
+	acpi_status status;
+	int ret = -EFAULT;
+
+	status = acpi_evaluate_object(handle, method, NULL, &buffer);
+	if (ACPI_FAILURE(status))
+		return ret;
+
+	obj = buffer.pointer;
+	if (!obj || obj->type != ACPI_TYPE_PACKAGE)
+		goto release_buffer;
+
+	status = acpi_extract_package(obj, &format, &resp);
+	if (ACPI_FAILURE(status))
+		goto release_buffer;
+
+	if (fivr_resp->status)
+		goto release_buffer;
+
+	ret = 0;
+
+release_buffer:
+	kfree(buffer.pointer);
+	return ret;
+}
+
 /*
  * Presentation of attributes which are defined for INT1045
  * They are:
@@ -23,15 +59,14 @@ static ssize_t name##_show(struct device *dev,\
 			   char *buf)\
 {\
 	struct acpi_device *acpi_dev = dev_get_drvdata(dev);\
-	unsigned long long val;\
-	acpi_status status;\
+	struct pch_fivr_resp fivr_resp;\
+	int status;\
 \
-	status = acpi_evaluate_integer(acpi_dev->handle, #method,\
-				       NULL, &val);\
-	if (ACPI_SUCCESS(status))\
-		return sprintf(buf, "%d\n", (int)val);\
-	else\
-		return -EINVAL;\
+	status = pch_fivr_read(acpi_dev->handle, #method, &fivr_resp);\
+	if (status)\
+		return status;\
+\
+	return sprintf(buf, "%llu\n", fivr_resp.result);\
 }
 
 #define PCH_FIVR_STORE(name, method) \

From 240246f6b913b0c23733cfd2def1d283f8cc9bbe Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.de>
Date: Fri, 9 Jul 2021 11:29:22 -0500
Subject: [PATCH 285/502] btrfs: mark compressed range uptodate only if all bio
 succeed

In compression write endio sequence, the range which the compressed_bio
writes is marked as uptodate if the last bio of the compressed (sub)bios
is completed successfully. There could be previous bio which may
have failed which is recorded in cb->errors.

Set the writeback range as uptodate only if cb->errors is zero, as opposed
to checking only the last bio's status.

Backporting notes: in all versions up to 4.4 the last argument is always
replaced by "!cb->errors".

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9a023ae0f98b..30d82cdf128c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -352,7 +352,7 @@ static void end_compressed_bio_write(struct bio *bio)
 	btrfs_record_physical_zoned(inode, cb->start, bio);
 	btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
 			cb->start, cb->start + cb->len - 1,
-			bio->bi_status == BLK_STS_OK);
+			!cb->errors);
 
 	end_compressed_writeback(inode, cb);
 	/* note, our inode could be gone now */

From ecc64fab7d49c678e70bd4c35fe64d2ab3e3d212 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 27 Jul 2021 11:24:43 +0100
Subject: [PATCH 286/502] btrfs: fix lost inode on log replay after mix of
 fsync, rename and inode eviction

When checking if we need to log the new name of a renamed inode, we are
checking if the inode and its parent inode have been logged before, and if
not we don't log the new name. The check however is buggy, as it directly
compares the logged_trans field of the inodes versus the ID of the current
transaction. The problem is that logged_trans is a transient field, only
stored in memory and never persisted in the inode item, so if an inode
was logged before, evicted and reloaded, its logged_trans field is set to
a value of 0, meaning the check will return false and the new name of the
renamed inode is not logged. If the old parent directory was previously
fsynced and we deleted the logged directory entries corresponding to the
old name, we end up with a log that when replayed will delete the renamed
inode.

The following example triggers the problem:

  $ mkfs.btrfs -f /dev/sdc
  $ mount /dev/sdc /mnt

  $ mkdir /mnt/A
  $ mkdir /mnt/B
  $ echo -n "hello world" > /mnt/A/foo

  $ sync

  # Add some new file to A and fsync directory A.
  $ touch /mnt/A/bar
  $ xfs_io -c "fsync" /mnt/A

  # Now trigger inode eviction. We are only interested in triggering
  # eviction for the inode of directory A.
  $ echo 2 > /proc/sys/vm/drop_caches

  # Move foo from directory A to directory B.
  # This deletes the directory entries for foo in A from the log, and
  # does not add the new name for foo in directory B to the log, because
  # logged_trans of A is 0, which is less than the current transaction ID.
  $ mv /mnt/A/foo /mnt/B/foo

  # Now make an fsync to anything except A, B or any file inside them,
  # like for example create a file at the root directory and fsync this
  # new file. This syncs the log that contains all the changes done by
  # previous rename operation.
  $ touch /mnt/baz
  $ xfs_io -c "fsync" /mnt/baz

  <power fail>

  # Mount the filesystem and replay the log.
  $ mount /dev/sdc /mnt

  # Check the filesystem content.
  $ ls -1R /mnt
  /mnt/:
  A
  B
  baz

  /mnt/A:
  bar

  /mnt/B:
  $

  # File foo is gone, it's neither in A/ nor in B/.

Fix this by using the inode_logged() helper at btrfs_log_new_name(), which
safely checks if an inode was logged before in the current transaction.

A test case for fstests will follow soon.

CC: stable@vger.kernel.org # 4.14+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9fd0348be7f5..e6430ac9bbe8 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6503,8 +6503,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	 * if this inode hasn't been logged and directory we're renaming it
 	 * from hasn't been logged, we don't need to log it
 	 */
-	if (inode->logged_trans < trans->transid &&
-	    (!old_dir || old_dir->logged_trans < trans->transid))
+	if (!inode_logged(trans, inode) &&
+	    (!old_dir || !inode_logged(trans, old_dir)))
 		return;
 
 	/*

From b2a616676839e2a6b02c8e40be7f886f882ed194 Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Tue, 27 Jul 2021 15:13:03 +0800
Subject: [PATCH 287/502] btrfs: fix rw device counting in
 __btrfs_free_extra_devids

When removing a writeable device in __btrfs_free_extra_devids, the rw
device count should be decremented.

This error was caught by Syzbot which reported a warning in
close_fs_devices:

  WARNING: CPU: 1 PID: 9355 at fs/btrfs/volumes.c:1168 close_fs_devices+0x763/0x880 fs/btrfs/volumes.c:1168
  Modules linked in:
  CPU: 0 PID: 9355 Comm: syz-executor552 Not tainted 5.13.0-rc1-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  RIP: 0010:close_fs_devices+0x763/0x880 fs/btrfs/volumes.c:1168
  RSP: 0018:ffffc9000333f2f0 EFLAGS: 00010293
  RAX: ffffffff8365f5c3 RBX: 0000000000000001 RCX: ffff888029afd4c0
  RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000
  RBP: ffff88802846f508 R08: ffffffff8365f525 R09: ffffed100337d128
  R10: ffffed100337d128 R11: 0000000000000000 R12: dffffc0000000000
  R13: ffff888019be8868 R14: 1ffff1100337d10d R15: 1ffff1100337d10a
  FS:  00007f6f53828700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 000000000047c410 CR3: 00000000302a6000 CR4: 00000000001506f0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   btrfs_close_devices+0xc9/0x450 fs/btrfs/volumes.c:1180
   open_ctree+0x8e1/0x3968 fs/btrfs/disk-io.c:3693
   btrfs_fill_super fs/btrfs/super.c:1382 [inline]
   btrfs_mount_root+0xac5/0xc60 fs/btrfs/super.c:1749
   legacy_get_tree+0xea/0x180 fs/fs_context.c:592
   vfs_get_tree+0x86/0x270 fs/super.c:1498
   fc_mount fs/namespace.c:993 [inline]
   vfs_kern_mount+0xc9/0x160 fs/namespace.c:1023
   btrfs_mount+0x3d3/0xb50 fs/btrfs/super.c:1809
   legacy_get_tree+0xea/0x180 fs/fs_context.c:592
   vfs_get_tree+0x86/0x270 fs/super.c:1498
   do_new_mount fs/namespace.c:2905 [inline]
   path_mount+0x196f/0x2be0 fs/namespace.c:3235
   do_mount fs/namespace.c:3248 [inline]
   __do_sys_mount fs/namespace.c:3456 [inline]
   __se_sys_mount+0x2f9/0x3b0 fs/namespace.c:3433
   do_syscall_64+0x3f/0xb0 arch/x86/entry/common.c:47
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Because fs_devices->rw_devices was not 0 after
closing all devices. Here is the call trace that was observed:

  btrfs_mount_root():
    btrfs_scan_one_device():
      device_list_add();   <---------------- device added
    btrfs_open_devices():
      open_fs_devices():
        btrfs_open_one_device();   <-------- writable device opened,
	                                     rw device count ++
    btrfs_fill_super():
      open_ctree():
        btrfs_free_extra_devids():
	  __btrfs_free_extra_devids();  <--- writable device removed,
	                              rw device count not decremented
	  fail_tree_roots:
	    btrfs_close_devices():
	      close_fs_devices();   <------- rw device count off by 1

As a note, prior to commit cf89af146b7e ("btrfs: dev-replace: fail
mount if we don't have replace item with target device"), rw_devices
was decremented on removing a writable device in
__btrfs_free_extra_devids only if the BTRFS_DEV_STATE_REPLACE_TGT bit
was not set for the device. However, this check does not need to be
reinstated as it is now redundant and incorrect.

In __btrfs_free_extra_devids, we skip removing the device if it is the
target for replacement. This is done by checking whether device->devid
== BTRFS_DEV_REPLACE_DEVID. Since BTRFS_DEV_STATE_REPLACE_TGT is set
only on the device with devid BTRFS_DEV_REPLACE_DEVID, no devices
should have the BTRFS_DEV_STATE_REPLACE_TGT bit set after the check,
and so it's redundant to test for that bit.

Additionally, following commit 82372bc816d7 ("Btrfs: make
the logic of source device removing more clear"), rw_devices is
incremented whenever a writeable device is added to the alloc
list (including the target device in btrfs_dev_replace_finishing), so
all removals of writable devices from the alloc list should also be
accompanied by a decrement to rw_devices.

Reported-by: syzbot+a70e2ad0879f160b9217@syzkaller.appspotmail.com
Fixes: cf89af146b7e ("btrfs: dev-replace: fail mount if we don't have replace item with target device")
CC: stable@vger.kernel.org # 5.10+
Tested-by: syzbot+a70e2ad0879f160b9217@syzkaller.appspotmail.com
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c6c14315b1c9..4c83256ae37f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1078,6 +1078,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
 		if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
 			list_del_init(&device->dev_alloc_list);
 			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+			fs_devices->rw_devices--;
 		}
 		list_del_init(&device->dev_list);
 		fs_devices->num_devices--;

From cbcf01128d0a92e131bd09f1688fe032480b65ca Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 28 Jul 2021 14:47:20 +0200
Subject: [PATCH 288/502] af_unix: fix garbage collect vs MSG_PEEK

unix_gc() assumes that candidate sockets can never gain an external
reference (i.e.  be installed into an fd) while the unix_gc_lock is
held.  Except for MSG_PEEK this is guaranteed by modifying inflight
count under the unix_gc_lock.

MSG_PEEK does not touch any variable protected by unix_gc_lock (file
count is not), yet it needs to be serialized with garbage collection.
Do this by locking/unlocking unix_gc_lock:

 1) increment file count

 2) lock/unlock barrier to make sure incremented file count is visible
    to garbage collection

 3) install file into fd

This is a lock barrier (unlike smp_mb()) that ensures that garbage
collection is run completely before or completely after the barrier.

Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/unix/af_unix.c | 51 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 23c92ad15c61..ba7ced947e51 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1526,6 +1526,53 @@ out:
 	return err;
 }
 
+static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+
+	/*
+	 * Garbage collection of unix sockets starts by selecting a set of
+	 * candidate sockets which have reference only from being in flight
+	 * (total_refs == inflight_refs).  This condition is checked once during
+	 * the candidate collection phase, and candidates are marked as such, so
+	 * that non-candidates can later be ignored.  While inflight_refs is
+	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
+	 * is an instantaneous decision.
+	 *
+	 * Once a candidate, however, the socket must not be reinstalled into a
+	 * file descriptor while the garbage collection is in progress.
+	 *
+	 * If the above conditions are met, then the directed graph of
+	 * candidates (*) does not change while unix_gc_lock is held.
+	 *
+	 * Any operations that changes the file count through file descriptors
+	 * (dup, close, sendmsg) does not change the graph since candidates are
+	 * not installed in fds.
+	 *
+	 * Dequeing a candidate via recvmsg would install it into an fd, but
+	 * that takes unix_gc_lock to decrement the inflight count, so it's
+	 * serialized with garbage collection.
+	 *
+	 * MSG_PEEK is special in that it does not change the inflight count,
+	 * yet does install the socket into an fd.  The following lock/unlock
+	 * pair is to ensure serialization with garbage collection.  It must be
+	 * done between incrementing the file count and installing the file into
+	 * an fd.
+	 *
+	 * If garbage collection starts after the barrier provided by the
+	 * lock/unlock, then it will see the elevated refcount and not mark this
+	 * as a candidate.  If a garbage collection is already in progress
+	 * before the file count was incremented, then the lock/unlock pair will
+	 * ensure that garbage collection is finished before progressing to
+	 * installing the fd.
+	 *
+	 * (*) A -> B where B is on the queue of A or B is on the queue of C
+	 * which is on the queue of listening socket A.
+	 */
+	spin_lock(&unix_gc_lock);
+	spin_unlock(&unix_gc_lock);
+}
+
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
 {
 	int err = 0;
@@ -2175,7 +2222,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 		sk_peek_offset_fwd(sk, size);
 
 		if (UNIXCB(skb).fp)
-			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+			unix_peek_fds(&scm, skb);
 	}
 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
 
@@ -2418,7 +2465,7 @@ unlock:
 			/* It is questionable, see note in unix_dgram_recvmsg.
 			 */
 			if (UNIXCB(skb).fp)
-				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+				unix_peek_fds(&scm, skb);
 
 			sk_peek_offset_fwd(sk, chunk);
 

From 25905f602fdb0cfa147017056636768a7aa1ff6f Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 21 Jul 2021 12:25:20 -0700
Subject: [PATCH 289/502] dmaengine: idxd: Change license on idxd.h to LGPL

This file was given GPL-2.0 license. But LGPL-2.1 makes more sense
as it needs to be used by libraries outside of the kernel source tree.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/idxd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index e33997b4d750..edc346a77c91 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */
 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
 #ifndef _USR_IDXD_H_
 #define _USR_IDXD_H_

From 345daff2e994ee844d6a609c37f085695fbb4c4d Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Tue, 27 Jul 2021 17:24:18 +0200
Subject: [PATCH 290/502] ucounts: Fix race condition between alloc_ucounts and
 put_ucounts

The race happens because put_ucounts() doesn't use spinlock and
get_ucounts is not under spinlock:

CPU0                    CPU1
----                    ----
alloc_ucounts()         put_ucounts()

spin_lock_irq(&ucounts_lock);
ucounts = find_ucounts(ns, uid, hashent);

                        atomic_dec_and_test(&ucounts->count))

spin_unlock_irq(&ucounts_lock);

                        spin_lock_irqsave(&ucounts_lock, flags);
                        hlist_del_init(&ucounts->node);
                        spin_unlock_irqrestore(&ucounts_lock, flags);
                        kfree(ucounts);

ucounts = get_ucounts(ucounts);

==================================================================
BUG: KASAN: use-after-free in instrument_atomic_read_write include/linux/instrumented.h:101 [inline]
BUG: KASAN: use-after-free in atomic_add_negative include/asm-generic/atomic-instrumented.h:556 [inline]
BUG: KASAN: use-after-free in get_ucounts kernel/ucount.c:152 [inline]
BUG: KASAN: use-after-free in get_ucounts kernel/ucount.c:150 [inline]
BUG: KASAN: use-after-free in alloc_ucounts+0x19b/0x5b0 kernel/ucount.c:188
Write of size 4 at addr ffff88802821e41c by task syz-executor.4/16785

CPU: 1 PID: 16785 Comm: syz-executor.4 Not tainted 5.14.0-rc1-next-20210712-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:105
 print_address_description.constprop.0.cold+0x6c/0x309 mm/kasan/report.c:233
 __kasan_report mm/kasan/report.c:419 [inline]
 kasan_report.cold+0x83/0xdf mm/kasan/report.c:436
 check_region_inline mm/kasan/generic.c:183 [inline]
 kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189
 instrument_atomic_read_write include/linux/instrumented.h:101 [inline]
 atomic_add_negative include/asm-generic/atomic-instrumented.h:556 [inline]
 get_ucounts kernel/ucount.c:152 [inline]
 get_ucounts kernel/ucount.c:150 [inline]
 alloc_ucounts+0x19b/0x5b0 kernel/ucount.c:188
 set_cred_ucounts+0x171/0x3a0 kernel/cred.c:684
 __sys_setuid+0x285/0x400 kernel/sys.c:623
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x4665d9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fde54097188 EFLAGS: 00000246 ORIG_RAX: 0000000000000069
RAX: ffffffffffffffda RBX: 000000000056bf80 RCX: 00000000004665d9
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000000000ff
RBP: 00000000004bfcb9 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000056bf80
R13: 00007ffc8655740f R14: 00007fde54097300 R15: 0000000000022000

Allocated by task 16784:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
 kasan_set_track mm/kasan/common.c:46 [inline]
 set_alloc_info mm/kasan/common.c:434 [inline]
 ____kasan_kmalloc mm/kasan/common.c:513 [inline]
 ____kasan_kmalloc mm/kasan/common.c:472 [inline]
 __kasan_kmalloc+0x9b/0xd0 mm/kasan/common.c:522
 kmalloc include/linux/slab.h:591 [inline]
 kzalloc include/linux/slab.h:721 [inline]
 alloc_ucounts+0x23d/0x5b0 kernel/ucount.c:169
 set_cred_ucounts+0x171/0x3a0 kernel/cred.c:684
 __sys_setuid+0x285/0x400 kernel/sys.c:623
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Freed by task 16785:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
 kasan_set_track+0x1c/0x30 mm/kasan/common.c:46
 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:360
 ____kasan_slab_free mm/kasan/common.c:366 [inline]
 ____kasan_slab_free mm/kasan/common.c:328 [inline]
 __kasan_slab_free+0xfb/0x130 mm/kasan/common.c:374
 kasan_slab_free include/linux/kasan.h:229 [inline]
 slab_free_hook mm/slub.c:1650 [inline]
 slab_free_freelist_hook+0xdf/0x240 mm/slub.c:1675
 slab_free mm/slub.c:3235 [inline]
 kfree+0xeb/0x650 mm/slub.c:4295
 put_ucounts kernel/ucount.c:200 [inline]
 put_ucounts+0x117/0x150 kernel/ucount.c:192
 put_cred_rcu+0x27a/0x520 kernel/cred.c:124
 rcu_do_batch kernel/rcu/tree.c:2550 [inline]
 rcu_core+0x7ab/0x1380 kernel/rcu/tree.c:2785
 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558

Last potentially related work creation:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
 kasan_record_aux_stack+0xe5/0x110 mm/kasan/generic.c:348
 insert_work+0x48/0x370 kernel/workqueue.c:1332
 __queue_work+0x5c1/0xed0 kernel/workqueue.c:1498
 queue_work_on+0xee/0x110 kernel/workqueue.c:1525
 queue_work include/linux/workqueue.h:507 [inline]
 call_usermodehelper_exec+0x1f0/0x4c0 kernel/umh.c:435
 kobject_uevent_env+0xf8f/0x1650 lib/kobject_uevent.c:618
 netdev_queue_add_kobject net/core/net-sysfs.c:1621 [inline]
 netdev_queue_update_kobjects+0x374/0x450 net/core/net-sysfs.c:1655
 register_queue_kobjects net/core/net-sysfs.c:1716 [inline]
 netdev_register_kobject+0x35a/0x430 net/core/net-sysfs.c:1959
 register_netdevice+0xd33/0x1500 net/core/dev.c:10331
 nsim_init_netdevsim drivers/net/netdevsim/netdev.c:317 [inline]
 nsim_create+0x381/0x4d0 drivers/net/netdevsim/netdev.c:364
 __nsim_dev_port_add+0x32e/0x830 drivers/net/netdevsim/dev.c:1295
 nsim_dev_port_add_all+0x53/0x150 drivers/net/netdevsim/dev.c:1355
 nsim_dev_probe+0xcb5/0x1190 drivers/net/netdevsim/dev.c:1496
 call_driver_probe drivers/base/dd.c:517 [inline]
 really_probe+0x23c/0xcd0 drivers/base/dd.c:595
 __driver_probe_device+0x338/0x4d0 drivers/base/dd.c:747
 driver_probe_device+0x4c/0x1a0 drivers/base/dd.c:777
 __device_attach_driver+0x20b/0x2f0 drivers/base/dd.c:894
 bus_for_each_drv+0x15f/0x1e0 drivers/base/bus.c:427
 __device_attach+0x228/0x4a0 drivers/base/dd.c:965
 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:487
 device_add+0xc2f/0x2180 drivers/base/core.c:3356
 nsim_bus_dev_new drivers/net/netdevsim/bus.c:431 [inline]
 new_device_store+0x436/0x710 drivers/net/netdevsim/bus.c:298
 bus_attr_store+0x72/0xa0 drivers/base/bus.c:122
 sysfs_kf_write+0x110/0x160 fs/sysfs/file.c:139
 kernfs_fop_write_iter+0x342/0x500 fs/kernfs/file.c:296
 call_write_iter include/linux/fs.h:2152 [inline]
 new_sync_write+0x426/0x650 fs/read_write.c:518
 vfs_write+0x75a/0xa40 fs/read_write.c:605
 ksys_write+0x12d/0x250 fs/read_write.c:658
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Second to last potentially related work creation:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
 kasan_record_aux_stack+0xe5/0x110 mm/kasan/generic.c:348
 insert_work+0x48/0x370 kernel/workqueue.c:1332
 __queue_work+0x5c1/0xed0 kernel/workqueue.c:1498
 queue_work_on+0xee/0x110 kernel/workqueue.c:1525
 queue_work include/linux/workqueue.h:507 [inline]
 call_usermodehelper_exec+0x1f0/0x4c0 kernel/umh.c:435
 kobject_uevent_env+0xf8f/0x1650 lib/kobject_uevent.c:618
 kobject_synth_uevent+0x701/0x850 lib/kobject_uevent.c:208
 uevent_store+0x20/0x50 drivers/base/core.c:2371
 dev_attr_store+0x50/0x80 drivers/base/core.c:2072
 sysfs_kf_write+0x110/0x160 fs/sysfs/file.c:139
 kernfs_fop_write_iter+0x342/0x500 fs/kernfs/file.c:296
 call_write_iter include/linux/fs.h:2152 [inline]
 new_sync_write+0x426/0x650 fs/read_write.c:518
 vfs_write+0x75a/0xa40 fs/read_write.c:605
 ksys_write+0x12d/0x250 fs/read_write.c:658
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

The buggy address belongs to the object at ffff88802821e400
 which belongs to the cache kmalloc-192 of size 192
The buggy address is located 28 bytes inside of
 192-byte region [ffff88802821e400, ffff88802821e4c0)
The buggy address belongs to the page:
page:ffffea0000a08780 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2821e
flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff)
raw: 00fff00000000200 dead000000000100 dead000000000122 ffff888010841a00
raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 1, ts 12874702440, free_ts 12637793385
 prep_new_page mm/page_alloc.c:2433 [inline]
 get_page_from_freelist+0xa72/0x2f80 mm/page_alloc.c:4166
 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5374
 alloc_page_interleave+0x1e/0x200 mm/mempolicy.c:2119
 alloc_pages+0x238/0x2a0 mm/mempolicy.c:2242
 alloc_slab_page mm/slub.c:1713 [inline]
 allocate_slab+0x32b/0x4c0 mm/slub.c:1853
 new_slab mm/slub.c:1916 [inline]
 new_slab_objects mm/slub.c:2662 [inline]
 ___slab_alloc+0x4ba/0x820 mm/slub.c:2825
 __slab_alloc.constprop.0+0xa7/0xf0 mm/slub.c:2865
 slab_alloc_node mm/slub.c:2947 [inline]
 slab_alloc mm/slub.c:2989 [inline]
 __kmalloc+0x312/0x330 mm/slub.c:4133
 kmalloc include/linux/slab.h:596 [inline]
 kzalloc include/linux/slab.h:721 [inline]
 __register_sysctl_table+0x112/0x1090 fs/proc/proc_sysctl.c:1318
 rds_tcp_init_net+0x1db/0x4f0 net/rds/tcp.c:551
 ops_init+0xaf/0x470 net/core/net_namespace.c:140
 __register_pernet_operations net/core/net_namespace.c:1137 [inline]
 register_pernet_operations+0x35a/0x850 net/core/net_namespace.c:1214
 register_pernet_device+0x26/0x70 net/core/net_namespace.c:1301
 rds_tcp_init+0x77/0xe0 net/rds/tcp.c:717
 do_one_initcall+0x103/0x650 init/main.c:1285
 do_initcall_level init/main.c:1360 [inline]
 do_initcalls init/main.c:1376 [inline]
 do_basic_setup init/main.c:1396 [inline]
 kernel_init_freeable+0x6b8/0x741 init/main.c:1598
page last free stack trace:
 reset_page_owner include/linux/page_owner.h:24 [inline]
 free_pages_prepare mm/page_alloc.c:1343 [inline]
 free_pcp_prepare+0x312/0x7d0 mm/page_alloc.c:1394
 free_unref_page_prepare mm/page_alloc.c:3329 [inline]
 free_unref_page+0x19/0x690 mm/page_alloc.c:3408
 __vunmap+0x783/0xb70 mm/vmalloc.c:2587
 free_work+0x58/0x70 mm/vmalloc.c:82
 process_one_work+0x98d/0x1630 kernel/workqueue.c:2276
 worker_thread+0x658/0x11f0 kernel/workqueue.c:2422
 kthread+0x3e5/0x4d0 kernel/kthread.c:319
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295

Memory state around the buggy address:
 ffff88802821e300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff88802821e380: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc
>ffff88802821e400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                            ^
 ffff88802821e480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
 ffff88802821e500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
==================================================================

- The race fix has two parts.
  * Changing the code to guarantee that ucounts->count is only decremented
    when ucounts_lock is held.  This guarantees that find_ucounts
    will never find a structure with a zero reference count.
  * Changing alloc_ucounts to increment ucounts->count while
    ucounts_lock is held.  This guarantees the reference count on the
    found data structure will not be decremented to zero (and the data
    structure freed) before the reference count is incremented.
  -- Eric Biederman

Reported-by: syzbot+01985d7909f9468f013c@syzkaller.appspotmail.com
Reported-by: syzbot+59dd63761094a80ad06d@syzkaller.appspotmail.com
Reported-by: syzbot+6cd79f45bb8fa1c9eeae@syzkaller.appspotmail.com
Reported-by: syzbot+b6e65bd125a05f803d6b@syzkaller.appspotmail.com
Fixes: b6c336528926 ("Use atomic_t for ucounts reference counting")
Cc: Hillf Danton <hdanton@sina.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/7b2ace1759b281cdd2d66101d6b305deef722efb.1627397820.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/ucount.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel/ucount.c b/kernel/ucount.c
index 87799e2379bd..77be3bbe3cc4 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -160,6 +160,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 {
 	struct hlist_head *hashent = ucounts_hashentry(ns, uid);
 	struct ucounts *ucounts, *new;
+	long overflow;
 
 	spin_lock_irq(&ucounts_lock);
 	ucounts = find_ucounts(ns, uid, hashent);
@@ -184,8 +185,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 			return new;
 		}
 	}
+	overflow = atomic_add_negative(1, &ucounts->count);
 	spin_unlock_irq(&ucounts_lock);
-	ucounts = get_ucounts(ucounts);
+	if (overflow) {
+		put_ucounts(ucounts);
+		return NULL;
+	}
 	return ucounts;
 }
 
@@ -193,8 +198,7 @@ void put_ucounts(struct ucounts *ucounts)
 {
 	unsigned long flags;
 
-	if (atomic_dec_and_test(&ucounts->count)) {
-		spin_lock_irqsave(&ucounts_lock, flags);
+	if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
 		hlist_del_init(&ucounts->node);
 		spin_unlock_irqrestore(&ucounts_lock, flags);
 		kfree(ucounts);

From b946dbcfa4df80ec81b442964e07ad37000cc059 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 28 Jul 2021 16:38:29 +1000
Subject: [PATCH 291/502] cifs: add missing parsing of backupuid

We lost parsing of backupuid in the switch to new mount API.
Add it back.

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
Cc: <stable@vger.kernel.org> # v5.11+
Reported-by: Xiaoli Feng <xifeng@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/fs_context.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 9a59d7ff9a11..eed59bc1d913 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -925,6 +925,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 		ctx->cred_uid = uid;
 		ctx->cruid_specified = true;
 		break;
+	case Opt_backupuid:
+		uid = make_kuid(current_user_ns(), result.uint_32);
+		if (!uid_valid(uid))
+			goto cifs_parse_mount_err;
+		ctx->backupuid = uid;
+		ctx->backupuid_specified = true;
+		break;
 	case Opt_backupgid:
 		gid = make_kgid(current_user_ns(), result.uint_32);
 		if (!gid_valid(gid))

From f5e81d1117501546b7be050c5fbafa6efd2c722c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 13 Jul 2021 08:18:31 +0000
Subject: [PATCH 292/502] bpf: Introduce BPF nospec instruction for mitigating
 Spectre v4

In case of JITs, each of the JIT backends compiles the BPF nospec instruction
/either/ to a machine instruction which emits a speculation barrier /or/ to
/no/ machine instruction in case the underlying architecture is not affected
by Speculative Store Bypass or has different mitigations in place already.

This covers both x86 and (implicitly) arm64: In case of x86, we use 'lfence'
instruction for mitigation. In case of arm64, we rely on the firmware mitigation
as controlled via the ssbd kernel parameter. Whenever the mitigation is enabled,
it works for all of the kernel code with no need to provide any additional
instructions here (hence only comment in arm64 JIT). Other archs can follow
as needed. The BPF nospec instruction is specifically targeting Spectre v4
since i) we don't use a serialization barrier for the Spectre v1 case, and
ii) mitigation instructions for v1 and v4 might be different on some archs.

The BPF nospec is required for a future commit, where the BPF verifier does
annotate intermediate BPF programs with speculation barriers.

Co-developed-by: Piotr Krysiuk <piotras@gmail.com>
Co-developed-by: Benedict Schlueter <benedict.schlueter@rub.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Piotr Krysiuk <piotras@gmail.com>
Signed-off-by: Benedict Schlueter <benedict.schlueter@rub.de>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/arm/net/bpf_jit_32.c         |  3 +++
 arch/arm64/net/bpf_jit_comp.c     | 13 +++++++++++++
 arch/mips/net/ebpf_jit.c          |  3 +++
 arch/powerpc/net/bpf_jit_comp32.c |  6 ++++++
 arch/powerpc/net/bpf_jit_comp64.c |  6 ++++++
 arch/riscv/net/bpf_jit_comp32.c   |  4 ++++
 arch/riscv/net/bpf_jit_comp64.c   |  4 ++++
 arch/s390/net/bpf_jit_comp.c      |  5 +++++
 arch/sparc/net/bpf_jit_comp_64.c  |  3 +++
 arch/x86/net/bpf_jit_comp.c       |  7 +++++++
 arch/x86/net/bpf_jit_comp32.c     |  6 ++++++
 include/linux/filter.h            | 15 +++++++++++++++
 kernel/bpf/core.c                 | 19 ++++++++++++++++++-
 kernel/bpf/disasm.c               | 16 +++++++++-------
 14 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 897634d0a67c..a951276f0547 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1602,6 +1602,9 @@ exit:
 		rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
 		emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
 		break;
+	/* speculation barrier */
+	case BPF_ST | BPF_NOSPEC:
+		break;
 	/* ST: *(size *)(dst + off) = imm */
 	case BPF_ST | BPF_MEM | BPF_W:
 	case BPF_ST | BPF_MEM | BPF_H:
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index dccf98a37283..41c23f474ea6 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -823,6 +823,19 @@ emit_cond_jmp:
 			return ret;
 		break;
 
+	/* speculation barrier */
+	case BPF_ST | BPF_NOSPEC:
+		/*
+		 * Nothing required here.
+		 *
+		 * In case of arm64, we rely on the firmware mitigation of
+		 * Speculative Store Bypass as controlled via the ssbd kernel
+		 * parameter. Whenever the mitigation is enabled, it works
+		 * for all of the kernel code with no need to provide any
+		 * additional instructions.
+		 */
+		break;
+
 	/* ST: *(size *)(dst + off) = imm */
 	case BPF_ST | BPF_MEM | BPF_W:
 	case BPF_ST | BPF_MEM | BPF_H:
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 939dd06764bc..3a73e9375712 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -1355,6 +1355,9 @@ jeq_common:
 		}
 		break;
 
+	case BPF_ST | BPF_NOSPEC: /* speculation barrier */
+		break;
+
 	case BPF_ST | BPF_B | BPF_MEM:
 	case BPF_ST | BPF_H | BPF_MEM:
 	case BPF_ST | BPF_W | BPF_MEM:
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 34bb1583fc0c..beb12cbc8c29 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -737,6 +737,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 			}
 			break;
 
+		/*
+		 * BPF_ST NOSPEC (speculation barrier)
+		 */
+		case BPF_ST | BPF_NOSPEC:
+			break;
+
 		/*
 		 * BPF_ST(X)
 		 */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index de8595880fee..b87a63dba9c8 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -627,6 +627,12 @@ emit_clear:
 			}
 			break;
 
+		/*
+		 * BPF_ST NOSPEC (speculation barrier)
+		 */
+		case BPF_ST | BPF_NOSPEC:
+			break;
+
 		/*
 		 * BPF_ST(X)
 		 */
diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index 81de865f4c7c..e6497424cbf6 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -1251,6 +1251,10 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 			return -1;
 		break;
 
+	/* speculation barrier */
+	case BPF_ST | BPF_NOSPEC:
+		break;
+
 	case BPF_ST | BPF_MEM | BPF_B:
 	case BPF_ST | BPF_MEM | BPF_H:
 	case BPF_ST | BPF_MEM | BPF_W:
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 87e3bf5b9086..3af4131c22c7 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -939,6 +939,10 @@ out_be:
 		emit_ld(rd, 0, RV_REG_T1, ctx);
 		break;
 
+	/* speculation barrier */
+	case BPF_ST | BPF_NOSPEC:
+		break;
+
 	/* ST: *(size *)(dst + off) = imm */
 	case BPF_ST | BPF_MEM | BPF_B:
 		emit_imm(RV_REG_T1, imm, ctx);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 2ae419f5115a..88419263a89a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1153,6 +1153,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 			break;
 		}
 		break;
+	/*
+	 * BPF_NOSPEC (speculation barrier)
+	 */
+	case BPF_ST | BPF_NOSPEC:
+		break;
 	/*
 	 * BPF_ST(X)
 	 */
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 4b8d3c65d266..9a2f20cbd48b 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 			return 1;
 		break;
 	}
+	/* speculation barrier */
+	case BPF_ST | BPF_NOSPEC:
+		break;
 	/* ST: *(size *)(dst + off) = imm */
 	case BPF_ST | BPF_MEM | BPF_W:
 	case BPF_ST | BPF_MEM | BPF_H:
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4b951458c9fc..16d76f814e9b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1219,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			}
 			break;
 
+			/* speculation barrier */
+		case BPF_ST | BPF_NOSPEC:
+			if (boot_cpu_has(X86_FEATURE_XMM2))
+				/* Emit 'lfence' */
+				EMIT3(0x0F, 0xAE, 0xE8);
+			break;
+
 			/* ST: *(u8*)(dst_reg + off) = imm */
 		case BPF_ST | BPF_MEM | BPF_B:
 			if (is_ereg(dst_reg))
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 3da88ded6ee3..3bfda5f502cb 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			i++;
 			break;
 		}
+		/* speculation barrier */
+		case BPF_ST | BPF_NOSPEC:
+			if (boot_cpu_has(X86_FEATURE_XMM2))
+				/* Emit 'lfence' */
+				EMIT3(0x0F, 0xAE, 0xE8);
+			break;
 		/* ST: *(u8*)(dst_reg + off) = imm */
 		case BPF_ST | BPF_MEM | BPF_H:
 		case BPF_ST | BPF_MEM | BPF_B:
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 472f97074da0..83b896044e79 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -73,6 +73,11 @@ struct ctl_table_header;
 /* unused opcode to mark call to interpreter with arguments */
 #define BPF_CALL_ARGS	0xe0
 
+/* unused opcode to mark speculation barrier for mitigating
+ * Speculative Store Bypass
+ */
+#define BPF_NOSPEC	0xc0
+
 /* As per nm, we expose JITed images as text (code) section for
  * kallsyms. That way, tools like perf can find it to match
  * addresses.
@@ -390,6 +395,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
 		.off   = 0,					\
 		.imm   = 0 })
 
+/* Speculation barrier */
+
+#define BPF_ST_NOSPEC()						\
+	((struct bpf_insn) {					\
+		.code  = BPF_ST | BPF_NOSPEC,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
 /* Internal classic blocks for direct assignment */
 
 #define __BPF_STMT(CODE, K)					\
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9b1577498373..b1a5fc04492b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -32,6 +32,8 @@
 #include <linux/perf_event.h>
 #include <linux/extable.h>
 #include <linux/log2.h>
+
+#include <asm/barrier.h>
 #include <asm/unaligned.h>
 
 /* Registers */
@@ -1377,6 +1379,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 		/* Non-UAPI available opcodes. */
 		[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
 		[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
+		[BPF_ST  | BPF_NOSPEC] = &&ST_NOSPEC,
 		[BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
 		[BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
 		[BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
@@ -1621,7 +1624,21 @@ out:
 	COND_JMP(s, JSGE, >=)
 	COND_JMP(s, JSLE, <=)
 #undef COND_JMP
-	/* STX and ST and LDX*/
+	/* ST, STX and LDX*/
+	ST_NOSPEC:
+		/* Speculation barrier for mitigating Speculative Store Bypass.
+		 * In case of arm64, we rely on the firmware mitigation as
+		 * controlled via the ssbd kernel parameter. Whenever the
+		 * mitigation is enabled, it works for all of the kernel code
+		 * with no need to provide any additional instructions here.
+		 * In case of x86, we use 'lfence' insn for mitigation. We
+		 * reuse preexisting logic from Spectre v1 mitigation that
+		 * happens to produce the required code on x86 for v4 as well.
+		 */
+#ifdef CONFIG_X86
+		barrier_nospec();
+#endif
+		CONT;
 #define LDST(SIZEOP, SIZE)						\
 	STX_MEM_##SIZEOP:						\
 		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index bbfc6bb79240..ca3cd9aaa6ce 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -206,15 +206,17 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			verbose(cbs->private_data, "BUG_%02x\n", insn->code);
 		}
 	} else if (class == BPF_ST) {
-		if (BPF_MODE(insn->code) != BPF_MEM) {
+		if (BPF_MODE(insn->code) == BPF_MEM) {
+			verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
+				insn->code,
+				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+				insn->dst_reg,
+				insn->off, insn->imm);
+		} else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) {
+			verbose(cbs->private_data, "(%02x) nospec\n", insn->code);
+		} else {
 			verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
-			return;
 		}
-		verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
-			insn->code,
-			bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
-			insn->dst_reg,
-			insn->off, insn->imm);
 	} else if (class == BPF_LDX) {
 		if (BPF_MODE(insn->code) != BPF_MEM) {
 			verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);

From 2039f26f3aca5b0e419b98f65dd36481337b86ee Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 13 Jul 2021 08:18:31 +0000
Subject: [PATCH 293/502] bpf: Fix leakage due to insufficient speculative
 store bypass mitigation

Spectre v4 gadgets make use of memory disambiguation, which is a set of
techniques that execute memory access instructions, that is, loads and
stores, out of program order; Intel's optimization manual, section 2.4.4.5:

  A load instruction micro-op may depend on a preceding store. Many
  microarchitectures block loads until all preceding store addresses are
  known. The memory disambiguator predicts which loads will not depend on
  any previous stores. When the disambiguator predicts that a load does
  not have such a dependency, the load takes its data from the L1 data
  cache. Eventually, the prediction is verified. If an actual conflict is
  detected, the load and all succeeding instructions are re-executed.

af86ca4e3088 ("bpf: Prevent memory disambiguation attack") tried to mitigate
this attack by sanitizing the memory locations through preemptive "fast"
(low latency) stores of zero prior to the actual "slow" (high latency) store
of a pointer value such that upon dependency misprediction the CPU then
speculatively executes the load of the pointer value and retrieves the zero
value instead of the attacker controlled scalar value previously stored at
that location, meaning, subsequent access in the speculative domain is then
redirected to the "zero page".

The sanitized preemptive store of zero prior to the actual "slow" store is
done through a simple ST instruction based on r10 (frame pointer) with
relative offset to the stack location that the verifier has been tracking
on the original used register for STX, which does not have to be r10. Thus,
there are no memory dependencies for this store, since it's only using r10
and immediate constant of zero; hence af86ca4e3088 /assumed/ a low latency
operation.

However, a recent attack demonstrated that this mitigation is not sufficient
since the preemptive store of zero could also be turned into a "slow" store
and is thus bypassed as well:

  [...]
  // r2 = oob address (e.g. scalar)
  // r7 = pointer to map value
  31: (7b) *(u64 *)(r10 -16) = r2
  // r9 will remain "fast" register, r10 will become "slow" register below
  32: (bf) r9 = r10
  // JIT maps BPF reg to x86 reg:
  //  r9  -> r15 (callee saved)
  //  r10 -> rbp
  // train store forward prediction to break dependency link between both r9
  // and r10 by evicting them from the predictor's LRU table.
  33: (61) r0 = *(u32 *)(r7 +24576)
  34: (63) *(u32 *)(r7 +29696) = r0
  35: (61) r0 = *(u32 *)(r7 +24580)
  36: (63) *(u32 *)(r7 +29700) = r0
  37: (61) r0 = *(u32 *)(r7 +24584)
  38: (63) *(u32 *)(r7 +29704) = r0
  39: (61) r0 = *(u32 *)(r7 +24588)
  40: (63) *(u32 *)(r7 +29708) = r0
  [...]
  543: (61) r0 = *(u32 *)(r7 +25596)
  544: (63) *(u32 *)(r7 +30716) = r0
  // prepare call to bpf_ringbuf_output() helper. the latter will cause rbp
  // to spill to stack memory while r13/r14/r15 (all callee saved regs) remain
  // in hardware registers. rbp becomes slow due to push/pop latency. below is
  // disasm of bpf_ringbuf_output() helper for better visual context:
  //
  // ffffffff8117ee20: 41 54                 push   r12
  // ffffffff8117ee22: 55                    push   rbp
  // ffffffff8117ee23: 53                    push   rbx
  // ffffffff8117ee24: 48 f7 c1 fc ff ff ff  test   rcx,0xfffffffffffffffc
  // ffffffff8117ee2b: 0f 85 af 00 00 00     jne    ffffffff8117eee0 <-- jump taken
  // [...]
  // ffffffff8117eee0: 49 c7 c4 ea ff ff ff  mov    r12,0xffffffffffffffea
  // ffffffff8117eee7: 5b                    pop    rbx
  // ffffffff8117eee8: 5d                    pop    rbp
  // ffffffff8117eee9: 4c 89 e0              mov    rax,r12
  // ffffffff8117eeec: 41 5c                 pop    r12
  // ffffffff8117eeee: c3                    ret
  545: (18) r1 = map[id:4]
  547: (bf) r2 = r7
  548: (b7) r3 = 0
  549: (b7) r4 = 4
  550: (85) call bpf_ringbuf_output#194288
  // instruction 551 inserted by verifier    \
  551: (7a) *(u64 *)(r10 -16) = 0            | /both/ are now slow stores here
  // storing map value pointer r7 at fp-16   | since value of r10 is "slow".
  552: (7b) *(u64 *)(r10 -16) = r7           /
  // following "fast" read to the same memory location, but due to dependency
  // misprediction it will speculatively execute before insn 551/552 completes.
  553: (79) r2 = *(u64 *)(r9 -16)
  // in speculative domain contains attacker controlled r2. in non-speculative
  // domain this contains r7, and thus accesses r7 +0 below.
  554: (71) r3 = *(u8 *)(r2 +0)
  // leak r3

As can be seen, the current speculative store bypass mitigation which the
verifier inserts at line 551 is insufficient since /both/, the write of
the zero sanitation as well as the map value pointer are a high latency
instruction due to prior memory access via push/pop of r10 (rbp) in contrast
to the low latency read in line 553 as r9 (r15) which stays in hardware
registers. Thus, architecturally, fp-16 is r7, however, microarchitecturally,
fp-16 can still be r2.

Initial thoughts to address this issue was to track spilled pointer loads
from stack and enforce their load via LDX through r10 as well so that /both/
the preemptive store of zero /as well as/ the load use the /same/ register
such that a dependency is created between the store and load. However, this
option is not sufficient either since it can be bypassed as well under
speculation. An updated attack with pointer spill/fills now _all_ based on
r10 would look as follows:

  [...]
  // r2 = oob address (e.g. scalar)
  // r7 = pointer to map value
  [...]
  // longer store forward prediction training sequence than before.
  2062: (61) r0 = *(u32 *)(r7 +25588)
  2063: (63) *(u32 *)(r7 +30708) = r0
  2064: (61) r0 = *(u32 *)(r7 +25592)
  2065: (63) *(u32 *)(r7 +30712) = r0
  2066: (61) r0 = *(u32 *)(r7 +25596)
  2067: (63) *(u32 *)(r7 +30716) = r0
  // store the speculative load address (scalar) this time after the store
  // forward prediction training.
  2068: (7b) *(u64 *)(r10 -16) = r2
  // preoccupy the CPU store port by running sequence of dummy stores.
  2069: (63) *(u32 *)(r7 +29696) = r0
  2070: (63) *(u32 *)(r7 +29700) = r0
  2071: (63) *(u32 *)(r7 +29704) = r0
  2072: (63) *(u32 *)(r7 +29708) = r0
  2073: (63) *(u32 *)(r7 +29712) = r0
  2074: (63) *(u32 *)(r7 +29716) = r0
  2075: (63) *(u32 *)(r7 +29720) = r0
  2076: (63) *(u32 *)(r7 +29724) = r0
  2077: (63) *(u32 *)(r7 +29728) = r0
  2078: (63) *(u32 *)(r7 +29732) = r0
  2079: (63) *(u32 *)(r7 +29736) = r0
  2080: (63) *(u32 *)(r7 +29740) = r0
  2081: (63) *(u32 *)(r7 +29744) = r0
  2082: (63) *(u32 *)(r7 +29748) = r0
  2083: (63) *(u32 *)(r7 +29752) = r0
  2084: (63) *(u32 *)(r7 +29756) = r0
  2085: (63) *(u32 *)(r7 +29760) = r0
  2086: (63) *(u32 *)(r7 +29764) = r0
  2087: (63) *(u32 *)(r7 +29768) = r0
  2088: (63) *(u32 *)(r7 +29772) = r0
  2089: (63) *(u32 *)(r7 +29776) = r0
  2090: (63) *(u32 *)(r7 +29780) = r0
  2091: (63) *(u32 *)(r7 +29784) = r0
  2092: (63) *(u32 *)(r7 +29788) = r0
  2093: (63) *(u32 *)(r7 +29792) = r0
  2094: (63) *(u32 *)(r7 +29796) = r0
  2095: (63) *(u32 *)(r7 +29800) = r0
  2096: (63) *(u32 *)(r7 +29804) = r0
  2097: (63) *(u32 *)(r7 +29808) = r0
  2098: (63) *(u32 *)(r7 +29812) = r0
  // overwrite scalar with dummy pointer; same as before, also including the
  // sanitation store with 0 from the current mitigation by the verifier.
  2099: (7a) *(u64 *)(r10 -16) = 0         | /both/ are now slow stores here
  2100: (7b) *(u64 *)(r10 -16) = r7        | since store unit is still busy.
  // load from stack intended to bypass stores.
  2101: (79) r2 = *(u64 *)(r10 -16)
  2102: (71) r3 = *(u8 *)(r2 +0)
  // leak r3
  [...]

Looking at the CPU microarchitecture, the scheduler might issue loads (such
as seen in line 2101) before stores (line 2099,2100) because the load execution
units become available while the store execution unit is still busy with the
sequence of dummy stores (line 2069-2098). And so the load may use the prior
stored scalar from r2 at address r10 -16 for speculation. The updated attack
may work less reliable on CPU microarchitectures where loads and stores share
execution resources.

This concludes that the sanitizing with zero stores from af86ca4e3088 ("bpf:
Prevent memory disambiguation attack") is insufficient. Moreover, the detection
of stack reuse from af86ca4e3088 where previously data (STACK_MISC) has been
written to a given stack slot where a pointer value is now to be stored does
not have sufficient coverage as precondition for the mitigation either; for
several reasons outlined as follows:

 1) Stack content from prior program runs could still be preserved and is
    therefore not "random", best example is to split a speculative store
    bypass attack between tail calls, program A would prepare and store the
    oob address at a given stack slot and then tail call into program B which
    does the "slow" store of a pointer to the stack with subsequent "fast"
    read. From program B PoV such stack slot type is STACK_INVALID, and
    therefore also must be subject to mitigation.

 2) The STACK_SPILL must not be coupled to register_is_const(&stack->spilled_ptr)
    condition, for example, the previous content of that memory location could
    also be a pointer to map or map value. Without the fix, a speculative
    store bypass is not mitigated in such precondition and can then lead to
    a type confusion in the speculative domain leaking kernel memory near
    these pointer types.

While brainstorming on various alternative mitigation possibilities, we also
stumbled upon a retrospective from Chrome developers [0]:

  [...] For variant 4, we implemented a mitigation to zero the unused memory
  of the heap prior to allocation, which cost about 1% when done concurrently
  and 4% for scavenging. Variant 4 defeats everything we could think of. We
  explored more mitigations for variant 4 but the threat proved to be more
  pervasive and dangerous than we anticipated. For example, stack slots used
  by the register allocator in the optimizing compiler could be subject to
  type confusion, leading to pointer crafting. Mitigating type confusion for
  stack slots alone would have required a complete redesign of the backend of
  the optimizing compiler, perhaps man years of work, without a guarantee of
  completeness. [...]

From BPF side, the problem space is reduced, however, options are rather
limited. One idea that has been explored was to xor-obfuscate pointer spills
to the BPF stack:

  [...]
  // preoccupy the CPU store port by running sequence of dummy stores.
  [...]
  2106: (63) *(u32 *)(r7 +29796) = r0
  2107: (63) *(u32 *)(r7 +29800) = r0
  2108: (63) *(u32 *)(r7 +29804) = r0
  2109: (63) *(u32 *)(r7 +29808) = r0
  2110: (63) *(u32 *)(r7 +29812) = r0
  // overwrite scalar with dummy pointer; xored with random 'secret' value
  // of 943576462 before store ...
  2111: (b4) w11 = 943576462
  2112: (af) r11 ^= r7
  2113: (7b) *(u64 *)(r10 -16) = r11
  2114: (79) r11 = *(u64 *)(r10 -16)
  2115: (b4) w2 = 943576462
  2116: (af) r2 ^= r11
  // ... and restored with the same 'secret' value with the help of AX reg.
  2117: (71) r3 = *(u8 *)(r2 +0)
  [...]

While the above would not prevent speculation, it would make data leakage
infeasible by directing it to random locations. In order to be effective
and prevent type confusion under speculation, such random secret would have
to be regenerated for each store. The additional complexity involved for a
tracking mechanism that prevents jumps such that restoring spilled pointers
would not get corrupted is not worth the gain for unprivileged. Hence, the
fix in here eventually opted for emitting a non-public BPF_ST | BPF_NOSPEC
instruction which the x86 JIT translates into a lfence opcode. Inserting the
latter in between the store and load instruction is one of the mitigations
options [1]. The x86 instruction manual notes:

  [...] An LFENCE that follows an instruction that stores to memory might
  complete before the data being stored have become globally visible. [...]

The latter meaning that the preceding store instruction finished execution
and the store is at minimum guaranteed to be in the CPU's store queue, but
it's not guaranteed to be in that CPU's L1 cache at that point (globally
visible). The latter would only be guaranteed via sfence. So the load which
is guaranteed to execute after the lfence for that local CPU would have to
rely on store-to-load forwarding. [2], in section 2.3 on store buffers says:

  [...] For every store operation that is added to the ROB, an entry is
  allocated in the store buffer. This entry requires both the virtual and
  physical address of the target. Only if there is no free entry in the store
  buffer, the frontend stalls until there is an empty slot available in the
  store buffer again. Otherwise, the CPU can immediately continue adding
  subsequent instructions to the ROB and execute them out of order. On Intel
  CPUs, the store buffer has up to 56 entries. [...]

One small upside on the fix is that it lifts constraints from af86ca4e3088
where the sanitize_stack_off relative to r10 must be the same when coming
from different paths. The BPF_ST | BPF_NOSPEC gets emitted after a BPF_STX
or BPF_ST instruction. This happens either when we store a pointer or data
value to the BPF stack for the first time, or upon later pointer spills.
The former needs to be enforced since otherwise stale stack data could be
leaked under speculation as outlined earlier. For non-x86 JITs the BPF_ST |
BPF_NOSPEC mapping is currently optimized away, but others could emit a
speculation barrier as well if necessary. For real-world unprivileged
programs e.g. generated by LLVM, pointer spill/fill is only generated upon
register pressure and LLVM only tries to do that for pointers which are not
used often. The program main impact will be the initial BPF_ST | BPF_NOSPEC
sanitation for the STACK_INVALID case when the first write to a stack slot
occurs e.g. upon map lookup. In future we might refine ways to mitigate
the latter cost.

  [0] https://arxiv.org/pdf/1902.05178.pdf
  [1] https://msrc-blog.microsoft.com/2018/05/21/analysis-and-mitigation-of-speculative-store-bypass-cve-2018-3639/
  [2] https://arxiv.org/pdf/1905.05725.pdf

Fixes: af86ca4e3088 ("bpf: Prevent memory disambiguation attack")
Fixes: f7cf25b2026d ("bpf: track spill/fill of constants")
Co-developed-by: Piotr Krysiuk <piotras@gmail.com>
Co-developed-by: Benedict Schlueter <benedict.schlueter@rub.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Piotr Krysiuk <piotras@gmail.com>
Signed-off-by: Benedict Schlueter <benedict.schlueter@rub.de>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  2 +-
 kernel/bpf/verifier.c        | 87 +++++++++++++-----------------------
 2 files changed, 33 insertions(+), 56 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7ba7e800d472..828d08afeee0 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -340,8 +340,8 @@ struct bpf_insn_aux_data {
 	};
 	u64 map_key_state; /* constant (32 bit) key tracking for maps */
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
-	int sanitize_stack_off; /* stack slot to be cleared */
 	u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
+	bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
 	bool zext_dst; /* this insn zero extends dst reg */
 	u8 alu_state; /* used in combination with alu_limit */
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 657062cb4d85..f9bda5476ea5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2610,6 +2610,19 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	cur = env->cur_state->frame[env->cur_state->curframe];
 	if (value_regno >= 0)
 		reg = &cur->regs[value_regno];
+	if (!env->bypass_spec_v4) {
+		bool sanitize = reg && is_spillable_regtype(reg->type);
+
+		for (i = 0; i < size; i++) {
+			if (state->stack[spi].slot_type[i] == STACK_INVALID) {
+				sanitize = true;
+				break;
+			}
+		}
+
+		if (sanitize)
+			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+	}
 
 	if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
 	    !register_is_null(reg) && env->bpf_capable) {
@@ -2632,47 +2645,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 			verbose(env, "invalid size of register spill\n");
 			return -EACCES;
 		}
-
 		if (state != cur && reg->type == PTR_TO_STACK) {
 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
 			return -EINVAL;
 		}
-
-		if (!env->bypass_spec_v4) {
-			bool sanitize = false;
-
-			if (state->stack[spi].slot_type[0] == STACK_SPILL &&
-			    register_is_const(&state->stack[spi].spilled_ptr))
-				sanitize = true;
-			for (i = 0; i < BPF_REG_SIZE; i++)
-				if (state->stack[spi].slot_type[i] == STACK_MISC) {
-					sanitize = true;
-					break;
-				}
-			if (sanitize) {
-				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
-				int soff = (-spi - 1) * BPF_REG_SIZE;
-
-				/* detected reuse of integer stack slot with a pointer
-				 * which means either llvm is reusing stack slot or
-				 * an attacker is trying to exploit CVE-2018-3639
-				 * (speculative store bypass)
-				 * Have to sanitize that slot with preemptive
-				 * store of zero.
-				 */
-				if (*poff && *poff != soff) {
-					/* disallow programs where single insn stores
-					 * into two different stack slots, since verifier
-					 * cannot sanitize them
-					 */
-					verbose(env,
-						"insn %d cannot access two stack slots fp%d and fp%d",
-						insn_idx, *poff, soff);
-					return -EINVAL;
-				}
-				*poff = soff;
-			}
-		}
 		save_register_state(state, spi, reg);
 	} else {
 		u8 type = STACK_MISC;
@@ -11913,35 +11889,33 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		bpf_convert_ctx_access_t convert_ctx_access;
+		bool ctx_access;
 
 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
-		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
+		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
 			type = BPF_READ;
-		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
-			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
-			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
-			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
+			ctx_access = true;
+		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
+			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
+			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
+			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
+			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
 			type = BPF_WRITE;
-		else
+			ctx_access = BPF_CLASS(insn->code) == BPF_STX;
+		} else {
 			continue;
+		}
 
 		if (type == BPF_WRITE &&
-		    env->insn_aux_data[i + delta].sanitize_stack_off) {
+		    env->insn_aux_data[i + delta].sanitize_stack_spill) {
 			struct bpf_insn patch[] = {
-				/* Sanitize suspicious stack slot with zero.
-				 * There are no memory dependencies for this store,
-				 * since it's only using frame pointer and immediate
-				 * constant of zero
-				 */
-				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
-					   env->insn_aux_data[i + delta].sanitize_stack_off,
-					   0),
-				/* the original STX instruction will immediately
-				 * overwrite the same stack slot with appropriate value
-				 */
 				*insn,
+				BPF_ST_NOSPEC(),
 			};
 
 			cnt = ARRAY_SIZE(patch);
@@ -11955,6 +11929,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			continue;
 		}
 
+		if (!ctx_access)
+			continue;
+
 		switch (env->insn_aux_data[i + delta].ptr_type) {
 		case PTR_TO_CTX:
 			if (!ops->convert_ctx_access)

From d712d3fb484b7fa8d1d57e9ca6f134bb9d8c18b1 Mon Sep 17 00:00:00 2001
From: Igor Pylypiv <ipylypiv@google.com>
Date: Wed, 7 Jul 2021 11:59:45 -0700
Subject: [PATCH 294/502] scsi: pm80xx: Fix TMF task completion race condition

The TMF timeout timer may trigger at the same time when the response from a
controller is being handled. When this happens the SAS task may get freed
before the response processing is finished.

Fix this by calling complete() only when SAS_TASK_STATE_DONE is not set.

A similar race condition was fixed in commit b90cd6f2b905 ("scsi: libsas:
fix a race condition when smp task timeout")

Link: https://lore.kernel.org/r/20210707185945.35559-1-ipylypiv@google.com
Reviewed-by: Vishakha Channapattan <vishakhavc@google.com>
Acked-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Igor Pylypiv <ipylypiv@google.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_sas.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 48548a95327b..32e60f0c3b14 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -684,8 +684,7 @@ int pm8001_dev_found(struct domain_device *dev)
 
 void pm8001_task_done(struct sas_task *task)
 {
-	if (!del_timer(&task->slow_task->timer))
-		return;
+	del_timer(&task->slow_task->timer);
 	complete(&task->slow_task->completion);
 }
 
@@ -693,9 +692,14 @@ static void pm8001_tmf_timedout(struct timer_list *t)
 {
 	struct sas_task_slow *slow = from_timer(slow, t, timer);
 	struct sas_task *task = slow->task;
+	unsigned long flags;
 
-	task->task_state_flags |= SAS_TASK_STATE_ABORTED;
-	complete(&task->slow_task->completion);
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+		task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+		complete(&task->slow_task->completion);
+	}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
 }
 
 #define PM8001_TASK_TIMEOUT 20
@@ -748,13 +752,10 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev,
 		}
 		res = -TMF_RESP_FUNC_FAILED;
 		/* Even TMF timed out, return direct. */
-		if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
-			if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
-				pm8001_dbg(pm8001_ha, FAIL,
-					   "TMF task[%x]timeout.\n",
-					   tmf->tmf);
-				goto ex_err;
-			}
+		if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+			pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n",
+				   tmf->tmf);
+			goto ex_err;
 		}
 
 		if (task->task_status.resp == SAS_TASK_COMPLETE &&
@@ -834,12 +835,9 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha,
 		wait_for_completion(&task->slow_task->completion);
 		res = TMF_RESP_FUNC_FAILED;
 		/* Even TMF timed out, return direct. */
-		if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
-			if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
-				pm8001_dbg(pm8001_ha, FAIL,
-					   "TMF task timeout.\n");
-				goto ex_err;
-			}
+		if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+			pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n");
+			goto ex_err;
 		}
 
 		if (task->task_status.resp == SAS_TASK_COMPLETE &&

From 77541f78eadfe9fdb018a7b8b69f0f2af2cf4b82 Mon Sep 17 00:00:00 2001
From: Harshvardhan Jha <harshvardhan.jha@oracle.com>
Date: Thu, 8 Jul 2021 13:16:42 +0530
Subject: [PATCH 295/502] scsi: megaraid_mm: Fix end of loop tests for
 list_for_each_entry()

The list_for_each_entry() iterator, "adapter" in this code, can never be
NULL.  If we exit the loop without finding the correct adapter then
"adapter" points invalid memory that is an offset from the list head.  This
will eventually lead to memory corruption and presumably a kernel crash.

Link: https://lore.kernel.org/r/20210708074642.23599-1-harshvardhan.jha@oracle.com
Acked-by: Sumit Saxena <sumit.saxena@broadcom.com>
Signed-off-by: Harshvardhan Jha <harshvardhan.jha@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/megaraid/megaraid_mm.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c
index abf7b401f5b9..c509440bd161 100644
--- a/drivers/scsi/megaraid/megaraid_mm.c
+++ b/drivers/scsi/megaraid/megaraid_mm.c
@@ -238,7 +238,7 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval)
 	mimd_t		mimd;
 	uint32_t	adapno;
 	int		iterator;
-
+	bool		is_found;
 
 	if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) {
 		*rval = -EFAULT;
@@ -254,12 +254,16 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval)
 
 	adapter = NULL;
 	iterator = 0;
+	is_found = false;
 
 	list_for_each_entry(adapter, &adapters_list_g, list) {
-		if (iterator++ == adapno) break;
+		if (iterator++ == adapno) {
+			is_found = true;
+			break;
+		}
 	}
 
-	if (!adapter) {
+	if (!is_found) {
 		*rval = -ENODEV;
 		return NULL;
 	}
@@ -725,6 +729,7 @@ ioctl_done(uioc_t *kioc)
 	uint32_t	adapno;
 	int		iterator;
 	mraid_mmadp_t*	adapter;
+	bool		is_found;
 
 	/*
 	 * When the kioc returns from driver, make sure it still doesn't
@@ -747,19 +752,23 @@ ioctl_done(uioc_t *kioc)
 		iterator	= 0;
 		adapter		= NULL;
 		adapno		= kioc->adapno;
+		is_found	= false;
 
 		con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed "
 					"ioctl that was timedout before\n"));
 
 		list_for_each_entry(adapter, &adapters_list_g, list) {
-			if (iterator++ == adapno) break;
+			if (iterator++ == adapno) {
+				is_found = true;
+				break;
+			}
 		}
 
 		kioc->timedout = 0;
 
-		if (adapter) {
+		if (is_found)
 			mraid_mm_dealloc_kioc( adapter, kioc );
-		}
+
 	}
 	else {
 		wake_up(&wait_q);

From 640b7ea5f888b521dcf28e2564ce75d08a783fd7 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Tue, 27 Jul 2021 23:38:24 +0300
Subject: [PATCH 296/502] alpha: register early reserved memory in memblock

The memory reserved by console/PALcode or non-volatile memory is not added
to memblock.memory.

Since commit fa3354e4ea39 (mm: free_area_init: use maximal zone PFNs rather
than zone sizes) the initialization of the memory map relies on the
accuracy of memblock.memory to properly calculate zone sizes. The holes in
memblock.memory caused by absent regions reserved by the firmware cause
incorrect initialization of struct pages which leads to BUG() during the
initial page freeing:

BUG: Bad page state in process swapper  pfn:2ffc53
page:fffffc000ecf14c0 refcount:0 mapcount:1 mapping:0000000000000000 index:0x0
flags: 0x0()
raw: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
raw: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
page dumped because: nonzero mapcount
Modules linked in:
CPU: 0 PID: 0 Comm: swapper Not tainted 5.7.0-03841-gfa3354e4ea39-dirty #26
       fffffc0001b5bd68 fffffc0001b5be80 fffffc00011cd148 fffffc000ecf14c0
       fffffc00019803df fffffc0001b5be80 fffffc00011ce340 fffffc000ecf14c0
       0000000000000000 fffffc0001b5be80 fffffc0001b482c0 fffffc00027d6618
       fffffc00027da7d0 00000000002ff97a 0000000000000000 fffffc0001b5be80
       fffffc00011d1abc fffffc000ecf14c0 fffffc0002d00000 fffffc0001b5be80
       fffffc0001b2350c 0000000000300000 fffffc0001b48298 fffffc0001b482c0
Trace:
[<fffffc00011cd148>] bad_page+0x168/0x1b0
[<fffffc00011ce340>] free_pcp_prepare+0x1e0/0x290
[<fffffc00011d1abc>] free_unref_page+0x2c/0xa0
[<fffffc00014ee5f0>] cmp_ex_sort+0x0/0x30
[<fffffc00014ee5f0>] cmp_ex_sort+0x0/0x30
[<fffffc000101001c>] _stext+0x1c/0x20

Fix this by registering the reserved ranges in memblock.memory.

Link: https://lore.kernel.org/lkml/20210726192311.uffqnanxw3ac5wwi@ivybridge
Fixes: fa3354e4ea39 ("mm: free_area_init: use maximal zone PFNs rather than zone sizes")
Reported-by: Matt Turner <mattst88@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/kernel/setup.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 7d56c217b235..b4fbbba30aa2 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -319,18 +319,19 @@ setup_memory(void *kernel_end)
 		       i, cluster->usage, cluster->start_pfn,
 		       cluster->start_pfn + cluster->numpages);
 
-		/* Bit 0 is console/PALcode reserved.  Bit 1 is
-		   non-volatile memory -- we might want to mark
-		   this for later.  */
-		if (cluster->usage & 3)
-			continue;
-
 		end = cluster->start_pfn + cluster->numpages;
 		if (end > max_low_pfn)
 			max_low_pfn = end;
 
 		memblock_add(PFN_PHYS(cluster->start_pfn),
 			     cluster->numpages << PAGE_SHIFT);
+
+		/* Bit 0 is console/PALcode reserved.  Bit 1 is
+		   non-volatile memory -- we might want to mark
+		   this for later.  */
+		if (cluster->usage & 3)
+			memblock_reserve(PFN_PHYS(cluster->start_pfn),
+				         cluster->numpages << PAGE_SHIFT);
 	}
 
 	/*

From aa35772f61752d4c636d46be51a4f7ca6c029ee6 Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell@cadence.com>
Date: Wed, 23 Jun 2021 09:02:47 +0200
Subject: [PATCH 297/502] usb: cdns3: Fixed incorrect gadget state

For delayed status phase, the usb_gadget->state was set
to USB_STATE_ADDRESS and it has never been updated to
USB_STATE_CONFIGURED.
Patch updates the gadget state to correct USB_STATE_CONFIGURED.
As a result of this bug the controller was not able to enter to
Test Mode while using MSC function.

Cc: <stable@vger.kernel.org>
Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
Signed-off-by: Pawel Laszczak <pawell@cadence.com>
Link: https://lore.kernel.org/r/20210623070247.46151-1-pawell@gli-login.cadence.com
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/cdns3/cdns3-ep0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/cdns3/cdns3-ep0.c b/drivers/usb/cdns3/cdns3-ep0.c
index 02ec7ab4bb48..e29989d57bef 100644
--- a/drivers/usb/cdns3/cdns3-ep0.c
+++ b/drivers/usb/cdns3/cdns3-ep0.c
@@ -731,6 +731,7 @@ static int cdns3_gadget_ep0_queue(struct usb_ep *ep,
 		request->actual = 0;
 		priv_dev->status_completion_no_call = true;
 		priv_dev->pending_status_request = request;
+		usb_gadget_set_state(&priv_dev->gadget, USB_STATE_CONFIGURED);
 		spin_unlock_irqrestore(&priv_dev->lock, flags);
 
 		/*

From aa82f94e869edd72f4fadb08c6ffca8927e4934e Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell@cadence.com>
Date: Fri, 25 Jun 2021 12:25:02 +0200
Subject: [PATCH 298/502] usb: cdnsp: Fix incorrect supported maximum speed

Driver had hardcoded in initialization maximum supported speed
to USB_SPEED_SUPER_PLUS but it should consider the speed
returned from usb_get_maximum_speed function.

Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
Signed-off-by: Pawel Laszczak <pawell@cadence.com>
Link: https://lore.kernel.org/r/20210625102502.26336-1-pawell@gli-login.cadence.com
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/cdns3/cdnsp-gadget.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c
index c23f53e9b1ef..27df0c697897 100644
--- a/drivers/usb/cdns3/cdnsp-gadget.c
+++ b/drivers/usb/cdns3/cdnsp-gadget.c
@@ -1882,7 +1882,7 @@ static int __cdnsp_gadget_init(struct cdns *cdns)
 	pdev->gadget.name = "cdnsp-gadget";
 	pdev->gadget.speed = USB_SPEED_UNKNOWN;
 	pdev->gadget.sg_supported = 1;
-	pdev->gadget.max_speed = USB_SPEED_SUPER_PLUS;
+	pdev->gadget.max_speed = max_speed;
 	pdev->gadget.lpm_capable = 1;
 
 	pdev->setup_buf = kzalloc(CDNSP_EP0_SETUP_SIZE, GFP_KERNEL);

From e913aada06830338633fb8524733b0ad3d38a7c1 Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell@cadence.com>
Date: Wed, 23 Jun 2021 09:27:28 +0200
Subject: [PATCH 299/502] usb: cdnsp: Fixed issue with ZLP

The condition "if (need_zero_pkt && zero_len_trb)" was always false
and it caused that TRB for ZLP was not prepared.

Fix causes that after preparing last TRB in TD, the driver prepares
additional TD with ZLP when a ZLP is required.

Cc: <stable@vger.kernel.org>
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
Signed-off-by: Pawel Laszczak <pawell@cadence.com>
Link: https://lore.kernel.org/r/20210623072728.41275-1-pawell@gli-login.cadence.com
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/cdns3/cdnsp-ring.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c
index 68972746e363..1b1438457fb0 100644
--- a/drivers/usb/cdns3/cdnsp-ring.c
+++ b/drivers/usb/cdns3/cdnsp-ring.c
@@ -1932,15 +1932,13 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
 		}
 
 		if (enqd_len + trb_buff_len >= full_len) {
-			if (need_zero_pkt && zero_len_trb) {
-				zero_len_trb = true;
-			} else {
-				field &= ~TRB_CHAIN;
-				field |= TRB_IOC;
-				more_trbs_coming = false;
-				need_zero_pkt = false;
-				preq->td.last_trb = ring->enqueue;
-			}
+			if (need_zero_pkt)
+				zero_len_trb = !zero_len_trb;
+
+			field &= ~TRB_CHAIN;
+			field |= TRB_IOC;
+			more_trbs_coming = false;
+			preq->td.last_trb = ring->enqueue;
 		}
 
 		/* Only set interrupt on short packet for OUT endpoints. */
@@ -1955,7 +1953,7 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
 		length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) |
 			TRB_INTR_TARGET(0);
 
-		cdnsp_queue_trb(pdev, ring, more_trbs_coming | need_zero_pkt,
+		cdnsp_queue_trb(pdev, ring, more_trbs_coming | zero_len_trb,
 				lower_32_bits(send_addr),
 				upper_32_bits(send_addr),
 				length_field,

From 5df09c15bab98463203c83ecab88b9321466e626 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 22 Jun 2021 21:37:48 +0200
Subject: [PATCH 300/502] usb: cdnsp: Fix the IMAN_IE_SET and IMAN_IE_CLEAR
 macro

IMAN_IE is BIT(1), so these macro are respectively equivalent to BIT(1)
and 0, whatever the value of 'p'.

The purpose was to set and reset a single bit in 'p'.
Fix these macros to do that correctly.

Acked-by: Pawel Laszczak <pawell@cadence.com>
Fixes: e93e58d27402 ("usb: cdnsp: Device side header file for CDNSP driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/d12bfcc9cbffb89e27b120668821b3c4f09b6755.1624390584.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/cdns3/cdnsp-gadget.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h
index 783ca8ffde00..f740fa6089d8 100644
--- a/drivers/usb/cdns3/cdnsp-gadget.h
+++ b/drivers/usb/cdns3/cdnsp-gadget.h
@@ -383,8 +383,8 @@ struct cdnsp_intr_reg {
 #define IMAN_IE			BIT(1)
 #define IMAN_IP			BIT(0)
 /* bits 2:31 need to be preserved */
-#define IMAN_IE_SET(p)		(((p) & IMAN_IE) | 0x2)
-#define IMAN_IE_CLEAR(p)	(((p) & IMAN_IE) & ~(0x2))
+#define IMAN_IE_SET(p)		((p) | IMAN_IE)
+#define IMAN_IE_CLEAR(p)	((p) & ~IMAN_IE)
 
 /* IMOD - Interrupter Moderation Register - irq_control bitmasks. */
 /*

From 0d4867a185460397af56b9afe3e2243d3e610e37 Mon Sep 17 00:00:00 2001
From: Alexander Monakov <amonakov@ispras.ru>
Date: Wed, 21 Jul 2021 20:01:41 +0300
Subject: [PATCH 301/502] ALSA: hda/realtek: add mic quirk for Acer SF314-42

The Acer Swift SF314-42 laptop is using Realtek ALC255 codec. Add a
quirk so microphone in a headset connected via the right-hand side jack
is usable.

Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210721170141.24807-1-amonakov@ispras.ru
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 14e1ab7c7954..21c521596c9d 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8278,6 +8278,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
+	SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),

From db8d3a21275c807a4047a21bde3b57d49ca55d82 Mon Sep 17 00:00:00 2001
From: Michael Zaidman <michael.zaidman@gmail.com>
Date: Thu, 29 Jul 2021 13:26:03 +0300
Subject: [PATCH 302/502] HID: ft260: fix device removal due to USB disconnect

This commit fixes a functional regression introduced by the commit 82f09a637dd3
("HID: ft260: improve error handling of ft260_hid_feature_report_get()")
when upon USB disconnect, the FTDI FT260 i2c device is still available within
the /dev folder.

In my company's product, where the host USB to FT260 USB connection is
hard-wired in the PCB, the issue is not reproducible. To reproduce it, I used
the VirtualBox Ubuntu 20.04 VM and the UMFT260EV1A development module for the
FTDI FT260 chip:

Plug the UMFT260EV1A module into a USB port and attach it to VM.

The VM shows 2 i2c devices under the /dev:
    michael@michael-VirtualBox:~$ ls /dev/i2c-*
    /dev/i2c-0  /dev/i2c-1

The i2c-0 is not related to the FTDI FT260:
    michael@michael-VirtualBox:~$ cat /sys/bus/i2c/devices/i2c-0/name
    SMBus PIIX4 adapter at 4100

The i2c-1 is created by hid-ft260.ko:
    michael@michael-VirtualBox:~$ cat /sys/bus/i2c/devices/i2c-1/name
    FT260 usb-i2c bridge on hidraw1

Now, detach the FTDI FT260 USB device from VM. We expect the /dev/i2c-1
to disappear, but it's still here:
    michael@michael-VirtualBox:~$ ls /dev/i2c-*
    /dev/i2c-0  /dev/i2c-1

And the kernel log shows:
    [  +0.001202] usb 2-2: USB disconnect, device number 3
    [  +0.000109] ft260 0003:0403:6030.0002: failed to retrieve system status
    [  +0.000316] ft260 0003:0403:6030.0003: failed to retrieve system status

It happens because the commit 82f09a637dd3 changed the ft260_get_system_config()
return logic. This caused the ft260_is_interface_enabled() to exit with error
upon the FT260 device USB disconnect, which in turn, aborted the ft260_remove()
before deleting the FT260 i2c device and cleaning its sysfs stuff.

This commit restores the FT260 USB removal functionality and improves the
ft260_is_interface_enabled() code to handle correctly all chip modes defined
by the device interface configuration pins DCNF0 and DCNF1.

Signed-off-by: Michael Zaidman <michael.zaidman@gmail.com>
Acked-by: Aaron Jones (FTDI-UK) <aaron.jones@ftdichip.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ft260.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c
index 6f10df2042c4..4ef1c3b8094e 100644
--- a/drivers/hid/hid-ft260.c
+++ b/drivers/hid/hid-ft260.c
@@ -742,7 +742,7 @@ static int ft260_is_interface_enabled(struct hid_device *hdev)
 	int ret;
 
 	ret = ft260_get_system_config(hdev, &cfg);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
 	ft260_dbg("interface:  0x%02x\n", interface);
@@ -754,23 +754,16 @@ static int ft260_is_interface_enabled(struct hid_device *hdev)
 	switch (cfg.chip_mode) {
 	case FT260_MODE_ALL:
 	case FT260_MODE_BOTH:
-		if (interface == 1) {
+		if (interface == 1)
 			hid_info(hdev, "uart interface is not supported\n");
-			return 0;
-		}
-		ret = 1;
+		else
+			ret = 1;
 		break;
 	case FT260_MODE_UART:
-		if (interface == 0) {
-			hid_info(hdev, "uart is unsupported on interface 0\n");
-			ret = 0;
-		}
+		hid_info(hdev, "uart interface is not supported\n");
 		break;
 	case FT260_MODE_I2C:
-		if (interface == 1) {
-			hid_info(hdev, "i2c is unsupported on interface 1\n");
-			ret = 0;
-		}
+		ret = 1;
 		break;
 	}
 	return ret;
@@ -1004,11 +997,9 @@ err_hid_stop:
 
 static void ft260_remove(struct hid_device *hdev)
 {
-	int ret;
 	struct ft260_device *dev = hid_get_drvdata(hdev);
 
-	ret = ft260_is_interface_enabled(hdev);
-	if (ret <= 0)
+	if (!dev)
 		return;
 
 	sysfs_remove_group(&hdev->dev.kobj, &ft260_attr_group);

From 7280305eb57dd32735f795ed4ee679bf9854f9d0 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 28 Jul 2021 18:00:24 +0200
Subject: [PATCH 303/502] btrfs: calculate number of eb pages properly in
 csum_tree_block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Building with -Warray-bounds on systems with 64K pages there's a
warning:

  fs/btrfs/disk-io.c: In function ‘csum_tree_block’:
  fs/btrfs/disk-io.c:226:34: warning: array subscript 1 is above array bounds of ‘struct page *[1]’ [-Warray-bounds]
    226 |   kaddr = page_address(buf->pages[i]);
        |                        ~~~~~~~~~~^~~
  ./include/linux/mm.h:1630:48: note: in definition of macro ‘page_address’
   1630 | #define page_address(page) lowmem_page_address(page)
        |                                                ^~~~
  In file included from fs/btrfs/ctree.h:32,
                   from fs/btrfs/disk-io.c:23:
  fs/btrfs/extent_io.h:98:15: note: while referencing ‘pages’
     98 |  struct page *pages[1];
        |               ^~~~~

The compiler has no way to know that in that case the nodesize is exactly
PAGE_SIZE, so the resulting number of pages will be correct (1).

Let's use num_extent_pages that makes the case nodesize == PAGE_SIZE
explicitly 1.

Reported-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b117dd3b8172..a59ab7b9aea0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -209,7 +209,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
 static void csum_tree_block(struct extent_buffer *buf, u8 *result)
 {
 	struct btrfs_fs_info *fs_info = buf->fs_info;
-	const int num_pages = fs_info->nodesize >> PAGE_SHIFT;
+	const int num_pages = num_extent_pages(buf);
 	const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
 	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
 	char *kaddr;

From 3c18e9baee0ef97510dcda78c82285f52626764b Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Sat, 24 Jul 2021 17:27:39 +0200
Subject: [PATCH 304/502] USB: serial: ch341: fix character loss at high
 transfer rates

The chip supports high transfer rates, but with the small default buffers
(64 bytes read), some entire blocks are regularly lost. This typically
happens at 1.5 Mbps (which is the default speed on Rockchip devices) when
used as a console to access U-Boot where the output of the "help" command
misses many lines and where "printenv" mangles the environment.

The FTDI driver doesn't suffer at all from this. One difference is that
it uses 512 bytes rx buffers and 256 bytes tx buffers. Adopting these
values completely resolved the issue, even the output of "dmesg" is
reliable. I preferred to leave the Tx value unchanged as it is not
involved in this issue, while a change could increase the risk of
triggering the same issue with other devices having too small buffers.

I verified that it backports well (and works) at least to 5.4. It's of
low importance enough to be dropped where it doesn't trivially apply
anymore.

Cc: stable@vger.kernel.org
Signed-off-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/r/20210724152739.18726-1-w@1wt.eu
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 2db917eab799..8a521b5ea769 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -851,6 +851,7 @@ static struct usb_serial_driver ch341_device = {
 		.owner	= THIS_MODULE,
 		.name	= "ch341-uart",
 	},
+	.bulk_in_size      = 512,
 	.id_table          = id_table,
 	.num_ports         = 1,
 	.open              = ch341_open,

From 333cf507465fbebb3727f5b53e77538467df312a Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Date: Thu, 29 Jul 2021 11:34:49 +0530
Subject: [PATCH 305/502] powerpc/pseries: Fix regression while building
 external modules

With commit c9f3401313a5 ("powerpc: Always enable queued spinlocks for
64s, disable for others") CONFIG_PPC_QUEUED_SPINLOCKS is always
enabled on ppc64le, external modules that use spinlock APIs are
failing.

  ERROR: modpost: GPL-incompatible module XXX.ko uses GPL-only symbol 'shared_processor'

Before the above commit, modules were able to build without any
issues. Also this problem is not seen on other architectures. This
problem can be workaround if CONFIG_UNINLINE_SPIN_UNLOCK is enabled in
the config. However CONFIG_UNINLINE_SPIN_UNLOCK is not enabled by
default and only enabled in certain conditions like
CONFIG_DEBUG_SPINLOCKS is set in the kernel config.

  #include <linux/module.h>
  spinlock_t spLock;

  static int __init spinlock_test_init(void)
  {
          spin_lock_init(&spLock);
          spin_lock(&spLock);
          spin_unlock(&spLock);
          return 0;
  }

  static void __exit spinlock_test_exit(void)
  {
  	printk("spinlock_test unloaded\n");
  }
  module_init(spinlock_test_init);
  module_exit(spinlock_test_exit);

  MODULE_DESCRIPTION ("spinlock_test");
  MODULE_LICENSE ("non-GPL");
  MODULE_AUTHOR ("Srikar Dronamraju");

Given that spin locks are one of the basic facilities for module code,
this effectively makes it impossible to build/load almost any non GPL
modules on ppc64le.

This was first reported at https://github.com/openzfs/zfs/issues/11172

Currently shared_processor is exported as GPL only symbol.
Fix this for parity with other architectures by exposing
shared_processor to non-GPL modules too.

Fixes: 14c73bd344da ("powerpc/vcpu: Assume dedicated processors as non-preempt")
Cc: stable@vger.kernel.org # v5.5+
Reported-by: marc.c.dionne@gmail.com
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210729060449.292780-1-srikar@linux.vnet.ibm.com
---
 arch/powerpc/platforms/pseries/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 631a0d57b6cd..6b0886668465 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -77,7 +77,7 @@
 #include "../../../../drivers/pci/pci.h"
 
 DEFINE_STATIC_KEY_FALSE(shared_processor);
-EXPORT_SYMBOL_GPL(shared_processor);
+EXPORT_SYMBOL(shared_processor);
 
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;

From a88603f4b92ecef9e2359e40bcb99ad399d85dd7 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 29 Jul 2021 22:56:36 +1000
Subject: [PATCH 306/502] powerpc/vdso: Don't use r30 to avoid breaking Go lang

The Go runtime uses r30 for some special value called 'g'. It assumes
that value will remain unchanged even when calling VDSO functions.
Although r30 is non-volatile across function calls, the callee is free
to use it, as long as the callee saves the value and restores it before
returning.

It used to be true by accident that the VDSO didn't use r30, because the
VDSO was hand-written asm. When we switched to building the VDSO from C
the compiler started using r30, at least in some builds, leading to
crashes in Go. eg:

  ~/go/src$ ./all.bash
  Building Go cmd/dist using /usr/lib/go-1.16. (go1.16.2 linux/ppc64le)
  Building Go toolchain1 using /usr/lib/go-1.16.
  go build os/exec: /usr/lib/go-1.16/pkg/tool/linux_ppc64le/compile: signal: segmentation fault
  go build reflect: /usr/lib/go-1.16/pkg/tool/linux_ppc64le/compile: signal: segmentation fault
  go tool dist: FAILED: /usr/lib/go-1.16/bin/go install -gcflags=-l -tags=math_big_pure_go compiler_bootstrap bootstrap/cmd/...: exit status 1

There are patches in flight to fix Go[1], but until they are released
and widely deployed we can workaround it in the VDSO by avoiding use of
r30.

Note this only works with GCC, clang does not support -ffixed-rN.

1: https://go-review.googlesource.com/c/go/+/328110

Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.")
Cc: stable@vger.kernel.org # v5.11+
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210729131244.2595519-1-mpe@ellerman.id.au
---
 arch/powerpc/kernel/vdso64/Makefile | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 2813e3f98db6..3c5baaa6f1e7 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -27,6 +27,13 @@ KASAN_SANITIZE := n
 
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
 	-Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
+
+# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true
+# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is
+# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code
+# generation is minimal, it will just use r29 instead.
+ccflags-y += $(call cc-option, -ffixed-r30)
+
 asflags-y := -D__VDSO64__ -s
 
 targets += vdso64.lds

From 06e91df16f3e1ca1a1886968fb22d4258f3b6b6f Mon Sep 17 00:00:00 2001
From: Sherry Sun <sherry.sun@nxp.com>
Date: Thu, 29 Jul 2021 16:31:09 +0800
Subject: [PATCH 307/502] tty: serial: fsl_lpuart: fix the wrong return value
 in lpuart32_get_mctrl

Patch e60c2991f18b make the lpuart32_get_mctrl always return 0, actually
this will break the functions of device which use flow control such as
Bluetooth.
For lpuart32 plaform, the hardware can handle the CTS automatically.
So we should set TIOCM_CTS active. Also need to set CAR and DSR active.

Patch has been tested on lpuart32 platforms such as imx8qm and imx8ulp.

Fixes: e60c2991f18b ("serial: fsl_lpuart: remove RTSCTS handling from get_mctrl()")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
Link: https://lore.kernel.org/r/20210729083109.31541-1-sherry.sun@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 508128ddfa01..f0e5da77ed6d 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -1415,7 +1415,7 @@ static unsigned int lpuart_get_mctrl(struct uart_port *port)
 
 static unsigned int lpuart32_get_mctrl(struct uart_port *port)
 {
-	unsigned int mctrl = 0;
+	unsigned int mctrl = TIOCM_CAR | TIOCM_DSR | TIOCM_CTS;
 	u32 reg;
 
 	reg = lpuart32_read(port, UARTCTRL);

From 7c4a509d3815a260c423c0633bd73695250ac26d Mon Sep 17 00:00:00 2001
From: Zhiyong Tao <zhiyong.tao@mediatek.com>
Date: Thu, 29 Jul 2021 16:46:40 +0800
Subject: [PATCH 308/502] serial: 8250_mtk: fix uart corruption issue when rx
 power off

Fix uart corruption issue when rx power off.
Add spin lock in mtk8250_dma_rx_complete function in APDMA mode.

when uart is used as a communication port with external device(GPS).
when external device(GPS) power off, the power of rx pin is also from
1.8v to 0v. Even if there is not any data in rx. But uart rx pin can
capture the data "0".
If uart don't receive any data in specified cycle, uart will generates
BI(Break interrupt) interrupt.
If external device(GPS) power off, we found that BI interrupt appeared
continuously and very frequently.
When uart interrupt type is BI, uart IRQ handler(8250 framwork
API:serial8250_handle_irq) will push data to tty buffer.
mtk8250_dma_rx_complete is a task of mtk_uart_apdma_rx_handler.
mtk8250_dma_rx_complete priority is lower than uart irq
handler(serial8250_handle_irq).
if we are in process of mtk8250_dma_rx_complete, uart appear BI
interrupt:1)serial8250_handle_irq will priority execution.2)it may cause
write tty buffer conflict in mtk8250_dma_rx_complete.
So the spin lock protect the rx receive data process is not break.

Signed-off-by: Zhiyong Tao <zhiyong.tao@mediatek.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210729084640.17613-2-zhiyong.tao@mediatek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_mtk.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index f7d3023f860f..fb65dc601b23 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -93,10 +93,13 @@ static void mtk8250_dma_rx_complete(void *param)
 	struct dma_tx_state state;
 	int copied, total, cnt;
 	unsigned char *ptr;
+	unsigned long flags;
 
 	if (data->rx_status == DMA_RX_SHUTDOWN)
 		return;
 
+	spin_lock_irqsave(&up->port.lock, flags);
+
 	dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
 	total = dma->rx_size - state.residue;
 	cnt = total;
@@ -120,6 +123,8 @@ static void mtk8250_dma_rx_complete(void *param)
 	tty_flip_buffer_push(tty_port);
 
 	mtk8250_rx_dma(up);
+
+	spin_unlock_irqrestore(&up->port.lock, flags);
 }
 
 static void mtk8250_rx_dma(struct uart_8250_port *up)

From 0d6434e10b5377a006f6dd995c8fc5e2d82acddc Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Wed, 28 Jul 2021 14:21:06 +0530
Subject: [PATCH 309/502] firmware_loader: use -ETIMEDOUT instead of -EAGAIN in
 fw_load_sysfs_fallback

The only motivation for using -EAGAIN in commit 0542ad88fbdd81bb
("firmware loader: Fix _request_firmware_load() return val for fw load
abort") was to distinguish the error from -ENOMEM, and so there is no
real reason in keeping it. -EAGAIN is typically used to tell the
userspace to try something again and in this case re-using the sysfs
loading interface cannot be retried when a timeout happens, so the
return value is also bogus.

-ETIMEDOUT is received when the wait times out and returning that
is much more telling of what the reason for the failure was. So, just
propagate that instead of returning -EAGAIN.

Suggested-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210728085107.4141-2-mail@anirudhrb.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_loader/fallback.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index 91899d185e31..1a48be0a030e 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -535,8 +535,6 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout)
 	if (fw_state_is_aborted(fw_priv)) {
 		if (retval == -ERESTARTSYS)
 			retval = -EINTR;
-		else
-			retval = -EAGAIN;
 	} else if (fw_priv->is_paged_buf && !fw_priv->data)
 		retval = -ENOMEM;
 

From 75d95e2e39b27f733f21e6668af1c9893a97de5e Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Wed, 28 Jul 2021 14:21:07 +0530
Subject: [PATCH 310/502] firmware_loader: fix use-after-free in
 firmware_fallback_sysfs

This use-after-free happens when a fw_priv object has been freed but
hasn't been removed from the pending list (pending_fw_head). The next
time fw_load_sysfs_fallback tries to insert into the list, it ends up
accessing the pending_list member of the previously freed fw_priv.

The root cause here is that all code paths that abort the fw load
don't delete it from the pending list. For example:

        _request_firmware()
          -> fw_abort_batch_reqs()
              -> fw_state_aborted()

To fix this, delete the fw_priv from the list in __fw_set_state() if
the new state is DONE or ABORTED. This way, all aborts will remove
the fw_priv from the list. Accordingly, remove calls to list_del_init
that were being made before calling fw_state_(aborted|done).

Also, in fw_load_sysfs_fallback, don't add the fw_priv to the pending
list if it is already aborted. Instead, just jump out and return early.

Fixes: bcfbd3523f3c ("firmware: fix a double abort case with fw_load_sysfs_fallback")
Cc: stable <stable@vger.kernel.org>
Reported-by: syzbot+de271708674e2093097b@syzkaller.appspotmail.com
Tested-by: syzbot+de271708674e2093097b@syzkaller.appspotmail.com
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Link: https://lore.kernel.org/r/20210728085107.4141-3-mail@anirudhrb.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_loader/fallback.c | 12 ++++++++----
 drivers/base/firmware_loader/firmware.h | 10 +++++++++-
 drivers/base/firmware_loader/main.c     |  2 ++
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index 1a48be0a030e..d7d63c1aa993 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -89,12 +89,11 @@ static void __fw_load_abort(struct fw_priv *fw_priv)
 {
 	/*
 	 * There is a small window in which user can write to 'loading'
-	 * between loading done and disappearance of 'loading'
+	 * between loading done/aborted and disappearance of 'loading'
 	 */
-	if (fw_sysfs_done(fw_priv))
+	if (fw_state_is_aborted(fw_priv) || fw_sysfs_done(fw_priv))
 		return;
 
-	list_del_init(&fw_priv->pending_list);
 	fw_state_aborted(fw_priv);
 }
 
@@ -280,7 +279,6 @@ static ssize_t firmware_loading_store(struct device *dev,
 			 * Same logic as fw_load_abort, only the DONE bit
 			 * is ignored and we set ABORT only on failure.
 			 */
-			list_del_init(&fw_priv->pending_list);
 			if (rc) {
 				fw_state_aborted(fw_priv);
 				written = rc;
@@ -513,6 +511,11 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout)
 	}
 
 	mutex_lock(&fw_lock);
+	if (fw_state_is_aborted(fw_priv)) {
+		mutex_unlock(&fw_lock);
+		retval = -EINTR;
+		goto out;
+	}
 	list_add(&fw_priv->pending_list, &pending_fw_head);
 	mutex_unlock(&fw_lock);
 
@@ -538,6 +541,7 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout)
 	} else if (fw_priv->is_paged_buf && !fw_priv->data)
 		retval = -ENOMEM;
 
+out:
 	device_del(f_dev);
 err_put_dev:
 	put_device(f_dev);
diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h
index 63bd29fdcb9c..a3014e9e2c85 100644
--- a/drivers/base/firmware_loader/firmware.h
+++ b/drivers/base/firmware_loader/firmware.h
@@ -117,8 +117,16 @@ static inline void __fw_state_set(struct fw_priv *fw_priv,
 
 	WRITE_ONCE(fw_st->status, status);
 
-	if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED)
+	if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) {
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+		/*
+		 * Doing this here ensures that the fw_priv is deleted from
+		 * the pending list in all abort/done paths.
+		 */
+		list_del_init(&fw_priv->pending_list);
+#endif
 		complete_all(&fw_st->completion);
+	}
 }
 
 static inline void fw_state_aborted(struct fw_priv *fw_priv)
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 4fdb8219cd08..68c549d71230 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -783,8 +783,10 @@ static void fw_abort_batch_reqs(struct firmware *fw)
 		return;
 
 	fw_priv = fw->priv;
+	mutex_lock(&fw_lock);
 	if (!fw_state_is_aborted(fw_priv))
 		fw_state_aborted(fw_priv);
+	mutex_unlock(&fw_lock);
 }
 
 /* called from request_firmware() and request_firmware_work_func() */

From b1e27239b9169f07edba0ca0e52805645a1768ba Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:46 -0700
Subject: [PATCH 311/502] xfs: flush data dev on external log write

We incorrectly flush the log device instead of the data device when
trying to ensure metadata is correctly on disk before writing the
unmount record.

Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 36fa2650b081..96434cc4df6e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -833,7 +833,7 @@ xlog_write_unmount_record(
 	 * stamp the tail LSN into the unmount record.
 	 */
 	if (log->l_targ != log->l_mp->m_ddev_targp)
-		blkdev_issue_flush(log->l_targ->bt_bdev);
+		blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
 	return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS);
 }
 

From b5d721eaae47eaa4b4c2754699dadacc4cbca2e0 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:47 -0700
Subject: [PATCH 312/502] xfs: external logs need to flush data device

The recent journal flush/FUA changes replaced the flushing of the
data device on every iclog write with an up-front async data device
cache flush. Unfortunately, the assumption of which this was based
on has been proven incorrect by the flush vs log tail update
ordering issue. As the fix for that issue uses the
XLOG_ICL_NEED_FLUSH flag to indicate that data device needs a cache
flush, we now need to (once again) ensure that an iclog write to
external logs that need a cache flush to be issued actually issue a
cache flush to the data device as well as the log device.

Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 96434cc4df6e..a3c4d48195d9 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -827,13 +827,6 @@ xlog_write_unmount_record(
 	/* account for space used by record data */
 	ticket->t_curr_res -= sizeof(ulf);
 
-	/*
-	 * For external log devices, we need to flush the data device cache
-	 * first to ensure all metadata writeback is on stable storage before we
-	 * stamp the tail LSN into the unmount record.
-	 */
-	if (log->l_targ != log->l_mp->m_ddev_targp)
-		blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
 	return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS);
 }
 
@@ -1796,10 +1789,20 @@ xlog_write_iclog(
 	 * metadata writeback and causing priority inversions.
 	 */
 	iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE;
-	if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH)
+	if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) {
 		iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
+		/*
+		 * For external log devices, we also need to flush the data
+		 * device cache first to ensure all metadata writeback covered
+		 * by the LSN in this iclog is on stable storage. This is slow,
+		 * but it *must* complete before we issue the external log IO.
+		 */
+		if (log->l_targ != log->l_mp->m_ddev_targp)
+			blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
+	}
 	if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
 		iclog->ic_bio.bi_opf |= REQ_FUA;
+
 	iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
 	if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {

From 9d3920644081edf311878b56e0c1e1477991a195 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:47 -0700
Subject: [PATCH 313/502] xfs: fold __xlog_state_release_iclog into
 xlog_state_release_iclog

Fold __xlog_state_release_iclog into its only caller to prepare
make an upcoming fix easier.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
[hch: split from a larger patch]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log.c | 45 +++++++++++++++++----------------------------
 1 file changed, 17 insertions(+), 28 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a3c4d48195d9..82f5996d3889 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -487,29 +487,6 @@ out_error:
 	return error;
 }
 
-static bool
-__xlog_state_release_iclog(
-	struct xlog		*log,
-	struct xlog_in_core	*iclog)
-{
-	lockdep_assert_held(&log->l_icloglock);
-
-	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
-		/* update tail before writing to iclog */
-		xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
-
-		iclog->ic_state = XLOG_STATE_SYNCING;
-		iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
-		xlog_verify_tail_lsn(log, iclog, tail_lsn);
-		/* cycle incremented when incrementing curr_block */
-		trace_xlog_iclog_syncing(iclog, _RET_IP_);
-		return true;
-	}
-
-	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
-	return false;
-}
-
 /*
  * Flush iclog to disk if this is the last reference to the given iclog and the
  * it is in the WANT_SYNC state.
@@ -519,19 +496,31 @@ xlog_state_release_iclog(
 	struct xlog		*log,
 	struct xlog_in_core	*iclog)
 {
+	xfs_lsn_t		tail_lsn;
 	lockdep_assert_held(&log->l_icloglock);
 
 	trace_xlog_iclog_release(iclog, _RET_IP_);
 	if (iclog->ic_state == XLOG_STATE_IOERROR)
 		return -EIO;
 
-	if (atomic_dec_and_test(&iclog->ic_refcnt) &&
-	    __xlog_state_release_iclog(log, iclog)) {
-		spin_unlock(&log->l_icloglock);
-		xlog_sync(log, iclog);
-		spin_lock(&log->l_icloglock);
+	if (!atomic_dec_and_test(&iclog->ic_refcnt))
+		return 0;
+
+	if (iclog->ic_state != XLOG_STATE_WANT_SYNC) {
+		ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+		return 0;
 	}
 
+	/* update tail before writing to iclog */
+	tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+	iclog->ic_state = XLOG_STATE_SYNCING;
+	iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+	xlog_verify_tail_lsn(log, iclog, tail_lsn);
+	trace_xlog_iclog_syncing(iclog, _RET_IP_);
+
+	spin_unlock(&log->l_icloglock);
+	xlog_sync(log, iclog);
+	spin_lock(&log->l_icloglock);
 	return 0;
 }
 

From 0dc8f7f139f07aaca1afcec0ade5718c4ebba91e Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:48 -0700
Subject: [PATCH 314/502] xfs: fix ordering violation between cache flushes and
 tail updates

There is a race between the new CIL async data device metadata IO
completion cache flush and the log tail in the iclog the flush
covers being updated. This can be seen by repeating generic/482 in a
loop and eventually log recovery fails with a failures such as this:

XFS (dm-3): Starting recovery (logdev: internal)
XFS (dm-3): bad inode magic/vsn daddr 228352 #0 (magic=0)
XFS (dm-3): Metadata corruption detected at xfs_inode_buf_verify+0x180/0x190, xfs_inode block 0x37c00 xfs_inode_buf_verify
XFS (dm-3): Unmount and run xfs_repair
XFS (dm-3): First 128 bytes of corrupted metadata buffer:
00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
XFS (dm-3): metadata I/O error in "xlog_recover_items_pass2+0x55/0xc0" at daddr 0x37c00 len 32 error 117

Analysis of the logwrite replay shows that there were no writes to
the data device between the FUA @ write 124 and the FUA at write @
125, but log recovery @ 125 failed. The difference was the one log
write @ 125 moved the tail of the log forwards from (1,8) to (1,32)
and so the inode create intent in (1,8) was not replayed and so the
inode cluster was zero on disk when replay of the first inode item
in (1,32) was attempted.

What this meant was that the journal write that occurred at @ 125
did not ensure that metadata completed before the iclog was written
was correctly on stable storage. The tail of the log moved forward,
so IO must have been completed between the two iclog writes. This
means that there is a race condition between the unconditional async
cache flush in the CIL push work and the tail LSN that is written to
the iclog. This happens like so:

CIL push work				AIL push work
-------------				-------------
Add to committing list
start async data dev cache flush
.....
<flush completes>
<all writes to old tail lsn are stable>
xlog_write
  ....					push inode create buffer
					<start IO>
					.....
xlog_write(commit record)
  ....					<IO completes>
  					log tail moves
  					  xlog_assign_tail_lsn()
start_lsn == commit_lsn
  <no iclog preflush!>
xlog_state_release_iclog
  __xlog_state_release_iclog()
    <writes *new* tail_lsn into iclog>
  xlog_sync()
    ....
    submit_bio()
<tail in log moves forward without flushing written metadata>

Essentially, this can only occur if the commit iclog is issued
without a cache flush. If the iclog bio is submitted with
REQ_PREFLUSH, then it will guarantee that all the completed IO is
one stable storage before the iclog bio with the new tail LSN in it
is written to the log.

IOWs, the tail lsn that is written to the iclog needs to be sampled
*before* we issue the cache flush that guarantees all IO up to that
LSN has been completed.

To fix this without giving up the performance advantage of the
flush/FUA optimisations (e.g. g/482 runtime halves with 5.14-rc1
compared to 5.13), we need to ensure that we always issue a cache
flush if the tail LSN changes between the initial async flush and
the commit record being written. THis requires sampling the tail_lsn
before we start the flush, and then passing the sampled tail LSN to
xlog_state_release_iclog() so it can determine if the the tail LSN
has changed while writing the checkpoint. If the tail LSN has
changed, then it needs to set the NEED_FLUSH flag on the iclog and
we'll issue another cache flush before writing the iclog.

Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log.c      | 36 ++++++++++++++++++++++++++----------
 fs/xfs/xfs_log_cil.c  | 13 +++++++++++--
 fs/xfs/xfs_log_priv.h |  3 ++-
 3 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 82f5996d3889..e8c6c96d4f7c 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -489,12 +489,17 @@ out_error:
 
 /*
  * Flush iclog to disk if this is the last reference to the given iclog and the
- * it is in the WANT_SYNC state.
+ * it is in the WANT_SYNC state.  If the caller passes in a non-zero
+ * @old_tail_lsn and the current log tail does not match, there may be metadata
+ * on disk that must be persisted before this iclog is written.  To satisfy that
+ * requirement, set the XLOG_ICL_NEED_FLUSH flag as a condition for writing this
+ * iclog with the new log tail value.
  */
 int
 xlog_state_release_iclog(
 	struct xlog		*log,
-	struct xlog_in_core	*iclog)
+	struct xlog_in_core	*iclog,
+	xfs_lsn_t		old_tail_lsn)
 {
 	xfs_lsn_t		tail_lsn;
 	lockdep_assert_held(&log->l_icloglock);
@@ -503,6 +508,19 @@ xlog_state_release_iclog(
 	if (iclog->ic_state == XLOG_STATE_IOERROR)
 		return -EIO;
 
+	/*
+	 * Grabbing the current log tail needs to be atomic w.r.t. the writing
+	 * of the tail LSN into the iclog so we guarantee that the log tail does
+	 * not move between deciding if a cache flush is required and writing
+	 * the LSN into the iclog below.
+	 */
+	if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+		tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+
+		if (old_tail_lsn && tail_lsn != old_tail_lsn)
+			iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+	}
+
 	if (!atomic_dec_and_test(&iclog->ic_refcnt))
 		return 0;
 
@@ -511,8 +529,6 @@ xlog_state_release_iclog(
 		return 0;
 	}
 
-	/* update tail before writing to iclog */
-	tail_lsn = xlog_assign_tail_lsn(log->l_mp);
 	iclog->ic_state = XLOG_STATE_SYNCING;
 	iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
 	xlog_verify_tail_lsn(log, iclog, tail_lsn);
@@ -858,7 +874,7 @@ out_err:
 	 * iclog containing the unmount record is written.
 	 */
 	iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
-	error = xlog_state_release_iclog(log, iclog);
+	error = xlog_state_release_iclog(log, iclog, 0);
 	xlog_wait_on_iclog(iclog);
 
 	if (tic) {
@@ -2302,7 +2318,7 @@ xlog_write_copy_finish(
 	return 0;
 
 release_iclog:
-	error = xlog_state_release_iclog(log, iclog);
+	error = xlog_state_release_iclog(log, iclog, 0);
 	spin_unlock(&log->l_icloglock);
 	return error;
 }
@@ -2521,7 +2537,7 @@ next_lv:
 		ASSERT(optype & XLOG_COMMIT_TRANS);
 		*commit_iclog = iclog;
 	} else {
-		error = xlog_state_release_iclog(log, iclog);
+		error = xlog_state_release_iclog(log, iclog, 0);
 	}
 	spin_unlock(&log->l_icloglock);
 
@@ -2959,7 +2975,7 @@ restart:
 		 * reference to the iclog.
 		 */
 		if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
-			error = xlog_state_release_iclog(log, iclog);
+			error = xlog_state_release_iclog(log, iclog, 0);
 		spin_unlock(&log->l_icloglock);
 		if (error)
 			return error;
@@ -3195,7 +3211,7 @@ xfs_log_force(
 			atomic_inc(&iclog->ic_refcnt);
 			lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 			xlog_state_switch_iclogs(log, iclog, 0);
-			if (xlog_state_release_iclog(log, iclog))
+			if (xlog_state_release_iclog(log, iclog, 0))
 				goto out_error;
 
 			if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
@@ -3275,7 +3291,7 @@ xlog_force_lsn(
 		}
 		atomic_inc(&iclog->ic_refcnt);
 		xlog_state_switch_iclogs(log, iclog, 0);
-		if (xlog_state_release_iclog(log, iclog))
+		if (xlog_state_release_iclog(log, iclog, 0))
 			goto out_error;
 		if (log_flushed)
 			*log_flushed = 1;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b128aaa9b870..4c44bc3786c0 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -654,8 +654,9 @@ xlog_cil_push_work(
 	struct xfs_trans_header thdr;
 	struct xfs_log_iovec	lhdr;
 	struct xfs_log_vec	lvhdr = { NULL };
+	xfs_lsn_t		preflush_tail_lsn;
 	xfs_lsn_t		commit_lsn;
-	xfs_lsn_t		push_seq;
+	xfs_csn_t		push_seq;
 	struct bio		bio;
 	DECLARE_COMPLETION_ONSTACK(bdev_flush);
 
@@ -730,7 +731,15 @@ xlog_cil_push_work(
 	 * because we hold the flush lock exclusively. Hence we can now issue
 	 * a cache flush to ensure all the completed metadata in the journal we
 	 * are about to overwrite is on stable storage.
+	 *
+	 * Because we are issuing this cache flush before we've written the
+	 * tail lsn to the iclog, we can have metadata IO completions move the
+	 * tail forwards between the completion of this flush and the iclog
+	 * being written. In this case, we need to re-issue the cache flush
+	 * before the iclog write. To detect whether the log tail moves, sample
+	 * the tail LSN *before* we issue the flush.
 	 */
+	preflush_tail_lsn = atomic64_read(&log->l_tail_lsn);
 	xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev,
 				&bdev_flush);
 
@@ -941,7 +950,7 @@ restart:
 	 * storage.
 	 */
 	commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
-	xlog_state_release_iclog(log, commit_iclog);
+	xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn);
 	spin_unlock(&log->l_icloglock);
 	return;
 
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4c41bbfa33b0..7cbde0b4f990 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -497,7 +497,8 @@ int	xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
 void	xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
 void	xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
 
-int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog);
+int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog,
+		xfs_lsn_t log_tail_lsn);
 
 /*
  * When we crack an atomic LSN, we sample it first so that the value will not

From 45eddb414047c366744cc60dd6cef7c7e58c6ab9 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:48 -0700
Subject: [PATCH 315/502] xfs: factor out forced iclog flushes

We force iclogs in several places - we need them all to have the
same cache flush semantics, so start by factoring out the iclog
force into a common helper.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log.c | 42 ++++++++++++++++++------------------------
 1 file changed, 18 insertions(+), 24 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e8c6c96d4f7c..184c68ea62bb 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -778,6 +778,20 @@ xfs_log_mount_cancel(
 	xfs_log_unmount(mp);
 }
 
+/*
+ * Flush out the iclog to disk ensuring that device caches are flushed and
+ * the iclog hits stable storage before any completion waiters are woken.
+ */
+static inline int
+xlog_force_iclog(
+	struct xlog_in_core	*iclog)
+{
+	atomic_inc(&iclog->ic_refcnt);
+	if (iclog->ic_state == XLOG_STATE_ACTIVE)
+		xlog_state_switch_iclogs(iclog->ic_log, iclog, 0);
+	return xlog_state_release_iclog(iclog->ic_log, iclog, 0);
+}
+
 /*
  * Wait for the iclog and all prior iclogs to be written disk as required by the
  * log force state machine. Waiting on ic_force_wait ensures iclog completions
@@ -863,18 +877,8 @@ out_err:
 
 	spin_lock(&log->l_icloglock);
 	iclog = log->l_iclog;
-	atomic_inc(&iclog->ic_refcnt);
-	if (iclog->ic_state == XLOG_STATE_ACTIVE)
-		xlog_state_switch_iclogs(log, iclog, 0);
-	else
-		ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
-		       iclog->ic_state == XLOG_STATE_IOERROR);
-	/*
-	 * Ensure the journal is fully flushed and on stable storage once the
-	 * iclog containing the unmount record is written.
-	 */
 	iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
-	error = xlog_state_release_iclog(log, iclog, 0);
+	error = xlog_force_iclog(iclog);
 	xlog_wait_on_iclog(iclog);
 
 	if (tic) {
@@ -3201,17 +3205,9 @@ xfs_log_force(
 		iclog = iclog->ic_prev;
 	} else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
 		if (atomic_read(&iclog->ic_refcnt) == 0) {
-			/*
-			 * We are the only one with access to this iclog.
-			 *
-			 * Flush it out now.  There should be a roundoff of zero
-			 * to show that someone has already taken care of the
-			 * roundoff from the previous sync.
-			 */
-			atomic_inc(&iclog->ic_refcnt);
+			/* We have exclusive access to this iclog. */
 			lsn = be64_to_cpu(iclog->ic_header.h_lsn);
-			xlog_state_switch_iclogs(log, iclog, 0);
-			if (xlog_state_release_iclog(log, iclog, 0))
+			if (xlog_force_iclog(iclog))
 				goto out_error;
 
 			if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
@@ -3289,9 +3285,7 @@ xlog_force_lsn(
 					&log->l_icloglock);
 			return -EAGAIN;
 		}
-		atomic_inc(&iclog->ic_refcnt);
-		xlog_state_switch_iclogs(log, iclog, 0);
-		if (xlog_state_release_iclog(log, iclog, 0))
+		if (xlog_force_iclog(iclog))
 			goto out_error;
 		if (log_flushed)
 			*log_flushed = 1;

From 2bf1ec0ff067ff8f692d261b29c713f3583f7e2a Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:49 -0700
Subject: [PATCH 316/502] xfs: log forces imply data device cache flushes

After fixing the tail_lsn vs cache flush race, generic/482 continued
to fail in a similar way where cache flushes were missing before
iclog FUA writes. Tracing of iclog state changes during the fsstress
workload portion of the test (via xlog_iclog* events) indicated that
iclog writes were coming from two sources - CIL pushes and log
forces (due to fsync/O_SYNC operations). All of the cases where a
recovery problem was triggered indicated that the log force was the
source of the iclog write that was not preceeded by a cache flush.

This was an oversight in the modifications made in commit
eef983ffeae7 ("xfs: journal IO cache flush reductions"). Log forces
for fsync imply a data device cache flush has been issued if an
iclog was flushed to disk and is indicated to the caller via the
log_flushed parameter so they can elide the device cache flush if
the journal issued one.

The change in eef983ffeae7 results in iclogs only issuing a cache
flush if XLOG_ICL_NEED_FLUSH is set on the iclog, but this was not
added to the iclogs that the log force code flushes to disk. Hence
log forces are no longer guaranteeing that a cache flush is issued,
hence opening up a potential on-disk ordering failure.

Log forces should also set XLOG_ICL_NEED_FUA as well to ensure that
the actual iclogs it forces to the journal are also on stable
storage before it returns to the caller.

This patch introduces the xlog_force_iclog() helper function to
encapsulate the process of taking a reference to an iclog, switching
its state if WANT_SYNC and flushing it to stable storage correctly.

Both xfs_log_force() and xfs_log_force_lsn() are converted to use
it, as is xlog_unmount_write() which has an elaborate method of
doing exactly the same "write this iclog to stable storage"
operation.

Further, if the log force code needs to wait on a iclog in the
WANT_SYNC state, it needs to ensure that iclog also results in a
cache flush being issued. This covers the case where the iclog
contains the commit record of the CIL flush that the log force
triggered, but it hasn't been written yet because there is still an
active reference to the iclog.

Note: this whole cache flush whack-a-mole patch is a result of log
forces still being iclog state centric rather than being CIL
sequence centric. Most of this nasty code will go away in future
when log forces are converted to wait on CIL sequence push
completion rather than iclog completion. With the CIL push algorithm
guaranteeing that the CIL checkpoint is fully on stable storage when
it completes, we no longer need to iterate iclogs and push them to
ensure a CIL sequence push has completed and so all this nasty iclog
iteration and flushing code will go away.

Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log.c | 47 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 184c68ea62bb..160b8bb7ee60 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -787,6 +787,7 @@ xlog_force_iclog(
 	struct xlog_in_core	*iclog)
 {
 	atomic_inc(&iclog->ic_refcnt);
+	iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
 	if (iclog->ic_state == XLOG_STATE_ACTIVE)
 		xlog_state_switch_iclogs(iclog->ic_log, iclog, 0);
 	return xlog_state_release_iclog(iclog->ic_log, iclog, 0);
@@ -877,7 +878,6 @@ out_err:
 
 	spin_lock(&log->l_icloglock);
 	iclog = log->l_iclog;
-	iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 	error = xlog_force_iclog(iclog);
 	xlog_wait_on_iclog(iclog);
 
@@ -3214,22 +3214,23 @@ xfs_log_force(
 				goto out_unlock;
 		} else {
 			/*
-			 * Someone else is writing to this iclog.
-			 *
-			 * Use its call to flush out the data.  However, the
-			 * other thread may not force out this LR, so we mark
-			 * it WANT_SYNC.
+			 * Someone else is still writing to this iclog, so we
+			 * need to ensure that when they release the iclog it
+			 * gets synced immediately as we may be waiting on it.
 			 */
 			xlog_state_switch_iclogs(log, iclog, 0);
 		}
-	} else {
-		/*
-		 * If the head iclog is not active nor dirty, we just attach
-		 * ourselves to the head and go to sleep if necessary.
-		 */
-		;
 	}
 
+	/*
+	 * The iclog we are about to wait on may contain the checkpoint pushed
+	 * by the above xlog_cil_force() call, but it may not have been pushed
+	 * to disk yet. Like the ACTIVE case above, we need to make sure caches
+	 * are flushed when this iclog is written.
+	 */
+	if (iclog->ic_state == XLOG_STATE_WANT_SYNC)
+		iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+
 	if (flags & XFS_LOG_SYNC)
 		return xlog_wait_on_iclog(iclog);
 out_unlock:
@@ -3262,7 +3263,8 @@ xlog_force_lsn(
 			goto out_unlock;
 	}
 
-	if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+	switch (iclog->ic_state) {
+	case XLOG_STATE_ACTIVE:
 		/*
 		 * We sleep here if we haven't already slept (e.g. this is the
 		 * first time we've looked at the correct iclog buf) and the
@@ -3289,6 +3291,25 @@ xlog_force_lsn(
 			goto out_error;
 		if (log_flushed)
 			*log_flushed = 1;
+		break;
+	case XLOG_STATE_WANT_SYNC:
+		/*
+		 * This iclog may contain the checkpoint pushed by the
+		 * xlog_cil_force_seq() call, but there are other writers still
+		 * accessing it so it hasn't been pushed to disk yet. Like the
+		 * ACTIVE case above, we need to make sure caches are flushed
+		 * when this iclog is written.
+		 */
+		iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+		break;
+	default:
+		/*
+		 * The entire checkpoint was written by the CIL force and is on
+		 * its way to disk already. It will be stable when it
+		 * completes, so we don't need to manipulate caches here at all.
+		 * We just need to wait for completion if necessary.
+		 */
+		break;
 	}
 
 	if (flags & XFS_LOG_SYNC)

From 8191d8222c514c69a8e1ac46bd9812b9e0aab7d0 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:49 -0700
Subject: [PATCH 317/502] xfs: avoid unnecessary waits in xfs_log_force_lsn()

Before waiting on a iclog in xfs_log_force_lsn(), we don't check to
see if the iclog has already been completed and the contents on
stable storage. We check for completed iclogs in xfs_log_force(), so
we should do the same thing for xfs_log_force_lsn().

This fixed some random up-to-30s pauses seen in unmounting
filesystems in some tests. A log force ends up waiting on completed
iclog, and that doesn't then get flushed (and hence the log force
get completed) until the background log worker issues a log force
that flushes the iclog in question. Then the unmount unblocks and
continues.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log.c | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 160b8bb7ee60..1c328efdca66 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3143,6 +3143,35 @@ xlog_state_switch_iclogs(
 	log->l_iclog = iclog->ic_next;
 }
 
+/*
+ * Force the iclog to disk and check if the iclog has been completed before
+ * xlog_force_iclog() returns. This can happen on synchronous (e.g.
+ * pmem) or fast async storage because we drop the icloglock to issue the IO.
+ * If completion has already occurred, tell the caller so that it can avoid an
+ * unnecessary wait on the iclog.
+ */
+static int
+xlog_force_and_check_iclog(
+	struct xlog_in_core	*iclog,
+	bool			*completed)
+{
+	xfs_lsn_t		lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+	int			error;
+
+	*completed = false;
+	error = xlog_force_iclog(iclog);
+	if (error)
+		return error;
+
+	/*
+	 * If the iclog has already been completed and reused the header LSN
+	 * will have been rewritten by completion
+	 */
+	if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
+		*completed = true;
+	return 0;
+}
+
 /*
  * Write out all data in the in-core log as of this exact moment in time.
  *
@@ -3177,7 +3206,6 @@ xfs_log_force(
 {
 	struct xlog		*log = mp->m_log;
 	struct xlog_in_core	*iclog;
-	xfs_lsn_t		lsn;
 
 	XFS_STATS_INC(mp, xs_log_force);
 	trace_xfs_log_force(mp, 0, _RET_IP_);
@@ -3206,11 +3234,12 @@ xfs_log_force(
 	} else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
 		if (atomic_read(&iclog->ic_refcnt) == 0) {
 			/* We have exclusive access to this iclog. */
-			lsn = be64_to_cpu(iclog->ic_header.h_lsn);
-			if (xlog_force_iclog(iclog))
+			bool	completed;
+
+			if (xlog_force_and_check_iclog(iclog, &completed))
 				goto out_error;
 
-			if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
+			if (completed)
 				goto out_unlock;
 		} else {
 			/*
@@ -3250,6 +3279,7 @@ xlog_force_lsn(
 	bool			already_slept)
 {
 	struct xlog_in_core	*iclog;
+	bool			completed;
 
 	spin_lock(&log->l_icloglock);
 	iclog = log->l_iclog;
@@ -3287,10 +3317,12 @@ xlog_force_lsn(
 					&log->l_icloglock);
 			return -EAGAIN;
 		}
-		if (xlog_force_iclog(iclog))
+		if (xlog_force_and_check_iclog(iclog, &completed))
 			goto out_error;
 		if (log_flushed)
 			*log_flushed = 1;
+		if (completed)
+			goto out_unlock;
 		break;
 	case XLOG_STATE_WANT_SYNC:
 		/*

From 32baa63d82ee3f5ab3bd51bae6bf7d1c15aed8c7 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:49 -0700
Subject: [PATCH 318/502] xfs: logging the on disk inode LSN can make it go
 backwards

When we log an inode, we format the "log inode" core and set an LSN
in that inode core. We do that via xfs_inode_item_format_core(),
which calls:

	xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);

to format the log inode. It writes the LSN from the inode item into
the log inode, and if recovery decides the inode item needs to be
replayed, it recovers the log inode LSN field and writes it into the
on disk inode LSN field.

Now this might seem like a reasonable thing to do, but it is wrong
on multiple levels. Firstly, if the item is not yet in the AIL,
item->li_lsn is zero. i.e. the first time the inode it is logged and
formatted, the LSN we write into the log inode will be zero. If we
only log it once, recovery will run and can write this zero LSN into
the inode.

This means that the next time the inode is logged and log recovery
runs, it will *always* replay changes to the inode regardless of
whether the inode is newer on disk than the version in the log and
that violates the entire purpose of recording the LSN in the inode
at writeback time (i.e. to stop it going backwards in time on disk
during recovery).

Secondly, if we commit the CIL to the journal so the inode item
moves to the AIL, and then relog the inode, the LSN that gets
stamped into the log inode will be the LSN of the inode's current
location in the AIL, not it's age on disk. And it's not the LSN that
will be associated with the current change. That means when log
recovery replays this inode item, the LSN that ends up on disk is
the LSN for the previous changes in the log, not the current
changes being replayed. IOWs, after recovery the LSN on disk is not
in sync with the LSN of the modifications that were replayed into
the inode. This, again, violates the recovery ordering semantics
that on-disk writeback LSNs provide.

Hence the inode LSN in the log dinode is -always- invalid.

Thirdly, recovery actually has the LSN of the log transaction it is
replaying right at hand - it uses it to determine if it should
replay the inode by comparing it to the on-disk inode's LSN. But it
doesn't use that LSN to stamp the LSN into the inode which will be
written back when the transaction is fully replayed. It uses the one
in the log dinode, which we know is always going to be incorrect.

Looking back at the change history, the inode logging was broken by
commit 93f958f9c41f ("xfs: cull unnecessary icdinode fields") way
back in 2016 by a stupid idiot who thought he knew how this code
worked. i.e. me. That commit replaced an in memory di_lsn field that
was updated only at inode writeback time from the inode item.li_lsn
value - and hence always contained the same LSN that appeared in the
on-disk inode - with a read of the inode item LSN at inode format
time. CLearly these are not the same thing.

Before 93f958f9c41f, the log recovery behaviour was irrelevant,
because the LSN in the log inode always matched the on-disk LSN at
the time the inode was logged, hence recovery of the transaction
would never make the on-disk LSN in the inode go backwards or get
out of sync.

A symptom of the problem is this, caught from a failure of
generic/482. Before log recovery, the inode has been allocated but
never used:

xfs_db> inode 393388
xfs_db> p
core.magic = 0x494e
core.mode = 0
....
v3.crc = 0x99126961 (correct)
v3.change_count = 0
v3.lsn = 0
v3.flags2 = 0
v3.cowextsize = 0
v3.crtime.sec = Thu Jan  1 10:00:00 1970
v3.crtime.nsec = 0

After log recovery:

xfs_db> p
core.magic = 0x494e
core.mode = 020444
....
v3.crc = 0x23e68f23 (correct)
v3.change_count = 2
v3.lsn = 0
v3.flags2 = 0
v3.cowextsize = 0
v3.crtime.sec = Thu Jul 22 17:03:03 2021
v3.crtime.nsec = 751000000
...

You can see that the LSN of the on-disk inode is 0, even though it
clearly has been written to disk. I point out this inode, because
the generic/482 failure occurred because several adjacent inodes in
this specific inode cluster were not replayed correctly and still
appeared to be zero on disk when all the other metadata (inobt,
finobt, directories, etc) indicated they should be allocated and
written back.

The fix for this is two-fold. The first is that we need to either
revert the LSN changes in 93f958f9c41f or stop logging the inode LSN
altogether. If we do the former, log recovery does not need to
change but we add 8 bytes of memory per inode to store what is
largely a write-only inode field. If we do the latter, log recovery
needs to stamp the on-disk inode in the same manner that inode
writeback does.

I prefer the latter, because we shouldn't really be trying to log
and replay changes to the on disk LSN as the on-disk value is the
canonical source of the on-disk version of the inode. It also
matches the way we recover buffer items - we create a buf_log_item
that carries the current recovery transaction LSN that gets stamped
into the buffer by the write verifier when it gets written back
when the transaction is fully recovered.

However, this might break log recovery on older kernels even more,
so I'm going to simply ignore the logged value in recovery and stamp
the on-disk inode with the LSN of the transaction being recovered
that will trigger writeback on transaction recovery completion. This
will ensure that the on-disk inode LSN always reflects the LSN of
the last change that was written to disk, regardless of whether it
comes from log recovery or runtime writeback.

Fixes: 93f958f9c41f ("xfs: cull unnecessary icdinode fields")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_log_format.h  | 11 +++++++++-
 fs/xfs/xfs_inode_item_recover.c | 39 ++++++++++++++++++++++++---------
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index d548ea4b6aab..2c5bcbc19264 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -411,7 +411,16 @@ struct xfs_log_dinode {
 	/* start of the extended dinode, writable fields */
 	uint32_t	di_crc;		/* CRC of the inode */
 	uint64_t	di_changecount;	/* number of attribute changes */
-	xfs_lsn_t	di_lsn;		/* flush sequence */
+
+	/*
+	 * The LSN we write to this field during formatting is not a reflection
+	 * of the current on-disk LSN. It should never be used for recovery
+	 * sequencing, nor should it be recovered into the on-disk inode at all.
+	 * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
+	 * for details.
+	 */
+	xfs_lsn_t	di_lsn;
+
 	uint64_t	di_flags2;	/* more random flags */
 	uint32_t	di_cowextsize;	/* basic cow extent size for file */
 	uint8_t		di_pad2[12];	/* more padding for future expansion */
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 7b79518b6c20..e0072a6cd2d3 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
 STATIC void
 xfs_log_dinode_to_disk(
 	struct xfs_log_dinode	*from,
-	struct xfs_dinode	*to)
+	struct xfs_dinode	*to,
+	xfs_lsn_t		lsn)
 {
 	to->di_magic = cpu_to_be16(from->di_magic);
 	to->di_mode = cpu_to_be16(from->di_mode);
@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
 		to->di_flags2 = cpu_to_be64(from->di_flags2);
 		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 		to->di_ino = cpu_to_be64(from->di_ino);
-		to->di_lsn = cpu_to_be64(from->di_lsn);
+		to->di_lsn = cpu_to_be64(lsn);
 		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
 		uuid_copy(&to->di_uuid, &from->di_uuid);
 		to->di_flushiter = 0;
@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
 	}
 
 	/*
-	 * If the inode has an LSN in it, recover the inode only if it's less
-	 * than the lsn of the transaction we are replaying. Note: we still
-	 * need to replay an owner change even though the inode is more recent
-	 * than the transaction as there is no guarantee that all the btree
-	 * blocks are more recent than this transaction, too.
+	 * If the inode has an LSN in it, recover the inode only if the on-disk
+	 * inode's LSN is older than the lsn of the transaction we are
+	 * replaying. We can have multiple checkpoints with the same start LSN,
+	 * so the current LSN being equal to the on-disk LSN doesn't necessarily
+	 * mean that the on-disk inode is more recent than the change being
+	 * replayed.
+	 *
+	 * We must check the current_lsn against the on-disk inode
+	 * here because the we can't trust the log dinode to contain a valid LSN
+	 * (see comment below before replaying the log dinode for details).
+	 *
+	 * Note: we still need to replay an owner change even though the inode
+	 * is more recent than the transaction as there is no guarantee that all
+	 * the btree blocks are more recent than this transaction, too.
 	 */
 	if (dip->di_version >= 3) {
 		xfs_lsn_t	lsn = be64_to_cpu(dip->di_lsn);
 
-		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
 			trace_xfs_log_recover_inode_skip(log, in_f);
 			error = 0;
 			goto out_owner_change;
@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
 		goto out_release;
 	}
 
-	/* recover the log dinode inode into the on disk inode */
-	xfs_log_dinode_to_disk(ldip, dip);
+	/*
+	 * Recover the log dinode inode into the on disk inode.
+	 *
+	 * The LSN in the log dinode is garbage - it can be zero or reflect
+	 * stale in-memory runtime state that isn't coherent with the changes
+	 * logged in this transaction or the changes written to the on-disk
+	 * inode.  Hence we write the current lSN into the inode because that
+	 * matches what xfs_iflush() would write inode the inode when flushing
+	 * the changes in this transaction.
+	 */
+	xfs_log_dinode_to_disk(ldip, dip, current_lsn);
 
 	fields = in_f->ilf_fields;
 	if (fields & XFS_ILOG_DEV)

From d8f4c2d0398fa1d92cacf854daf80d21a46bfefc Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:50 -0700
Subject: [PATCH 319/502] xfs: Enforce attr3 buffer recovery order

From the department of "WTAF? How did we miss that!?"...

When we are recovering a buffer, the first thing we do is check the
buffer magic number and extract the LSN from the buffer. If the LSN
is older than the current LSN, we replay the modification to it. If
the metadata on disk is newer than the transaction in the log, we
skip it. This is a fundamental v5 filesystem metadata recovery
behaviour.

generic/482 failed with an attribute writeback failure during log
recovery. The write verifier caught the corruption before it got
written to disk, and the attr buffer dump looked like:

XFS (dm-3): Metadata corruption detected at xfs_attr3_leaf_verify+0x275/0x2e0, xfs_attr3_leaf block 0x19be8
XFS (dm-3): Unmount and run xfs_repair
XFS (dm-3): First 128 bytes of corrupted metadata buffer:
00000000: 00 00 00 00 00 00 00 00 3b ee 00 00 4d 2a 01 e1  ........;...M*..
00000010: 00 00 00 00 00 01 9b e8 00 00 00 01 00 00 05 38  ...............8
                                  ^^^^^^^^^^^^^^^^^^^^^^^
00000020: df 39 5e 51 58 ac 44 b6 8d c5 e7 10 44 09 bc 17  .9^QX.D.....D...
00000030: 00 00 00 00 00 02 00 83 00 03 00 cc 0f 24 01 00  .............$..
00000040: 00 68 0e bc 0f c8 00 10 00 00 00 00 00 00 00 00  .h..............
00000050: 00 00 3c 31 0f 24 01 00 00 00 3c 32 0f 88 01 00  ..<1.$....<2....
00000060: 00 00 3c 33 0f d8 01 00 00 00 00 00 00 00 00 00  ..<3............
00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
.....

The highlighted bytes are the LSN that was replayed into the
buffer: 0x100000538. This is cycle 1, block 0x538. Prior to replay,
that block on disk looks like this:

$ sudo xfs_db -c "fsb 0x417d" -c "type attr3" -c p /dev/mapper/thin-vol
hdr.info.hdr.forw = 0
hdr.info.hdr.back = 0
hdr.info.hdr.magic = 0x3bee
hdr.info.crc = 0xb5af0bc6 (correct)
hdr.info.bno = 105448
hdr.info.lsn = 0x100000900
               ^^^^^^^^^^^
hdr.info.uuid = df395e51-58ac-44b6-8dc5-e7104409bc17
hdr.info.owner = 131203
hdr.count = 2
hdr.usedbytes = 120
hdr.firstused = 3796
hdr.holes = 1
hdr.freemap[0-2] = [base,size]

Note the LSN stamped into the buffer on disk: 1/0x900. The version
on disk is much newer than the log transaction that was being
replayed. That's a bug, and should -never- happen.

So I immediately went to look at xlog_recover_get_buf_lsn() to check
that we handled the LSN correctly. I was wondering if there was a
similar "two commits with the same start LSN skips the second
replay" problem with buffers. I didn't get that far, because I found
a much more basic, rudimentary bug: xlog_recover_get_buf_lsn()
doesn't recognise buffers with XFS_ATTR3_LEAF_MAGIC set in them!!!

IOWs, attr3 leaf buffers fall through the magic number checks
unrecognised, so trigger the "recover immediately" behaviour instead
of undergoing an LSN check. IOWs, we incorrectly replay ATTR3 leaf
buffers and that causes silent on disk corruption of inode attribute
forks and potentially other things....

Git history shows this is *another* zero day bug, this time
introduced in commit 50d5c8d8e938 ("xfs: check LSN ordering for v5
superblocks during recovery") which failed to handle the attr3 leaf
buffers in recovery. And we've failed to handle them ever since...

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_buf_item_recover.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index d44e8b4a3391..05fd816edf59 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn(
 	switch (magicda) {
 	case XFS_DIR3_LEAF1_MAGIC:
 	case XFS_DIR3_LEAFN_MAGIC:
+	case XFS_ATTR3_LEAF_MAGIC:
 	case XFS_DA3_NODE_MAGIC:
 		lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
 		uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;

From b2ae3a9ef91152931b99620c431cf3805daa1429 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 27 Jul 2021 16:23:50 -0700
Subject: [PATCH 320/502] xfs: need to see iclog flags in tracing

Because I cannot tell if the NEED_FLUSH flag is being set correctly
by the log force and CIL push machinery without it.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log_priv.h | 13 ++++++++++---
 fs/xfs/xfs_trace.h    |  5 ++++-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 7cbde0b4f990..f3e79a45d60a 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -59,6 +59,16 @@ enum xlog_iclog_state {
 	{ XLOG_STATE_DIRTY,	"XLOG_STATE_DIRTY" }, \
 	{ XLOG_STATE_IOERROR,	"XLOG_STATE_IOERROR" }
 
+/*
+ * In core log flags
+ */
+#define XLOG_ICL_NEED_FLUSH	(1 << 0)	/* iclog needs REQ_PREFLUSH */
+#define XLOG_ICL_NEED_FUA	(1 << 1)	/* iclog needs REQ_FUA */
+
+#define XLOG_ICL_STRINGS \
+	{ XLOG_ICL_NEED_FLUSH,	"XLOG_ICL_NEED_FLUSH" }, \
+	{ XLOG_ICL_NEED_FUA,	"XLOG_ICL_NEED_FUA" }
+
 
 /*
  * Log ticket flags
@@ -143,9 +153,6 @@ enum xlog_iclog_state {
 
 #define XLOG_COVER_OPS		5
 
-#define XLOG_ICL_NEED_FLUSH	(1 << 0)	/* iclog needs REQ_PREFLUSH */
-#define XLOG_ICL_NEED_FUA	(1 << 1)	/* iclog needs REQ_FUA */
-
 /* Ticket reservation region accounting */ 
 #define XLOG_TIC_LEN_MAX	15
 
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index f9d8d605f9b1..19260291ff8b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3944,6 +3944,7 @@ DECLARE_EVENT_CLASS(xlog_iclog_class,
 		__field(uint32_t, state)
 		__field(int32_t, refcount)
 		__field(uint32_t, offset)
+		__field(uint32_t, flags)
 		__field(unsigned long long, lsn)
 		__field(unsigned long, caller_ip)
 	),
@@ -3952,15 +3953,17 @@ DECLARE_EVENT_CLASS(xlog_iclog_class,
 		__entry->state = iclog->ic_state;
 		__entry->refcount = atomic_read(&iclog->ic_refcnt);
 		__entry->offset = iclog->ic_offset;
+		__entry->flags = iclog->ic_flags;
 		__entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx caller %pS",
+	TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __print_symbolic(__entry->state, XLOG_STATE_STRINGS),
 		  __entry->refcount,
 		  __entry->offset,
 		  __entry->lsn,
+		  __print_flags(__entry->flags, "|", XLOG_ICL_STRINGS),
 		  (char *)__entry->caller_ip)
 
 );

From 9d110014205cb1129fa570d8de83d486fa199354 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 28 Jul 2021 17:14:11 -0700
Subject: [PATCH 321/502] xfs: limit iclog tail updates

From the department of "generic/482 keeps on giving", we bring you
another tail update race condition:

iclog:
	S1			C1
	+-----------------------+-----------------------+
				 S2			EOIC

Two checkpoints in a single iclog. One is complete, the other just
contains the start record and overruns into a new iclog.

Timeline:

Before S1:	Cache flush, log tail = X
At S1:		Metadata stable, write start record and checkpoint
At C1:		Write commit record, set NEED_FUA
		Single iclog checkpoint, so no need for NEED_FLUSH
		Log tail still = X, so no need for NEED_FLUSH

After C1,
Before S2:	Cache flush, log tail = X
At S2:		Metadata stable, write start record and checkpoint
After S2:	Log tail moves to X+1
At EOIC:	End of iclog, more journal data to write
		Releases iclog
		Not a commit iclog, so no need for NEED_FLUSH
		Writes log tail X+1 into iclog.

At this point, the iclog has tail X+1 and NEED_FUA set. There has
been no cache flush for the metadata between X and X+1, and the
iclog writes the new tail permanently to the log. THis is sufficient
to violate on disk metadata/journal ordering.

We have two options here. The first is to detect this case in some
manner and ensure that the partial checkpoint write sets NEED_FLUSH
when the iclog is already marked NEED_FUA and the log tail changes.
This seems somewhat fragile and quite complex to get right, and it
doesn't actually make it obvious what underlying problem it is
actually addressing from reading the code.

The second option seems much cleaner to me, because it is derived
directly from the requirements of the C1 commit record in the iclog.
That is, when we write this commit record to the iclog, we've
guaranteed that the metadata/data ordering is correct for tail
update purposes. Hence if we only write the log tail into the iclog
for the *first* commit record rather than the log tail at the last
release, we guarantee that the log tail does not move past where the
the first commit record in the log expects it to be.

IOWs, taking the first option means that replay of C1 becomes
dependent on future operations doing the right thing, not just the
C1 checkpoint itself doing the right thing. This makes log recovery
almost impossible to reason about because now we have to take into
account what might or might not have happened in the future when
looking at checkpoints in the log rather than just having to
reconstruct the past...

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log.c | 50 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1c328efdca66..60ac5fd63f1e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -78,13 +78,12 @@ xlog_verify_iclog(
 STATIC void
 xlog_verify_tail_lsn(
 	struct xlog		*log,
-	struct xlog_in_core	*iclog,
-	xfs_lsn_t		tail_lsn);
+	struct xlog_in_core	*iclog);
 #else
 #define xlog_verify_dest_ptr(a,b)
 #define xlog_verify_grant_tail(a)
 #define xlog_verify_iclog(a,b,c)
-#define xlog_verify_tail_lsn(a,b,c)
+#define xlog_verify_tail_lsn(a,b)
 #endif
 
 STATIC int
@@ -489,12 +488,31 @@ out_error:
 
 /*
  * Flush iclog to disk if this is the last reference to the given iclog and the
- * it is in the WANT_SYNC state.  If the caller passes in a non-zero
- * @old_tail_lsn and the current log tail does not match, there may be metadata
- * on disk that must be persisted before this iclog is written.  To satisfy that
- * requirement, set the XLOG_ICL_NEED_FLUSH flag as a condition for writing this
- * iclog with the new log tail value.
+ * it is in the WANT_SYNC state.
+ *
+ * If the caller passes in a non-zero @old_tail_lsn and the current log tail
+ * does not match, there may be metadata on disk that must be persisted before
+ * this iclog is written.  To satisfy that requirement, set the
+ * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new
+ * log tail value.
+ *
+ * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the
+ * log tail is updated correctly. NEED_FUA indicates that the iclog will be
+ * written to stable storage, and implies that a commit record is contained
+ * within the iclog. We need to ensure that the log tail does not move beyond
+ * the tail that the first commit record in the iclog ordered against, otherwise
+ * correct recovery of that checkpoint becomes dependent on future operations
+ * performed on this iclog.
+ *
+ * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the
+ * current tail into iclog. Once the iclog tail is set, future operations must
+ * not modify it, otherwise they potentially violate ordering constraints for
+ * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in
+ * the iclog will get zeroed on activation of the iclog after sync, so we
+ * always capture the tail lsn on the iclog on the first NEED_FUA release
+ * regardless of the number of active reference counts on this iclog.
  */
+
 int
 xlog_state_release_iclog(
 	struct xlog		*log,
@@ -519,6 +537,10 @@ xlog_state_release_iclog(
 
 		if (old_tail_lsn && tail_lsn != old_tail_lsn)
 			iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+
+		if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) &&
+		    !iclog->ic_header.h_tail_lsn)
+			iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
 	}
 
 	if (!atomic_dec_and_test(&iclog->ic_refcnt))
@@ -530,8 +552,9 @@ xlog_state_release_iclog(
 	}
 
 	iclog->ic_state = XLOG_STATE_SYNCING;
-	iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
-	xlog_verify_tail_lsn(log, iclog, tail_lsn);
+	if (!iclog->ic_header.h_tail_lsn)
+		iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+	xlog_verify_tail_lsn(log, iclog);
 	trace_xlog_iclog_syncing(iclog, _RET_IP_);
 
 	spin_unlock(&log->l_icloglock);
@@ -2579,6 +2602,7 @@ xlog_state_activate_iclog(
 	memset(iclog->ic_header.h_cycle_data, 0,
 		sizeof(iclog->ic_header.h_cycle_data));
 	iclog->ic_header.h_lsn = 0;
+	iclog->ic_header.h_tail_lsn = 0;
 }
 
 /*
@@ -3614,10 +3638,10 @@ xlog_verify_grant_tail(
 STATIC void
 xlog_verify_tail_lsn(
 	struct xlog		*log,
-	struct xlog_in_core	*iclog,
-	xfs_lsn_t		tail_lsn)
+	struct xlog_in_core	*iclog)
 {
-    int blocks;
+	xfs_lsn_t	tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn);
+	int		blocks;
 
     if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
 	blocks =

From 81a448d7b0668ae39c08e6f34a54cc7eafb844f1 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 26 Jul 2021 16:43:17 -0700
Subject: [PATCH 322/502] xfs: prevent spoofing of rtbitmap blocks when
 recovering buffers

While reviewing the buffer item recovery code, the thought occurred to
me: in V5 filesystems we use log sequence number (LSN) tracking to avoid
replaying older metadata updates against newer log items.  However, we
use the magic number of the ondisk buffer to find the LSN of the ondisk
metadata, which means that if an attacker can control the layout of the
realtime device precisely enough that the start of an rt bitmap block
matches the magic and UUID of some other kind of block, they can control
the purported LSN of that spoofed block and thereby break log replay.

Since realtime bitmap and summary blocks don't have headers at all, we
have no way to tell if a block really should be replayed.  The best we
can do is replay unconditionally and hope for the best.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/xfs/xfs_buf_item_recover.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 05fd816edf59..4775485b4062 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -698,7 +698,8 @@ xlog_recover_do_inode_buffer(
 static xfs_lsn_t
 xlog_recover_get_buf_lsn(
 	struct xfs_mount	*mp,
-	struct xfs_buf		*bp)
+	struct xfs_buf		*bp,
+	struct xfs_buf_log_format *buf_f)
 {
 	uint32_t		magic32;
 	uint16_t		magic16;
@@ -706,11 +707,20 @@ xlog_recover_get_buf_lsn(
 	void			*blk = bp->b_addr;
 	uuid_t			*uuid;
 	xfs_lsn_t		lsn = -1;
+	uint16_t		blft;
 
 	/* v4 filesystems always recover immediately */
 	if (!xfs_sb_version_hascrc(&mp->m_sb))
 		goto recover_immediately;
 
+	/*
+	 * realtime bitmap and summary file blocks do not have magic numbers or
+	 * UUIDs, so we must recover them immediately.
+	 */
+	blft = xfs_blft_from_flags(buf_f);
+	if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
+		goto recover_immediately;
+
 	magic32 = be32_to_cpu(*(__be32 *)blk);
 	switch (magic32) {
 	case XFS_ABTB_CRC_MAGIC:
@@ -920,7 +930,7 @@ xlog_recover_buf_commit_pass2(
 	 * the verifier will be reset to match whatever recover turns that
 	 * buffer into.
 	 */
-	lsn = xlog_recover_get_buf_lsn(mp, bp);
+	lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f);
 	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
 		trace_xfs_log_recover_buf_skip(log, buf_f);
 		xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);

From 696e572dc85c674b31f4f13f59d8e217ee1b057f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 26 Jul 2021 15:06:22 -0500
Subject: [PATCH 323/502] ARM: riscpc: Fix fall-through warning for Clang

Fix the following fallthrough warning:

arch/arm/mach-rpc/riscpc.c:52:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
           default:
           ^
arch/arm/mach-rpc/riscpc.c:52:2: note: insert 'break;' to avoid fall-through
           default:
           ^
           break;

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/lkml/202107260355.bF00i5bi-lkp@intel.com/
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 arch/arm/mach-rpc/riscpc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c
index d23970bd638d..f70fb9c4b0cb 100644
--- a/arch/arm/mach-rpc/riscpc.c
+++ b/arch/arm/mach-rpc/riscpc.c
@@ -49,6 +49,7 @@ static int __init parse_tag_acorn(const struct tag *tag)
 		fallthrough;	/* ??? */
 	case 256:
 		vram_size += PAGE_SIZE * 256;
+		break;
 	default:
 		break;
 	}

From eb4f520ca691f109f5fb1d16fc9cc26447a941e1 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 26 Jul 2021 15:33:53 -0500
Subject: [PATCH 324/502] scsi: acornscsi: Fix fall-through warning for clang

Fix the following fallthrough warning (on ARM):

drivers/scsi/arm/acornscsi.c:2651:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
           case res_success:
           ^
   drivers/scsi/arm/acornscsi.c:2651:2: note: insert '__attribute__((fallthrough));' to silence this warning
           case res_success:
           ^
           __attribute__((fallthrough));
   drivers/scsi/arm/acornscsi.c:2651:2: note: insert 'break;' to avoid fall-through
           case res_success:
           ^
           break;
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/lkml/202107260355.bF00i5bi-lkp@intel.com/
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/scsi/arm/acornscsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c
index 84fc7a0c6ff4..4a84599ff491 100644
--- a/drivers/scsi/arm/acornscsi.c
+++ b/drivers/scsi/arm/acornscsi.c
@@ -2642,6 +2642,7 @@ int acornscsi_abort(struct scsi_cmnd *SCpnt)
 //#endif
 		clear_bit(SCpnt->device->id * 8 +
 			  (u8)(SCpnt->device->lun & 0x7), host->busyluns);
+		fallthrough;
 
 	/*
 	 * We found the command, and cleared it out.  Either

From cb163627e6d32dbaca4d89b2292788cee895b06d Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 26 Jul 2021 15:46:47 -0500
Subject: [PATCH 325/502] scsi: fas216: Fix fall-through warning for Clang

Fix the following fallthrough warning (on ARM):

drivers/scsi/arm/fas216.c:1379:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
           default:
           ^
   drivers/scsi/arm/fas216.c:1379:2: note: insert 'break;' to avoid fall-through
           default:
           ^
           break;

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/lkml/202107260355.bF00i5bi-lkp@intel.com/
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/scsi/arm/fas216.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c
index 6baa9b36367d..9c4458a99025 100644
--- a/drivers/scsi/arm/fas216.c
+++ b/drivers/scsi/arm/fas216.c
@@ -1375,6 +1375,7 @@ static void fas216_busservice_intr(FAS216_Info *info, unsigned int stat, unsigne
 		case IS_COMPLETE:
 			break;
 		}
+		break;
 
 	default:
 		break;

From 0aab5dce395636eddf4e5f33eba88390328a95b4 Mon Sep 17 00:00:00 2001
From: Edmund Dea <edmund.j.dea@intel.com>
Date: Tue, 25 Aug 2020 14:51:17 -0700
Subject: [PATCH 326/502] drm/kmb: Enable LCD DMA for low TVDDCV

There's an undocumented dependency between LCD layer enable bits [2-5]
and the AXI pipelined read enable bit [28] in the LCD_CONTROL register.
The proper order of operation is:

1) Clear AXI pipelined read enable bit
2) Set LCD layers
3) Set AXI pipelined read enable bit

With this update, LCD can start DMA when TVDDCV is reduced down to 700mV.

Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display")
Signed-off-by: Edmund Dea <edmund.j.dea@intel.com>
Signed-off-by: Anitha Chrisanthus <anitha.chrisanthus@intel.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20210728003126.1425028-1-anitha.chrisanthus@intel.com
---
 drivers/gpu/drm/kmb/kmb_drv.c   | 14 ++++++++++++++
 drivers/gpu/drm/kmb/kmb_plane.c | 15 +++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c
index 96ea1a2c11dd..c0b1c6f99249 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.c
+++ b/drivers/gpu/drm/kmb/kmb_drv.c
@@ -203,6 +203,7 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev)
 	unsigned long status, val, val1;
 	int plane_id, dma0_state, dma1_state;
 	struct kmb_drm_private *kmb = to_kmb(dev);
+	u32 ctrl = 0;
 
 	status = kmb_read_lcd(kmb, LCD_INT_STATUS);
 
@@ -227,6 +228,19 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev)
 				kmb_clr_bitmask_lcd(kmb, LCD_CONTROL,
 						    kmb->plane_status[plane_id].ctrl);
 
+				ctrl = kmb_read_lcd(kmb, LCD_CONTROL);
+				if (!(ctrl & (LCD_CTRL_VL1_ENABLE |
+				    LCD_CTRL_VL2_ENABLE |
+				    LCD_CTRL_GL1_ENABLE |
+				    LCD_CTRL_GL2_ENABLE))) {
+					/* If no LCD layers are using DMA,
+					 * then disable DMA pipelined AXI read
+					 * transactions.
+					 */
+					kmb_clr_bitmask_lcd(kmb, LCD_CONTROL,
+							    LCD_CTRL_PIPELINE_DMA);
+				}
+
 				kmb->plane_status[plane_id].disable = false;
 			}
 		}
diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c
index d5b6195856d1..ecee6782612d 100644
--- a/drivers/gpu/drm/kmb/kmb_plane.c
+++ b/drivers/gpu/drm/kmb/kmb_plane.c
@@ -427,8 +427,14 @@ static void kmb_plane_atomic_update(struct drm_plane *plane,
 
 	kmb_set_bitmask_lcd(kmb, LCD_CONTROL, ctrl);
 
-	/* FIXME no doc on how to set output format,these values are
-	 * taken from the Myriadx tests
+	/* Enable pipeline AXI read transactions for the DMA
+	 * after setting graphics layers. This must be done
+	 * in a separate write cycle.
+	 */
+	kmb_set_bitmask_lcd(kmb, LCD_CONTROL, LCD_CTRL_PIPELINE_DMA);
+
+	/* FIXME no doc on how to set output format, these values are taken
+	 * from the Myriadx tests
 	 */
 	out_format |= LCD_OUTF_FORMAT_RGB888;
 
@@ -526,6 +532,11 @@ struct kmb_plane *kmb_plane_init(struct drm_device *drm)
 		plane->id = i;
 	}
 
+	/* Disable pipeline AXI read transactions for the DMA
+	 * prior to setting graphics layers
+	 */
+	kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, LCD_CTRL_PIPELINE_DMA);
+
 	return primary;
 cleanup:
 	drmm_kfree(drm, plane);

From eb92830cdbc232a0e8166c48061ca276132646a7 Mon Sep 17 00:00:00 2001
From: Edmund Dea <edmund.j.dea@intel.com>
Date: Wed, 26 Aug 2020 13:17:29 -0700
Subject: [PATCH 327/502] drm/kmb: Define driver date and major/minor version

Added macros for date and version

Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display")
Signed-off-by: Edmund Dea <edmund.j.dea@intel.com>
Signed-off-by: Anitha Chrisanthus <anitha.chrisanthus@intel.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20210728003126.1425028-2-anitha.chrisanthus@intel.com
---
 drivers/gpu/drm/kmb/kmb_drv.c | 8 ++++----
 drivers/gpu/drm/kmb/kmb_drv.h | 5 +++++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c
index c0b1c6f99249..f54392ec4fab 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.c
+++ b/drivers/gpu/drm/kmb/kmb_drv.c
@@ -425,10 +425,10 @@ static const struct drm_driver kmb_driver = {
 	.fops = &fops,
 	DRM_GEM_CMA_DRIVER_OPS_VMAP,
 	.name = "kmb-drm",
-	.desc = "KEEMBAY DISPLAY DRIVER ",
-	.date = "20201008",
-	.major = 1,
-	.minor = 0,
+	.desc = "KEEMBAY DISPLAY DRIVER",
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
 };
 
 static int kmb_remove(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h
index 02e806712a64..ebbaa5f422d5 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.h
+++ b/drivers/gpu/drm/kmb/kmb_drv.h
@@ -15,6 +15,11 @@
 #define KMB_MAX_HEIGHT			1080 /*Max height in pixels */
 #define KMB_MIN_WIDTH                   1920 /*Max width in pixels */
 #define KMB_MIN_HEIGHT                  1080 /*Max height in pixels */
+
+#define DRIVER_DATE			"20210223"
+#define DRIVER_MAJOR			1
+#define DRIVER_MINOR			1
+
 #define KMB_LCD_DEFAULT_CLK		200000000
 #define KMB_SYS_CLK_MHZ			500
 

From bc546c0c9abb3bb2fb46866b3d1e6ade9695a5f6 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Wed, 13 Jan 2021 14:31:03 +0800
Subject: [PATCH 328/502] scsi: scsi_dh_rdac: Avoid crash during
 rdac_bus_attach()

The following BUG_ON() was observed during RDAC scan:

[595952.944297] kernel BUG at drivers/scsi/device_handler/scsi_dh_rdac.c:427!
[595952.951143] Internal error: Oops - BUG: 0 [#1] SMP
......
[595953.251065] Call trace:
[595953.259054]  check_ownership+0xb0/0x118
[595953.269794]  rdac_bus_attach+0x1f0/0x4b0
[595953.273787]  scsi_dh_handler_attach+0x3c/0xe8
[595953.278211]  scsi_dh_add_device+0xc4/0xe8
[595953.282291]  scsi_sysfs_add_sdev+0x8c/0x2a8
[595953.286544]  scsi_probe_and_add_lun+0x9fc/0xd00
[595953.291142]  __scsi_scan_target+0x598/0x630
[595953.295395]  scsi_scan_target+0x120/0x130
[595953.299481]  fc_user_scan+0x1a0/0x1c0 [scsi_transport_fc]
[595953.304944]  store_scan+0xb0/0x108
[595953.308420]  dev_attr_store+0x44/0x60
[595953.312160]  sysfs_kf_write+0x58/0x80
[595953.315893]  kernfs_fop_write+0xe8/0x1f0
[595953.319888]  __vfs_write+0x60/0x190
[595953.323448]  vfs_write+0xac/0x1c0
[595953.326836]  ksys_write+0x74/0xf0
[595953.330221]  __arm64_sys_write+0x24/0x30

Code is in check_ownership:

	list_for_each_entry_rcu(tmp, &h->ctlr->dh_list, node) {
		/* h->sdev should always be valid */
		BUG_ON(!tmp->sdev);
		tmp->sdev->access_state = access_state;
	}

	rdac_bus_attach
		initialize_controller
			list_add_rcu(&h->node, &h->ctlr->dh_list);
			h->sdev = sdev;

	rdac_bus_detach
		list_del_rcu(&h->node);
		h->sdev = NULL;

Fix the race between rdac_bus_attach() and rdac_bus_detach() where h->sdev
is NULL when processing the RDAC attach.

Link: https://lore.kernel.org/r/20210113063103.2698953-1-yebin10@huawei.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ye Bin <yebin10@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/device_handler/scsi_dh_rdac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 25f6e1ac9e7b..66652ab409cc 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -453,8 +453,8 @@ static int initialize_controller(struct scsi_device *sdev,
 		if (!h->ctlr)
 			err = SCSI_DH_RES_TEMP_UNAVAIL;
 		else {
-			list_add_rcu(&h->node, &h->ctlr->dh_list);
 			h->sdev = sdev;
+			list_add_rcu(&h->node, &h->ctlr->dh_list);
 		}
 		spin_unlock(&list_lock);
 		err = SCSI_DH_OK;
@@ -778,11 +778,11 @@ static void rdac_bus_detach( struct scsi_device *sdev )
 	spin_lock(&list_lock);
 	if (h->ctlr) {
 		list_del_rcu(&h->node);
-		h->sdev = NULL;
 		kref_put(&h->ctlr->kref, release_controller);
 	}
 	spin_unlock(&list_lock);
 	sdev->handler_data = NULL;
+	synchronize_rcu();
 	kfree(h);
 }
 

From 70edd2e6f652f67d854981fd67f9ad0f1deaea92 Mon Sep 17 00:00:00 2001
From: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
Date: Mon, 26 Jul 2021 17:24:02 +0530
Subject: [PATCH 329/502] scsi: core: Avoid printing an error if target_alloc()
 returns -ENXIO

Avoid printing a 'target allocation failed' error if the driver
target_alloc() callback function returns -ENXIO. This return value
indicates that the corresponding H:C:T:L entry is empty.

Removing this error reduces the scan time if the user issues SCAN_WILD_CARD
scan operation through sysfs parameter on a host with a lot of empty
H:C:T:L entries.

Avoiding the printk on -ENXIO matches the behavior of the other callback
functions during scanning.

Link: https://lore.kernel.org/r/20210726115402.1936-1-sreekanth.reddy@broadcom.com
Signed-off-by: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_scan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index b059bf2b61d4..5b6996a2401b 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -475,7 +475,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
 		error = shost->hostt->target_alloc(starget);
 
 		if(error) {
-			dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error);
+			if (error != -ENXIO)
+				dev_err(dev, "target allocation failed, error %d\n", error);
 			/* don't want scsi_target_reap to do the final
 			 * put because it will be under the host lock */
 			scsi_target_destroy(starget);

From a264cf5e81c78e2b9918b8b9ef2ace9dde1850df Mon Sep 17 00:00:00 2001
From: Tyrel Datwyler <tyreld@linux.ibm.com>
Date: Fri, 16 Jul 2021 14:52:20 -0600
Subject: [PATCH 330/502] scsi: ibmvfc: Fix command state accounting and stale
 response detection

Prior to commit 1f4a4a19508d ("scsi: ibmvfc: Complete commands outside the
host/queue lock") responses to commands were completed sequentially with
the host lock held such that a command had a basic binary state of active
or free. It was therefore a simple affair of ensuring the assocaiated
ibmvfc_event to a VIOS response was valid by testing that it was not
already free. The lock relexation work to complete commands outside the
lock inadverdently made it a trinary command state such that a command is
either in flight, received and being completed, or completed and now
free. This breaks the stale command detection logic as a command may be
still marked active and been placed on the delayed completion list when a
second stale response for the same command arrives. This can lead to double
completions and list corruption. This issue was exposed by a recent VIOS
regression were a missing memory barrier could occasionally result in the
ibmvfc client receiving a duplicate response for the same command.

Fix the issue by introducing the atomic ibmvfc_event.active to track the
trinary state of a command. The state is explicitly set to 1 when a command
is successfully sent. The CRQ response handlers use
atomic_dec_if_positive() to test for stale responses and correctly
transition to the completion state when a active command is received.
Finally, atomic_dec_and_test() is used to sanity check transistions when
commands are freed as a result of a completion, or moved to the purge list
as a result of error handling or adapter reset.

Link: https://lore.kernel.org/r/20210716205220.1101150-1-tyreld@linux.ibm.com
Fixes: 1f4a4a19508d ("scsi: ibmvfc: Complete commands outside the host/queue lock")
Cc: stable@vger.kernel.org
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 19 +++++++++++++++++--
 drivers/scsi/ibmvscsi/ibmvfc.h |  1 +
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index bee1bec49c09..935b01ee44b7 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost,
 	for (i = 0; i < size; ++i) {
 		struct ibmvfc_event *evt = &pool->events[i];
 
+		/*
+		 * evt->active states
+		 *  1 = in flight
+		 *  0 = being completed
+		 * -1 = free/freed
+		 */
+		atomic_set(&evt->active, -1);
 		atomic_set(&evt->free, 1);
 		evt->crq.valid = 0x80;
 		evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i));
@@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt)
 
 	BUG_ON(!ibmvfc_valid_event(pool, evt));
 	BUG_ON(atomic_inc_return(&evt->free) != 1);
+	BUG_ON(atomic_dec_and_test(&evt->active));
 
 	spin_lock_irqsave(&evt->queue->l_lock, flags);
 	list_add_tail(&evt->queue_list, &evt->queue->free);
@@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list)
  **/
 static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code)
 {
+	/*
+	 * Anything we are failing should still be active. Otherwise, it
+	 * implies we already got a response for the command and are doing
+	 * something bad like double completing it.
+	 */
+	BUG_ON(!atomic_dec_and_test(&evt->active));
 	if (evt->cmnd) {
 		evt->cmnd->result = (error_code << 16);
 		evt->done = ibmvfc_scsi_eh_done;
@@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
 
 		evt->done(evt);
 	} else {
+		atomic_set(&evt->active, 1);
 		spin_unlock_irqrestore(&evt->queue->l_lock, flags);
 		ibmvfc_trc_start(evt);
 	}
@@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost,
 		return;
 	}
 
-	if (unlikely(atomic_read(&evt->free))) {
+	if (unlikely(atomic_dec_if_positive(&evt->active))) {
 		dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
 			crq->ioba);
 		return;
@@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost
 		return;
 	}
 
-	if (unlikely(atomic_read(&evt->free))) {
+	if (unlikely(atomic_dec_if_positive(&evt->active))) {
 		dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
 			crq->ioba);
 		return;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 4f0f3baefae4..92fb889d7eb0 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -745,6 +745,7 @@ struct ibmvfc_event {
 	struct ibmvfc_target *tgt;
 	struct scsi_cmnd *cmnd;
 	atomic_t free;
+	atomic_t active;
 	union ibmvfc_iu *xfer_iu;
 	void (*done)(struct ibmvfc_event *evt);
 	void (*_done)(struct ibmvfc_event *evt);

From 5c04243a56a7977185b00400e59ca7e108004faf Mon Sep 17 00:00:00 2001
From: Li Manyi <limanyi@uniontech.com>
Date: Mon, 26 Jul 2021 19:49:13 +0800
Subject: [PATCH 331/502] scsi: sr: Return correct event when media event code
 is 3

Media event code 3 is defined in the MMC-6 spec as follows:

  "MediaRemoval: The media has been removed from the specified slot, and
   the Drive is unable to access the media without user intervention. This
   applies to media changers only."

This indicated that treating the condition as an EJECT_REQUEST was
appropriate. However, doing so had the unfortunate side-effect of causing
the drive tray to be physically ejected on resume. Instead treat the event
as a MEDIA_CHANGE request.

Fixes: 7dd753ca59d6 ("scsi: sr: Return appropriate error code when disk is ejected")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213759
Link: https://lore.kernel.org/r/20210726114913.6760-1-limanyi@uniontech.com
Signed-off-by: Li Manyi <limanyi@uniontech.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 94c254e9012e..a6d3ac0a6cbc 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -221,7 +221,7 @@ static unsigned int sr_get_events(struct scsi_device *sdev)
 	else if (med->media_event_code == 2)
 		return DISK_EVENT_MEDIA_CHANGE;
 	else if (med->media_event_code == 3)
-		return DISK_EVENT_EJECT_REQUEST;
+		return DISK_EVENT_MEDIA_CHANGE;
 	return 0;
 }
 

From f0f82e2476f6adb9c7a0135cfab8091456990c99 Mon Sep 17 00:00:00 2001
From: lijinlin <lijinlin3@huawei.com>
Date: Tue, 27 Jul 2021 11:44:55 +0800
Subject: [PATCH 332/502] scsi: core: Fix capacity set to zero after
 offlinining device

After adding physical volumes to a volume group through vgextend, the
kernel will rescan the partitions. This in turn will cause the device
capacity to be queried.

If the device status is set to offline through sysfs at this time, READ
CAPACITY command will return a result which the host byte is
DID_NO_CONNECT, and the capacity of the device will be set to zero in
read_capacity_error(). After setting device status back to running, the
capacity of the device will remain stuck at zero.

Fix this issue by rescanning device when the device state changes to
SDEV_RUNNING.

Link: https://lore.kernel.org/r/20210727034455.1494960-1-lijinlin3@huawei.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: lijinlin <lijinlin3@huawei.com>
Signed-off-by: Wu Bo <wubo40@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_sysfs.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 32489d25158f..ae9bfc658203 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -807,11 +807,14 @@ store_state_field(struct device *dev, struct device_attribute *attr,
 	mutex_lock(&sdev->state_mutex);
 	ret = scsi_device_set_state(sdev, state);
 	/*
-	 * If the device state changes to SDEV_RUNNING, we need to run
-	 * the queue to avoid I/O hang.
+	 * If the device state changes to SDEV_RUNNING, we need to
+	 * rescan the device to revalidate it, and run the queue to
+	 * avoid I/O hang.
 	 */
-	if (ret == 0 && state == SDEV_RUNNING)
+	if (ret == 0 && state == SDEV_RUNNING) {
+		scsi_rescan_device(dev);
 		blk_mq_run_hw_queues(sdev->request_queue, true);
+	}
 	mutex_unlock(&sdev->state_mutex);
 
 	return ret == 0 ? count : -EINVAL;

From 8a7b46fa7902a3d36ce44a64f4d66586d66206ea Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 26 Jul 2021 11:26:44 +0200
Subject: [PATCH 333/502] MAINTAINERS: add Yasushi SHOJI as reviewer for the
 Microchip CAN BUS Analyzer Tool driver

This patch adds Yasushi SHOJI as a reviewer for the Microchip CAN BUS
Analyzer Tool driver.

Link: https://lore.kernel.org/r/20210726111619.1023991-1-mkl@pengutronix.de
Acked-by: Yasushi SHOJI <yashi@spacecubics.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58afeb12d3b3..42ea3183e87c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11327,6 +11327,12 @@ W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/radio/radio-maxiradio*
 
+MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER
+R:	Yasushi SHOJI <yashi@spacecubics.com>
+L:	linux-can@vger.kernel.org
+S:	Maintained
+F:	drivers/net/can/usb/mcba_usb.c
+
 MCAN MMIO DEVICE DRIVER
 M:	Chandrasekar Ramakrishnan <rcsekar@samsung.com>
 L:	linux-can@vger.kernel.org

From f6b3c7848e66e9046c8a79a5b88fd03461cc252b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 29 Jul 2021 17:12:46 +0300
Subject: [PATCH 334/502] can: hi311x: fix a signedness bug in hi3110_cmd()

The hi3110_cmd() is supposed to return zero on success and negative
error codes on failure, but it was accidentally declared as a u8 when
it needs to be an int type.

Fixes: 57e83fb9b746 ("can: hi311x: Add Holt HI-311x CAN driver")
Link: https://lore.kernel.org/r/20210729141246.GA1267@kili
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/hi311x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
index dd17b8c53e1c..89d9c986a229 100644
--- a/drivers/net/can/spi/hi311x.c
+++ b/drivers/net/can/spi/hi311x.c
@@ -218,7 +218,7 @@ static int hi3110_spi_trans(struct spi_device *spi, int len)
 	return ret;
 }
 
-static u8 hi3110_cmd(struct spi_device *spi, u8 command)
+static int hi3110_cmd(struct spi_device *spi, u8 command)
 {
 	struct hi3110_priv *priv = spi_get_drvdata(spi);
 

From fc43fb69a7af92839551f99c1a96a37b77b3ae7a Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Sun, 25 Jul 2021 13:36:30 +0300
Subject: [PATCH 335/502] can: mcba_usb_start(): add missing urb->transfer_dma
 initialization

Yasushi reported, that his Microchip CAN Analyzer stopped working
since commit 91c02557174b ("can: mcba_usb: fix memory leak in
mcba_usb"). The problem was in missing urb->transfer_dma
initialization.

In my previous patch to this driver I refactored mcba_usb_start() code
to avoid leaking usb coherent buffers. To archive it, I passed local
stack variable to usb_alloc_coherent() and then saved it to private
array to correctly free all coherent buffers on ->close() call. But I
forgot to initialize urb->transfer_dma with variable passed to
usb_alloc_coherent().

All of this was causing device to not work, since dma addr 0 is not
valid and following log can be found on bug report page, which points
exactly to problem described above.

| DMAR: [DMA Write] Request device [00:14.0] PASID ffffffff fault addr 0 [fault reason 05] PTE Write access is not set

Fixes: 91c02557174b ("can: mcba_usb: fix memory leak in mcba_usb")
Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=990850
Link: https://lore.kernel.org/r/20210725103630.23864-1-paskripkin@gmail.com
Cc: linux-stable <stable@vger.kernel.org>
Reported-by: Yasushi SHOJI <yasushi.shoji@gmail.com>
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Tested-by: Yasushi SHOJI <yashi@spacecubics.com>
[mkl: fixed typos in commit message - thanks Yasushi SHOJI]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/mcba_usb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index a45865bd7254..a1a154c08b7f 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -653,6 +653,8 @@ static int mcba_usb_start(struct mcba_priv *priv)
 			break;
 		}
 
+		urb->transfer_dma = buf_dma;
+
 		usb_fill_bulk_urb(urb, priv->udev,
 				  usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN),
 				  buf, MCBA_USB_RX_BUFF_SIZE,

From 8dde723fcde4479f256441da03793e37181d9f21 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 29 Jul 2021 20:51:26 +0200
Subject: [PATCH 336/502] ALSA: usb-audio: Avoid unnecessary or invalid
 connector selection at resume

The recent fix for the resume on Lenovo machines seems causing a
regression on others.  It's because the change always triggers the
connector selection no matter which widget node type is.

This patch addresses the regression by setting the resume callback
selectively only for the connector widget.

Fixes: 44609fc01f28 ("ALSA: usb-audio: Check connector value on resume")
Cc: <stable@vger.kernel.org>
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213897
Link: https://lore.kernel.org/r/20210729185126.24432-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index f4cdaf1ba44a..9b713b4a5ec4 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -1816,6 +1816,15 @@ static void get_connector_control_name(struct usb_mixer_interface *mixer,
 		strlcat(name, " - Output Jack", name_size);
 }
 
+/* get connector value to "wake up" the USB audio */
+static int connector_mixer_resume(struct usb_mixer_elem_list *list)
+{
+	struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list);
+
+	get_connector_value(cval, NULL, NULL);
+	return 0;
+}
+
 /* Build a mixer control for a UAC connector control (jack-detect) */
 static void build_connector_control(struct usb_mixer_interface *mixer,
 				    const struct usbmix_name_map *imap,
@@ -1833,6 +1842,10 @@ static void build_connector_control(struct usb_mixer_interface *mixer,
 	if (!cval)
 		return;
 	snd_usb_mixer_elem_init_std(&cval->head, mixer, term->id);
+
+	/* set up a specific resume callback */
+	cval->head.resume = connector_mixer_resume;
+
 	/*
 	 * UAC2: The first byte from reading the UAC2_TE_CONNECTOR control returns the
 	 * number of channels connected.
@@ -3642,23 +3655,15 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list)
 	return 0;
 }
 
-static int default_mixer_resume(struct usb_mixer_elem_list *list)
-{
-	struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list);
-
-	/* get connector value to "wake up" the USB audio */
-	if (cval->val_type == USB_MIXER_BOOLEAN && cval->channels == 1)
-		get_connector_value(cval, NULL, NULL);
-
-	return 0;
-}
-
 static int default_mixer_reset_resume(struct usb_mixer_elem_list *list)
 {
-	int err = default_mixer_resume(list);
+	int err;
 
-	if (err < 0)
-		return err;
+	if (list->resume) {
+		err = list->resume(list);
+		if (err < 0)
+			return err;
+	}
 	return restore_mixer_value(list);
 }
 
@@ -3697,7 +3702,7 @@ void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list,
 	list->id = unitid;
 	list->dump = snd_usb_mixer_dump_cval;
 #ifdef CONFIG_PM
-	list->resume = default_mixer_resume;
+	list->resume = NULL;
 	list->reset_resume = default_mixer_reset_resume;
 #endif
 }

From 0e865f0c31928d6a313269ef624907eec55287c4 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Tue, 27 Jul 2021 19:59:57 +0300
Subject: [PATCH 337/502] can: usb_8dev: fix memory leak

In usb_8dev_start() MAX_RX_URBS coherent buffers are allocated and
there is nothing, that frees them:

1) In callback function the urb is resubmitted and that's all
2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER
   is not set (see usb_8dev_start) and this flag cannot be used with
   coherent buffers.

So, all allocated buffers should be freed with usb_free_coherent()
explicitly.

Side note: This code looks like a copy-paste of other can drivers. The
same patch was applied to mcba_usb driver and it works nice with real
hardware. There is no change in functionality, only clean-up code for
coherent buffers.

Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices")
Link: https://lore.kernel.org/r/d39b458cd425a1cf7f512f340224e6e9563b07bd.1627404470.git.paskripkin@gmail.com
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/usb_8dev.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index b6e7ef0d5bc6..d1b83bd1b3cb 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -137,7 +137,8 @@ struct usb_8dev_priv {
 	u8 *cmd_msg_buffer;
 
 	struct mutex usb_8dev_cmd_lock;
-
+	void *rxbuf[MAX_RX_URBS];
+	dma_addr_t rxbuf_dma[MAX_RX_URBS];
 };
 
 /* tx frame */
@@ -733,6 +734,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
 	for (i = 0; i < MAX_RX_URBS; i++) {
 		struct urb *urb = NULL;
 		u8 *buf;
+		dma_addr_t buf_dma;
 
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -742,7 +744,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
 		}
 
 		buf = usb_alloc_coherent(priv->udev, RX_BUFFER_SIZE, GFP_KERNEL,
-					 &urb->transfer_dma);
+					 &buf_dma);
 		if (!buf) {
 			netdev_err(netdev, "No memory left for USB buffer\n");
 			usb_free_urb(urb);
@@ -750,6 +752,8 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
 			break;
 		}
 
+		urb->transfer_dma = buf_dma;
+
 		usb_fill_bulk_urb(urb, priv->udev,
 				  usb_rcvbulkpipe(priv->udev,
 						  USB_8DEV_ENDP_DATA_RX),
@@ -767,6 +771,9 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
 			break;
 		}
 
+		priv->rxbuf[i] = buf;
+		priv->rxbuf_dma[i] = buf_dma;
+
 		/* Drop reference, USB core will take care of freeing it */
 		usb_free_urb(urb);
 	}
@@ -836,6 +843,10 @@ static void unlink_all_urbs(struct usb_8dev_priv *priv)
 
 	usb_kill_anchored_urbs(&priv->rx_submitted);
 
+	for (i = 0; i < MAX_RX_URBS; ++i)
+		usb_free_coherent(priv->udev, RX_BUFFER_SIZE,
+				  priv->rxbuf[i], priv->rxbuf_dma[i]);
+
 	usb_kill_anchored_urbs(&priv->tx_submitted);
 	atomic_set(&priv->active_tx_urbs, 0);
 

From 9969e3c5f40c166e3396acc36c34f9de502929f6 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Tue, 27 Jul 2021 20:00:33 +0300
Subject: [PATCH 338/502] can: ems_usb: fix memory leak

In ems_usb_start() MAX_RX_URBS coherent buffers are allocated and
there is nothing, that frees them:

1) In callback function the urb is resubmitted and that's all
2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER
   is not set (see ems_usb_start) and this flag cannot be used with
   coherent buffers.

So, all allocated buffers should be freed with usb_free_coherent()
explicitly.

Side note: This code looks like a copy-paste of other can drivers. The
same patch was applied to mcba_usb driver and it works nice with real
hardware. There is no change in functionality, only clean-up code for
coherent buffers.

Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface")
Link: https://lore.kernel.org/r/59aa9fbc9a8cbf9af2bbd2f61a659c480b415800.1627404470.git.paskripkin@gmail.com
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/ems_usb.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 0a37af4a3fa4..2b5302e72435 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -255,6 +255,8 @@ struct ems_usb {
 	unsigned int free_slots; /* remember number of available slots */
 
 	struct ems_cpc_msg active_params; /* active controller parameters */
+	void *rxbuf[MAX_RX_URBS];
+	dma_addr_t rxbuf_dma[MAX_RX_URBS];
 };
 
 static void ems_usb_read_interrupt_callback(struct urb *urb)
@@ -587,6 +589,7 @@ static int ems_usb_start(struct ems_usb *dev)
 	for (i = 0; i < MAX_RX_URBS; i++) {
 		struct urb *urb = NULL;
 		u8 *buf = NULL;
+		dma_addr_t buf_dma;
 
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -596,7 +599,7 @@ static int ems_usb_start(struct ems_usb *dev)
 		}
 
 		buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
-					 &urb->transfer_dma);
+					 &buf_dma);
 		if (!buf) {
 			netdev_err(netdev, "No memory left for USB buffer\n");
 			usb_free_urb(urb);
@@ -604,6 +607,8 @@ static int ems_usb_start(struct ems_usb *dev)
 			break;
 		}
 
+		urb->transfer_dma = buf_dma;
+
 		usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
 				  buf, RX_BUFFER_SIZE,
 				  ems_usb_read_bulk_callback, dev);
@@ -619,6 +624,9 @@ static int ems_usb_start(struct ems_usb *dev)
 			break;
 		}
 
+		dev->rxbuf[i] = buf;
+		dev->rxbuf_dma[i] = buf_dma;
+
 		/* Drop reference, USB core will take care of freeing it */
 		usb_free_urb(urb);
 	}
@@ -684,6 +692,10 @@ static void unlink_all_urbs(struct ems_usb *dev)
 
 	usb_kill_anchored_urbs(&dev->rx_submitted);
 
+	for (i = 0; i < MAX_RX_URBS; ++i)
+		usb_free_coherent(dev->udev, RX_BUFFER_SIZE,
+				  dev->rxbuf[i], dev->rxbuf_dma[i]);
+
 	usb_kill_anchored_urbs(&dev->tx_submitted);
 	atomic_set(&dev->active_tx_urbs, 0);
 

From 928150fad41ba16df7fcc9f7f945747d0f56cbb6 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Tue, 27 Jul 2021 20:00:46 +0300
Subject: [PATCH 339/502] can: esd_usb2: fix memory leak

In esd_usb2_setup_rx_urbs() MAX_RX_URBS coherent buffers are allocated
and there is nothing, that frees them:

1) In callback function the urb is resubmitted and that's all
2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER
   is not set (see esd_usb2_setup_rx_urbs) and this flag cannot be used
   with coherent buffers.

So, all allocated buffers should be freed with usb_free_coherent()
explicitly.

Side note: This code looks like a copy-paste of other can drivers. The
same patch was applied to mcba_usb driver and it works nice with real
hardware. There is no change in functionality, only clean-up code for
coherent buffers.

Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device")
Link: https://lore.kernel.org/r/b31b096926dcb35998ad0271aac4b51770ca7cc8.1627404470.git.paskripkin@gmail.com
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/esd_usb2.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 65b58f8fc328..66fa8b07c2e6 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -195,6 +195,8 @@ struct esd_usb2 {
 	int net_count;
 	u32 version;
 	int rxinitdone;
+	void *rxbuf[MAX_RX_URBS];
+	dma_addr_t rxbuf_dma[MAX_RX_URBS];
 };
 
 struct esd_usb2_net_priv {
@@ -545,6 +547,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
 	for (i = 0; i < MAX_RX_URBS; i++) {
 		struct urb *urb = NULL;
 		u8 *buf = NULL;
+		dma_addr_t buf_dma;
 
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -554,7 +557,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
 		}
 
 		buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
-					 &urb->transfer_dma);
+					 &buf_dma);
 		if (!buf) {
 			dev_warn(dev->udev->dev.parent,
 				 "No memory left for USB buffer\n");
@@ -562,6 +565,8 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
 			goto freeurb;
 		}
 
+		urb->transfer_dma = buf_dma;
+
 		usb_fill_bulk_urb(urb, dev->udev,
 				  usb_rcvbulkpipe(dev->udev, 1),
 				  buf, RX_BUFFER_SIZE,
@@ -574,8 +579,12 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
 			usb_unanchor_urb(urb);
 			usb_free_coherent(dev->udev, RX_BUFFER_SIZE, buf,
 					  urb->transfer_dma);
+			goto freeurb;
 		}
 
+		dev->rxbuf[i] = buf;
+		dev->rxbuf_dma[i] = buf_dma;
+
 freeurb:
 		/* Drop reference, USB core will take care of freeing it */
 		usb_free_urb(urb);
@@ -663,6 +672,11 @@ static void unlink_all_urbs(struct esd_usb2 *dev)
 	int i, j;
 
 	usb_kill_anchored_urbs(&dev->rx_submitted);
+
+	for (i = 0; i < MAX_RX_URBS; ++i)
+		usb_free_coherent(dev->udev, RX_BUFFER_SIZE,
+				  dev->rxbuf[i], dev->rxbuf_dma[i]);
+
 	for (i = 0; i < dev->net_count; i++) {
 		priv = dev->nets[i];
 		if (priv) {

From fe911792eae32f03d27d8f3de2f0271862d435ac Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 23 Jun 2021 10:45:20 +0200
Subject: [PATCH 340/502] media: Revert "media: rtl28xxu: fix zero-length
 control request"

This reverts commit 25d5ce3a606a1eb23a9265d615a92a876ff9cb5f.

The patch in question causes a regression and was superseded by a second
version. Unfortunately, the first revision ended up being applied
instead of the correct one.

Link: https://lore.kernel.org/r/YL3MCGY5wTsW2kEF@hovoldconsulting.com

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/dvb-usb-v2/rtl28xxu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
index 83705730e37e..0cbdb95f8d35 100644
--- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
+++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
@@ -612,9 +612,8 @@ static int rtl28xxu_read_config(struct dvb_usb_device *d)
 static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name)
 {
 	struct rtl28xxu_dev *dev = d_to_priv(d);
-	u8 buf[1];
 	int ret;
-	struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf};
+	struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL};
 
 	dev_dbg(&d->intf->dev, "\n");
 

From 76f22c93b209c811bd489950f17f8839adb31901 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 23 Jun 2021 10:45:21 +0200
Subject: [PATCH 341/502] media: rtl28xxu: fix zero-length control request

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Control transfers without a data stage are treated as OUT requests by
the USB stack and should be using usb_sndctrlpipe(). Failing to do so
will now trigger a warning.

The driver uses a zero-length i2c-read request for type detection so
update the control-request code to use usb_sndctrlpipe() in this case.

Note that actually trying to read the i2c register in question does not
work as the register might not exist (e.g. depending on the demodulator)
as reported by Eero Lehtinen <debiangamer2@gmail.com>.

Reported-by: syzbot+faf11bbadc5a372564da@syzkaller.appspotmail.com
Reported-by: Eero Lehtinen <debiangamer2@gmail.com>
Tested-by: Eero Lehtinen <debiangamer2@gmail.com>
Fixes: d0f232e823af ("[media] rtl28xxu: add heuristic to detect chip type")
Cc: stable@vger.kernel.org      # 4.0
Cc: Antti Palosaari <crope@iki.fi>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/dvb-usb-v2/rtl28xxu.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
index 0cbdb95f8d35..795a012d4020 100644
--- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
+++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
@@ -37,7 +37,16 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req)
 	} else {
 		/* read */
 		requesttype = (USB_TYPE_VENDOR | USB_DIR_IN);
-		pipe = usb_rcvctrlpipe(d->udev, 0);
+
+		/*
+		 * Zero-length transfers must use usb_sndctrlpipe() and
+		 * rtl28xxu_identify_state() uses a zero-length i2c read
+		 * command to determine the chip type.
+		 */
+		if (req->size)
+			pipe = usb_rcvctrlpipe(d->udev, 0);
+		else
+			pipe = usb_sndctrlpipe(d->udev, 0);
 	}
 
 	ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value,

From c592b46907adbeb81243f7eb7a468c36692658b8 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 30 Jun 2021 09:58:23 +0200
Subject: [PATCH 342/502] media: videobuf2-core: dequeue if start_streaming
 fails

If a vb2_queue sets q->min_buffers_needed then when the number of
queued buffers reaches q->min_buffers_needed, vb2_core_qbuf() will call
the start_streaming() callback. If start_streaming() returns an error,
then that error was just returned by vb2_core_qbuf(), but the buffer
was still queued. However, userspace expects that if VIDIOC_QBUF fails,
the buffer is returned dequeued.

So if start_streaming() fails, then remove the buffer from the queue,
thus avoiding this unwanted side-effect.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Tested-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
Fixes: b3379c6201bb ("[media] vb2: only call start_streaming if sufficient buffers are queued")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/common/videobuf2/videobuf2-core.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index 02281d13505f..508ac295eb06 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -1573,6 +1573,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
 		  struct media_request *req)
 {
 	struct vb2_buffer *vb;
+	enum vb2_buffer_state orig_state;
 	int ret;
 
 	if (q->error) {
@@ -1673,6 +1674,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
 	 * Add to the queued buffers list, a buffer will stay on it until
 	 * dequeued in dqbuf.
 	 */
+	orig_state = vb->state;
 	list_add_tail(&vb->queued_entry, &q->queued_list);
 	q->queued_count++;
 	q->waiting_for_buffers = false;
@@ -1703,8 +1705,17 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
 	if (q->streaming && !q->start_streaming_called &&
 	    q->queued_count >= q->min_buffers_needed) {
 		ret = vb2_start_streaming(q);
-		if (ret)
+		if (ret) {
+			/*
+			 * Since vb2_core_qbuf will return with an error,
+			 * we should return it to state DEQUEUED since
+			 * the error indicates that the buffer wasn't queued.
+			 */
+			list_del(&vb->queued_entry);
+			q->queued_count--;
+			vb->state = orig_state;
 			return ret;
+		}
 	}
 
 	dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index);

From f1de1c7803595e937ce9b922807f499851225021 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Mon, 5 Jul 2021 14:57:08 +0200
Subject: [PATCH 343/502] media: atmel: fix build when ISC=m and XISC=y

Building VIDEO_ATMEL_ISC as module and VIDEO_ATMEL_XISC as built-in
(or viceversa) causes build errors:

 or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o: in function `isc_async_complete':
 atmel-isc-base.c:(.text+0x40d0): undefined reference to `__this_module'
 or1k-linux-ld: atmel-isc-base.c:(.text+0x40f0): undefined reference to `__this_module'
 or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o:(.rodata+0x390): undefined reference to `__this_module'
 or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o:(__param+0x4): undefined reference to `__this_module'
 or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o:(__param+0x18): undefined reference to `__this_module'

This is caused by the file atmel-isc-base.c which is common code between
the two drivers.

The solution is to create another Kconfig symbol that is automatically
selected and generates the module atmel-isc-base.ko. This module can be
loaded when both drivers are modules, or built-in when at least one of them
is built-in.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: c9aa973884a1 ("media: atmel: atmel-isc: add microchip-xisc driver")
Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/Kconfig          |  8 ++++++++
 drivers/media/platform/atmel/Makefile         |  5 +++--
 drivers/media/platform/atmel/atmel-isc-base.c | 11 +++++++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/atmel/Kconfig b/drivers/media/platform/atmel/Kconfig
index 99b51213f871..dda2f27da317 100644
--- a/drivers/media/platform/atmel/Kconfig
+++ b/drivers/media/platform/atmel/Kconfig
@@ -8,6 +8,7 @@ config VIDEO_ATMEL_ISC
 	select VIDEOBUF2_DMA_CONTIG
 	select REGMAP_MMIO
 	select V4L2_FWNODE
+	select VIDEO_ATMEL_ISC_BASE
 	help
 	   This module makes the ATMEL Image Sensor Controller available
 	   as a v4l2 device.
@@ -19,10 +20,17 @@ config VIDEO_ATMEL_XISC
 	select VIDEOBUF2_DMA_CONTIG
 	select REGMAP_MMIO
 	select V4L2_FWNODE
+	select VIDEO_ATMEL_ISC_BASE
 	help
 	   This module makes the ATMEL eXtended Image Sensor Controller
 	   available as a v4l2 device.
 
+config VIDEO_ATMEL_ISC_BASE
+	tristate
+	default n
+	help
+	  ATMEL ISC and XISC common code base.
+
 config VIDEO_ATMEL_ISI
 	tristate "ATMEL Image Sensor Interface (ISI) support"
 	depends on VIDEO_V4L2 && OF
diff --git a/drivers/media/platform/atmel/Makefile b/drivers/media/platform/atmel/Makefile
index c5c01556c653..46d264ab7948 100644
--- a/drivers/media/platform/atmel/Makefile
+++ b/drivers/media/platform/atmel/Makefile
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
-atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o
-atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o
+atmel-isc-objs = atmel-sama5d2-isc.o
+atmel-xisc-objs = atmel-sama7g5-isc.o
 
 obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o
+obj-$(CONFIG_VIDEO_ATMEL_ISC_BASE) += atmel-isc-base.o
 obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o
 obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o
diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 19daa49bf604..136ab7cf36ed 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -378,6 +378,7 @@ int isc_clk_init(struct isc_device *isc)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(isc_clk_init);
 
 void isc_clk_cleanup(struct isc_device *isc)
 {
@@ -392,6 +393,7 @@ void isc_clk_cleanup(struct isc_device *isc)
 			clk_unregister(isc_clk->clk);
 	}
 }
+EXPORT_SYMBOL_GPL(isc_clk_cleanup);
 
 static int isc_queue_setup(struct vb2_queue *vq,
 			    unsigned int *nbuffers, unsigned int *nplanes,
@@ -1578,6 +1580,7 @@ irqreturn_t isc_interrupt(int irq, void *dev_id)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(isc_interrupt);
 
 static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max)
 {
@@ -2212,6 +2215,7 @@ const struct v4l2_async_notifier_operations isc_async_ops = {
 	.unbind = isc_async_unbind,
 	.complete = isc_async_complete,
 };
+EXPORT_SYMBOL_GPL(isc_async_ops);
 
 void isc_subdev_cleanup(struct isc_device *isc)
 {
@@ -2224,6 +2228,7 @@ void isc_subdev_cleanup(struct isc_device *isc)
 
 	INIT_LIST_HEAD(&isc->subdev_entities);
 }
+EXPORT_SYMBOL_GPL(isc_subdev_cleanup);
 
 int isc_pipeline_init(struct isc_device *isc)
 {
@@ -2264,6 +2269,7 @@ int isc_pipeline_init(struct isc_device *isc)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(isc_pipeline_init);
 
 /* regmap configuration */
 #define ATMEL_ISC_REG_MAX    0xd5c
@@ -2273,4 +2279,9 @@ const struct regmap_config isc_regmap_config = {
 	.val_bits       = 32,
 	.max_register	= ATMEL_ISC_REG_MAX,
 };
+EXPORT_SYMBOL_GPL(isc_regmap_config);
 
+MODULE_AUTHOR("Songjun Wu");
+MODULE_AUTHOR("Eugen Hristev");
+MODULE_DESCRIPTION("Atmel ISC common code base");
+MODULE_LICENSE("GPL v2");

From 341abd693d10e5f337a51f140ae3e7a1ae0febf6 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de@gmail.com>
Date: Thu, 29 Jul 2021 06:33:06 +0200
Subject: [PATCH 344/502] serial: 8250_pci: Avoid irq sharing for MSI(-X)
 interrupts.

This attempts to fix a bug found with a serial port card which uses
an MCS9922 chip, one of the 4 models for which MSI-X interrupts are
currently supported. I don't possess such a card, and i'm not
experienced with the serial subsystem, so this patch is based on what
i think i found as a likely reason for failure, based on walking the
user who actually owns the card through some diagnostic.

The user who reported the problem finds the following in his dmesg
output for the relevant ttyS4 and ttyS5:

[    0.580425] serial 0000:02:00.0: enabling device (0000 -> 0003)
[    0.601448] 0000:02:00.0: ttyS4 at I/O 0x3010 (irq = 125, base_baud = 115200) is a ST16650V2
[    0.603089] serial 0000:02:00.1: enabling device (0000 -> 0003)
[    0.624119] 0000:02:00.1: ttyS5 at I/O 0x3000 (irq = 126, base_baud = 115200) is a ST16650V2
...
[    6.323784] genirq: Flags mismatch irq 128. 00000080 (ttyS5) vs. 00000000 (xhci_hcd)
[    6.324128] genirq: Flags mismatch irq 128. 00000080 (ttyS5) vs. 00000000 (xhci_hcd)
...

Output of setserial -a:

/dev/ttyS4, Line 4, UART: 16650V2, Port: 0x3010, IRQ: 127
	Baud_base: 115200, close_delay: 50, divisor: 0
	closing_wait: 3000
	Flags: spd_normal skip_test

This suggests to me that the serial driver wants to register and share a
MSI/MSI-X irq 128 with the xhci_hcd driver, whereas the xhci driver does
not want to share the irq, as flags 0x00000080 (== IRQF_SHARED) from the
serial port driver means to share the irq, and this mismatch ends in some
failed irq init?

With this setup, data reception works very unreliable, with dropped data,
already at a transmission rate of only a 16 Bytes chunk every 1/120th of
a second, ie. 1920 Bytes/sec, presumably due to rx fifo overflow due to
mishandled or not used at all rx irq's?

See full discussion thread with attempted diagnosis at:

https://psychtoolbox.discourse.group/t/issues-with-iscan-serial-port-recording/3886

Disabling the use of MSI interrupts for the serial port pci card did
fix the reliability problems. The user executed the following sequence
of commands to achieve this:

echo 0000:02:00.0 | sudo tee /sys/bus/pci/drivers/serial/unbind
echo 0000:02:00.1 | sudo tee /sys/bus/pci/drivers/serial/unbind

echo 0 | sudo tee /sys/bus/pci/devices/0000:02:00.0/msi_bus
echo 0 | sudo tee /sys/bus/pci/devices/0000:02:00.1/msi_bus

echo 0000:02:00.0 | sudo tee /sys/bus/pci/drivers/serial/bind
echo 0000:02:00.1 | sudo tee /sys/bus/pci/drivers/serial/bind

This resulted in the following log output:

[   82.179021] pci 0000:02:00.0: MSI/MSI-X disallowed for future drivers
[   87.003031] pci 0000:02:00.1: MSI/MSI-X disallowed for future drivers
[   98.537010] 0000:02:00.0: ttyS4 at I/O 0x3010 (irq = 17, base_baud = 115200) is a ST16650V2
[  103.648124] 0000:02:00.1: ttyS5 at I/O 0x3000 (irq = 18, base_baud = 115200) is a ST16650V2

This patch attempts to fix the problem by disabling irq sharing when
using MSI irq's. Note that all i know for sure is that disabling MSI
irq's fixed the problem for the user, so this patch could be wrong and
is untested. Please review with caution, keeping this in mind.

Fixes: 8428413b1d14 ("serial: 8250_pci: Implement MSI(-X) support")
Cc: Ralf Ramsauer <ralf.ramsauer@oth-regensburg.de>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Link: https://lore.kernel.org/r/20210729043306.18528-1-mario.kleiner.de@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 02985cf90ef2..a808c283883e 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -4002,6 +4002,7 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board)
 		if (pci_match_id(pci_use_msi, dev)) {
 			dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n");
 			pci_set_master(dev);
+			uart.port.flags &= ~UPF_SHARE_IRQ;
 			rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES);
 		} else {
 			dev_dbg(&dev->dev, "Using legacy interrupts\n");

From fa7a549d321a4189677b0cea86e58d9db7977f7b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 14 Jul 2021 17:37:49 -0400
Subject: [PATCH 345/502] KVM: x86: accept userspace interrupt only if no event
 is injected

Once an exception has been injected, any side effects related to
the exception (such as setting CR2 or DR6) have been taked place.
Therefore, once KVM sets the VM-entry interruption information
field or the AMD EVENTINJ field, the next VM-entry must deliver that
exception.

Pending interrupts are processed after injected exceptions, so
in theory it would not be a problem to use KVM_INTERRUPT when
an injected exception is present.  However, DOSEMU is using
run->ready_for_interrupt_injection to detect interrupt windows
and then using KVM_SET_SREGS/KVM_SET_REGS to inject the
interrupt manually.  For this to work, the interrupt window
must be delayed after the completion of the previous event
injection.

Cc: stable@vger.kernel.org
Reported-by: Stas Sergeev <stsp2@yandex.ru>
Tested-by: Stas Sergeev <stsp2@yandex.ru>
Fixes: 71cc849b7093 ("KVM: x86: Fix split-irqchip vs interrupt injection window request")
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4116567f3d44..e5d5c5ed7dd4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
 
 static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
 {
-	return kvm_arch_interrupt_allowed(vcpu) &&
-		kvm_cpu_accept_dm_intr(vcpu);
+	/*
+	 * Do not cause an interrupt window exit if an exception
+	 * is pending or an event needs reinjection; userspace
+	 * might want to inject the interrupt manually using KVM_SET_REGS
+	 * or KVM_SET_SREGS.  For that to work, we must be at an
+	 * instruction boundary and with no events half-injected.
+	 */
+	return (kvm_arch_interrupt_allowed(vcpu) &&
+		kvm_cpu_accept_dm_intr(vcpu) &&
+		!kvm_event_needs_reinjection(vcpu) &&
+		!vcpu->arch.exception.pending);
 }
 
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,

From 3a0670824979a986a2314c921aa092e60730eeae Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 20 Jun 2021 20:54:21 +0200
Subject: [PATCH 346/502] ARM: dts: stm32: Prefer HW RTC on DHCOM SoM

The DHCOM SoM has two RTC, one is the STM32 RTC built into the SoC
and another is Microcrystal RV RTC. By default, only the later has
battery backup, the former does not. The order in which the RTCs
are probed on boot is random, which means the kernel might pick up
system time from the STM32 RTC which has no battery backup. This
then leads to incorrect initial system time setup, even though the
HW RTC has correct time configured in it.

Add DT alias entries, so that the RTCs get assigned fixed IDs and
the HW RTC is always picked by the kernel as the default RTC, thus
resulting in correct system time in early userspace.

Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Patrice Chotard <patrice.chotard@foss.st.com>
Cc: Patrick Delaunay <patrick.delaunay@foss.st.com>
Cc: linux-stm32@st-md-mailman.stormreply.com
To: linux-arm-kernel@lists.infradead.org
Signed-off-by: Alexandre Torgue <alexandre.torgue@foss.st.com>
---
 arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
index 2af0a6752674..8349c9099e30 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
@@ -12,6 +12,8 @@
 	aliases {
 		ethernet0 = &ethernet0;
 		ethernet1 = &ksz8851;
+		rtc0 = &hwrtc;
+		rtc1 = &rtc;
 	};
 
 	memory@c0000000 {
@@ -248,7 +250,7 @@
 	/delete-property/dmas;
 	/delete-property/dma-names;
 
-	rtc@32 {
+	hwrtc: rtc@32 {
 		compatible = "microcrystal,rv8803";
 		reg = <0x32>;
 	};

From 36862c1ebc92a7e6fcc55002965c44b8ad17d4ca Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Wed, 21 Jul 2021 20:12:53 +0200
Subject: [PATCH 347/502] ARM: dts: stm32: Disable LAN8710 EDPD on DHCOM

The LAN8710 Energy Detect Power Down (EDPD) functionality might cause
unreliable cable detection. There are multiple accounts of this in the
SMSC PHY driver patches which attempted to make EDPD reliable, however
it seems there is always some sort of corner case left. Unfortunatelly,
there is no errata documented which would confirm this to be a silicon
bug on the LAN87xx series of PHYs (LAN8700, LAN8710, LAN8720 at least).

Disable EDPD on the DHCOM SoM, just like multiple other boards already
do as well, to make the cable detection reliable.

Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Patrice Chotard <patrice.chotard@foss.st.com>
Cc: Patrick Delaunay <patrick.delaunay@foss.st.com>
Cc: linux-stm32@st-md-mailman.stormreply.com
To: linux-arm-kernel@lists.infradead.org
Signed-off-by: Alexandre Torgue <alexandre.torgue@foss.st.com>
---
 arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
index 8349c9099e30..8c41f819f776 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
@@ -140,6 +140,7 @@
 			reset-gpios = <&gpioh 3 GPIO_ACTIVE_LOW>;
 			reset-assert-us = <500>;
 			reset-deassert-us = <500>;
+			smsc,disable-energy-detect;
 			interrupt-parent = <&gpioi>;
 			interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 		};

From 15f68f027ebd961b99a1c420f96ff3838c5e4450 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Wed, 21 Jul 2021 20:10:40 +0200
Subject: [PATCH 348/502] ARM: dts: stm32: Fix touchscreen IRQ line assignment
 on DHCOM

While 7e5f3155dcbb4 ("ARM: dts: stm32: Fix LED5 on STM32MP1 DHCOM PDK2")
fixed the LED0 assignment on the PDK2 board, the same commit did not
update the touchscreen IRQ line assignment, which is the same GPIO line,
shared between the LED0 output and touchscreen IRQ input. To make this
more convoluted, the same EXTI input (not the same GPIO line) is shared
between Button B which is Active-Low IRQ, and touchscreen IRQ which is
Edge-Falling IRQ, which cannot be used at the same time. In case the LCD
board with touchscreen is in use, which is the case here, LED0 must be
disabled, Button B must be polled, so the touchscreen interrupt works as
it should.

Update the touchscreen IRQ line assignment, disable LED0 and use polled
GPIO button driver for Button B, since the DT here describes baseboard
with LCD board.

Fixes: 7e5f3155dcbb4 ("ARM: dts: stm32: Fix LED5 on STM32MP1 DHCOM PDK2")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Patrice Chotard <patrice.chotard@foss.st.com>
Cc: Patrick Delaunay <patrick.delaunay@foss.st.com>
Cc: linux-stm32@st-md-mailman.stormreply.com
To: linux-arm-kernel@lists.infradead.org
Signed-off-by: Alexandre Torgue <alexandre.torgue@foss.st.com>
---
 arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
index c5ea08fec535..6cf1c8b4c6e2 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
@@ -37,7 +37,7 @@
 		poll-interval = <20>;
 
 		/*
-		 * The EXTi IRQ line 3 is shared with touchscreen and ethernet,
+		 * The EXTi IRQ line 3 is shared with ethernet,
 		 * so mark this as polled GPIO key.
 		 */
 		button-0 {
@@ -46,6 +46,16 @@
 			gpios = <&gpiof 3 GPIO_ACTIVE_LOW>;
 		};
 
+		/*
+		 * The EXTi IRQ line 6 is shared with touchscreen,
+		 * so mark this as polled GPIO key.
+		 */
+		button-1 {
+			label = "TA2-GPIO-B";
+			linux,code = <KEY_B>;
+			gpios = <&gpiod 6 GPIO_ACTIVE_LOW>;
+		};
+
 		/*
 		 * The EXTi IRQ line 0 is shared with PMIC,
 		 * so mark this as polled GPIO key.
@@ -60,13 +70,6 @@
 	gpio-keys {
 		compatible = "gpio-keys";
 
-		button-1 {
-			label = "TA2-GPIO-B";
-			linux,code = <KEY_B>;
-			gpios = <&gpiod 6 GPIO_ACTIVE_LOW>;
-			wakeup-source;
-		};
-
 		button-3 {
 			label = "TA4-GPIO-D";
 			linux,code = <KEY_D>;
@@ -82,6 +85,7 @@
 			label = "green:led5";
 			gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>;
 			default-state = "off";
+			status = "disabled";
 		};
 
 		led-1 {
@@ -185,8 +189,8 @@
 	touchscreen@38 {
 		compatible = "edt,edt-ft5406";
 		reg = <0x38>;
-		interrupt-parent = <&gpiog>;
-		interrupts = <2 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */
+		interrupt-parent = <&gpioc>;
+		interrupts = <6 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */
 	};
 };
 

From ce5a595744126be4f1327e29e3c5ae9aac6b38d5 Mon Sep 17 00:00:00 2001
From: Mike Tipton <mdtipton@codeaurora.org>
Date: Wed, 21 Jul 2021 10:54:31 -0700
Subject: [PATCH 349/502] interconnect: qcom: icc-rpmh: Ensure floor BW is
 enforced for all nodes

We currently only enforce BW floors for a subset of nodes in a path.
All BCMs that need updating are queued in the pre_aggregate/aggregate
phase. The first set() commits all queued BCMs and subsequent set()
calls short-circuit without committing anything. Since the floor BW
isn't set in sum_avg/max_peak until set(), then some BCMs are committed
before their associated nodes reflect the floor.

Set the floor as each node is being aggregated. This ensures that all
all relevant floors are set before the BCMs are committed.

Fixes: 266cd33b5913 ("interconnect: qcom: Ensure that the floor bandwidth value is enforced")
Signed-off-by: Mike Tipton <mdtipton@codeaurora.org>
Link: https://lore.kernel.org/r/20210721175432.2119-4-mdtipton@codeaurora.org
[georgi: Removed unused variable]
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/icc-rpmh.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c
index bf01d09dba6c..f6fae64861ce 100644
--- a/drivers/interconnect/qcom/icc-rpmh.c
+++ b/drivers/interconnect/qcom/icc-rpmh.c
@@ -57,6 +57,11 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
 			qn->sum_avg[i] += avg_bw;
 			qn->max_peak[i] = max_t(u32, qn->max_peak[i], peak_bw);
 		}
+
+		if (node->init_avg || node->init_peak) {
+			qn->sum_avg[i] = max_t(u64, qn->sum_avg[i], node->init_avg);
+			qn->max_peak[i] = max_t(u64, qn->max_peak[i], node->init_peak);
+		}
 	}
 
 	*agg_avg += avg_bw;
@@ -79,7 +84,6 @@ EXPORT_SYMBOL_GPL(qcom_icc_aggregate);
 int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
 {
 	struct qcom_icc_provider *qp;
-	struct qcom_icc_node *qn;
 	struct icc_node *node;
 
 	if (!src)
@@ -88,12 +92,6 @@ int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
 		node = src;
 
 	qp = to_qcom_provider(node->provider);
-	qn = node->data;
-
-	qn->sum_avg[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->sum_avg[QCOM_ICC_BUCKET_AMC],
-						 node->avg_bw);
-	qn->max_peak[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->max_peak[QCOM_ICC_BUCKET_AMC],
-						  node->peak_bw);
 
 	qcom_icc_bcm_voter_commit(qp->voter);
 

From f84f5b6f72e68bbaeb850b58ac167e4a3a47532a Mon Sep 17 00:00:00 2001
From: Mike Tipton <mdtipton@codeaurora.org>
Date: Wed, 21 Jul 2021 10:54:32 -0700
Subject: [PATCH 350/502] interconnect: qcom: icc-rpmh: Add BCMs to commit list
 in pre_aggregate

We're only adding BCMs to the commit list in aggregate(), but there are
cases where pre_aggregate() is called without subsequently calling
aggregate(). In particular, in icc_sync_state() when a node with initial
BW has zero requests. Since BCMs aren't added to the commit list in
these cases, we don't actually send the zero BW request to HW. So the
resources remain on unnecessarily.

Add BCMs to the commit list in pre_aggregate() instead, which is always
called even when there are no requests.

Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support")
Signed-off-by: Mike Tipton <mdtipton@codeaurora.org>
Link: https://lore.kernel.org/r/20210721175432.2119-5-mdtipton@codeaurora.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/icc-rpmh.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c
index f6fae64861ce..27cc5f03611c 100644
--- a/drivers/interconnect/qcom/icc-rpmh.c
+++ b/drivers/interconnect/qcom/icc-rpmh.c
@@ -20,13 +20,18 @@ void qcom_icc_pre_aggregate(struct icc_node *node)
 {
 	size_t i;
 	struct qcom_icc_node *qn;
+	struct qcom_icc_provider *qp;
 
 	qn = node->data;
+	qp = to_qcom_provider(node->provider);
 
 	for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) {
 		qn->sum_avg[i] = 0;
 		qn->max_peak[i] = 0;
 	}
+
+	for (i = 0; i < qn->num_bcms; i++)
+		qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
 }
 EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate);
 
@@ -44,10 +49,8 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
 {
 	size_t i;
 	struct qcom_icc_node *qn;
-	struct qcom_icc_provider *qp;
 
 	qn = node->data;
-	qp = to_qcom_provider(node->provider);
 
 	if (!tag)
 		tag = QCOM_ICC_TAG_ALWAYS;
@@ -67,9 +70,6 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
 	*agg_avg += avg_bw;
 	*agg_peak = max_t(u32, *agg_peak, peak_bw);
 
-	for (i = 0; i < qn->num_bcms; i++)
-		qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(qcom_icc_aggregate);

From 094121ef815f29d9e6a01fafca365831454ce293 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Wed, 28 Jul 2021 20:21:15 +0200
Subject: [PATCH 351/502] arch: Kconfig: clean up obsolete use of HAVE_IDE

The arch-specific Kconfig files use HAVE_IDE to indicate if IDE is
supported.

As IDE support and the HAVE_IDE config vanishes with commit b7fb14d3ac63
("ide: remove the legacy ide driver"), there is no need to mention
HAVE_IDE in all those arch-specific Kconfig files.

The issue was identified with ./scripts/checkkconfigsymbols.py.

Fixes: b7fb14d3ac63 ("ide: remove the legacy ide driver")
Suggested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20210728182115.4401-1-lukas.bulwahn@gmail.com
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/alpha/Kconfig            | 1 -
 arch/arm/Kconfig              | 6 ------
 arch/arm/mach-davinci/Kconfig | 1 -
 arch/h8300/Kconfig.cpu        | 1 -
 arch/ia64/Kconfig             | 1 -
 arch/m68k/Kconfig             | 1 -
 arch/mips/Kconfig             | 1 -
 arch/parisc/Kconfig           | 1 -
 arch/powerpc/Kconfig          | 1 -
 arch/sh/Kconfig               | 1 -
 arch/sparc/Kconfig            | 1 -
 arch/x86/Kconfig              | 1 -
 arch/xtensa/Kconfig           | 1 -
 13 files changed, 18 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 77d3280dc678..a6d4c2f744e3 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -14,7 +14,6 @@ config ALPHA
 	select PCI_SYSCALL if PCI
 	select HAVE_AOUT
 	select HAVE_ASM_MODVERSIONS
-	select HAVE_IDE
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select NEED_DMA_MAP_STATE
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 06b6187b67af..f2ce83e643e7 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -95,7 +95,6 @@ config ARM
 	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
 	select HAVE_GCC_PLUGINS
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
-	select HAVE_IDE if PCI || ISA || PCMCIA
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZ4
@@ -361,7 +360,6 @@ config ARCH_FOOTBRIDGE
 	bool "FootBridge"
 	select CPU_SA110
 	select FOOTBRIDGE
-	select HAVE_IDE
 	select NEED_MACH_IO_H if !MMU
 	select NEED_MACH_MEMORY_H
 	help
@@ -429,7 +427,6 @@ config ARCH_PXA
 	select GENERIC_IRQ_MULTI_HANDLER
 	select GPIO_PXA
 	select GPIOLIB
-	select HAVE_IDE
 	select IRQ_DOMAIN
 	select PLAT_PXA
 	select SPARSE_IRQ
@@ -445,7 +442,6 @@ config ARCH_RPC
 	select ARM_HAS_SG_CHAIN
 	select CPU_SA110
 	select FIQ
-	select HAVE_IDE
 	select HAVE_PATA_PLATFORM
 	select ISA_DMA_API
 	select LEGACY_TIMER_TICK
@@ -468,7 +464,6 @@ config ARCH_SA1100
 	select CPU_SA1100
 	select GENERIC_IRQ_MULTI_HANDLER
 	select GPIOLIB
-	select HAVE_IDE
 	select IRQ_DOMAIN
 	select ISA
 	select NEED_MACH_MEMORY_H
@@ -504,7 +499,6 @@ config ARCH_OMAP1
 	select GENERIC_IRQ_CHIP
 	select GENERIC_IRQ_MULTI_HANDLER
 	select GPIOLIB
-	select HAVE_IDE
 	select HAVE_LEGACY_CLK
 	select IRQ_DOMAIN
 	select NEED_MACH_IO_H if PCCARD
diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index de11030748d0..1d3aef84287d 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -9,7 +9,6 @@ menuconfig ARCH_DAVINCI
 	select PM_GENERIC_DOMAINS_OF if PM && OF
 	select REGMAP_MMIO
 	select RESET_CONTROLLER
-	select HAVE_IDE
 	select PINCTRL_SINGLE
 
 if ARCH_DAVINCI
diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu
index b5e14d513e62..c30baa0499fc 100644
--- a/arch/h8300/Kconfig.cpu
+++ b/arch/h8300/Kconfig.cpu
@@ -44,7 +44,6 @@ config H8300_H8MAX
 	bool "H8MAX"
 	select H83069
 	select RAMKERNEL
-	select HAVE_IDE
 	help
 	  H8MAX Evaluation Board Support
 	  More Information. (Japanese Only)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index cf425c2c63af..4993c7ac7ff6 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -25,7 +25,6 @@ config IA64
 	select HAVE_ASM_MODVERSIONS
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_EXIT_THREAD
-	select HAVE_IDE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 96989ad46f66..d632a1d576f9 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -23,7 +23,6 @@ config M68K
 	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED
 	select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
-	select HAVE_IDE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_UID16
 	select MMU_GATHER_NO_RANGE if MMU
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index cee6087cd686..6dfb27d531dd 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -71,7 +71,6 @@ config MIPS
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
 	select HAVE_GENERIC_VDSO
-	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK
 	select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index bde9907bc5b2..4f8c1fbf8f2f 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -3,7 +3,6 @@ config PARISC
 	def_bool y
 	select ARCH_32BIT_OFF_T if !64BIT
 	select ARCH_MIGHT_HAVE_PC_PARPORT
-	select HAVE_IDE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d01e3401581d..663766fbf505 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -220,7 +220,6 @@ config PPC
 	select HAVE_HARDLOCKUP_DETECTOR_ARCH	if PPC_BOOK3S_64 && SMP
 	select HAVE_HARDLOCKUP_DETECTOR_PERF	if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
 	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
-	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK
 	select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 45a0549421cd..b683b69a4556 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -39,7 +39,6 @@ config SUPERH
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_HW_BREAKPOINT
-	select HAVE_IDE if HAS_IOPORT_MAP
 	select HAVE_IOREMAP_PROT if MMU && !X2TLB
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_GZIP
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index c5fa7932b550..f0c0f955e169 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -19,7 +19,6 @@ config SPARC
 	select OF
 	select OF_PROMTREE
 	select HAVE_ASM_MODVERSIONS
-	select HAVE_IDE
 	select HAVE_ARCH_KGDB if !SMP || SPARC64
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_SECCOMP if SPARC64
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 49270655e827..88fb922c23a0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -202,7 +202,6 @@ config X86
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
 	select HAVE_HW_BREAKPOINT
-	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK	if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 2332b2156993..3878880469d1 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -327,7 +327,6 @@ config XTENSA_PLATFORM_ISS
 
 config XTENSA_PLATFORM_XT2000
 	bool "XT2000"
-	select HAVE_IDE
 	help
 	  XT2000 is the name of Tensilica's feature-rich emulation platform.
 	  This hardware is capable of running a full Linux distribution.

From 7561c14d8a4d1a24a40b1839d927d488e2d6345a Mon Sep 17 00:00:00 2001
From: Sumanth Korikkar <sumanthk@linux.ibm.com>
Date: Wed, 28 Jul 2021 13:24:53 +0200
Subject: [PATCH 352/502] s390/vdso: add .got.plt in vdso linker script

KCFLAGS="-mno-pic-data-is-text-relative" make leads to bfd assertion
error in s390_got_pointer():

LD      arch/s390/kernel/vdso64/vdso64.so.dbg
ld: BFD version 2.35-18.fc33 assertion fail elf-s390-common.c:74

readelf -Wr vdso64_generic.o | grep GOT
0000000000000032  000000110000001a R_390_GOTENT 0000000000000000 _vdso_data + 2
(...)

Add .got.plt in linker script to avoid this.

Suggested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/vdso32/vdso32.lds.S | 1 +
 arch/s390/kernel/vdso64/vdso64.lds.S | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index bff50b6acd6d..edf5ff1debe1 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -51,6 +51,7 @@ SECTIONS
 
 	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
 	.got ALIGN(8)	: { *(.got .toc) }
+	.got.plt ALIGN(8) : { *(.got.plt) }
 
 	_end = .;
 	PROVIDE(end = .);
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index d4fb336d747b..4461ea151e49 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -51,6 +51,7 @@ SECTIONS
 
 	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
 	.got ALIGN(8)	: { *(.got .toc) }
+	.got.plt ALIGN(8) : { *(.got.plt) }
 
 	_end = .;
 	PROVIDE(end = .);

From 88731c8f3636b133e27df88febcd7cd2fdece0a7 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Thu, 29 Jul 2021 15:29:19 +0200
Subject: [PATCH 353/502] s390/boot: fix zstd build for -march=z900

zstd decompression uses __builtin_clz() which fails back to __clzdi2()
when the kernel is built for older hardware like z900. This leads to
build failures like the following:
s390x-11.1.0-ld: /devel/src/kernel/arch/s390/boot/compressed/../../../../lib/zstd/bitstream.h:148: undefined reference to `__clzdi2'

Fix that by optionally including lib/clz_ctz.c into the decompressor.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 7b034d9c1b08 ("s390/boot: add zstd support")
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Link: https://lore.kernel.org/r/patch-1.thread-f0f589.git-f0f58936888f.your-ad-here.call-01627564869-ext-2765@work.hours
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/boot/compressed/Makefile  | 1 +
 arch/s390/boot/compressed/clz_ctz.c | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 arch/s390/boot/compressed/clz_ctz.c

diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 660c799d875d..e30d3fdbbc78 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -11,6 +11,7 @@ UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
 
 obj-y	:= $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
+obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
 obj-all := $(obj-y) piggy.o syms.o
 targets	:= vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/compressed/clz_ctz.c
new file mode 100644
index 000000000000..c3ebf248596b
--- /dev/null
+++ b/arch/s390/boot/compressed/clz_ctz.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/clz_ctz.c"

From 1e9faef4d26de33bd6b5018695996e7394119e5b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 30 Jul 2021 14:21:56 +0200
Subject: [PATCH 354/502] USB: serial: pl2303: fix HX type detection

The device release number for HX-type devices is configurable in
EEPROM/OTPROM and cannot be used reliably for type detection.

Assume all (non-H) devices with bcdUSB 1.1 and unknown bcdDevice to be
of HX type while adding a bcdDevice check for HXD and TB (1.1 and 2.0,
respectively).

Reported-by: Chris <chris@cyber-anlage.de>
Fixes: 8a7bf7510d1f ("USB: serial: pl2303: amend and tighten type detection")
Cc: stable@vger.kernel.org	# 5.13
Link: https://lore.kernel.org/r/20210730122156.718-1-johan@kernel.org
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/pl2303.c | 41 ++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 2f2f5047452b..17601e32083e 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -418,24 +418,33 @@ static int pl2303_detect_type(struct usb_serial *serial)
 	bcdDevice = le16_to_cpu(desc->bcdDevice);
 	bcdUSB = le16_to_cpu(desc->bcdUSB);
 
-	switch (bcdDevice) {
-	case 0x100:
-		/*
-		 * Assume it's an HXN-type if the device doesn't support the old read
-		 * request value.
-		 */
-		if (bcdUSB == 0x200 && !pl2303_supports_hx_status(serial))
-			return TYPE_HXN;
+	switch (bcdUSB) {
+	case 0x110:
+		switch (bcdDevice) {
+		case 0x300:
+			return TYPE_HX;
+		case 0x400:
+			return TYPE_HXD;
+		default:
+			return TYPE_HX;
+		}
 		break;
-	case 0x300:
-		if (bcdUSB == 0x200)
+	case 0x200:
+		switch (bcdDevice) {
+		case 0x100:
+			/*
+			 * Assume it's an HXN-type if the device doesn't
+			 * support the old read request value.
+			 */
+			if (!pl2303_supports_hx_status(serial))
+				return TYPE_HXN;
+			break;
+		case 0x300:
 			return TYPE_TA;
-
-		return TYPE_HX;
-	case 0x400:
-		return TYPE_HXD;
-	case 0x500:
-		return TYPE_TB;
+		case 0x500:
+			return TYPE_TB;
+		}
+		break;
 	}
 
 	dev_err(&serial->interface->dev,

From 4d77f36f2c8c62b230f4a5eb264c169fa04c4a5a Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Tue, 27 Jul 2021 17:43:37 +0800
Subject: [PATCH 355/502] drm/amdgpu: Fix out-of-bounds read when update
 mapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If one GTT BO has been evicted/swapped out, it should sit in CPU domain.
TTM only alloc struct ttm_resource instead of struct ttm_range_mgr_node
for sysMem.

Now when we update mapping for such invalidated BOs, we might walk out
of bounds of struct ttm_resource.

Three possible fix:
1) Let sysMem manager alloc struct ttm_range_mgr_node, like
ttm_range_manager does.
2) Pass pages_addr to update_mapping function too, but need memset
pages_addr[] to zero when unpopulate.
3) Init amdgpu_res_cursor directly.

bug is detected by kfence.
==================================================================
BUG: KFENCE: out-of-bounds read in amdgpu_vm_bo_update_mapping+0x564/0x6e0

Out-of-bounds read at 0x000000008ea93fe9 (64B right of kfence-#167):
 amdgpu_vm_bo_update_mapping+0x564/0x6e0 [amdgpu]
 amdgpu_vm_bo_update+0x282/0xa40 [amdgpu]
 amdgpu_vm_handle_moved+0x19e/0x1f0 [amdgpu]
 amdgpu_cs_vm_handling+0x4e4/0x640 [amdgpu]
 amdgpu_cs_ioctl+0x19e7/0x23c0 [amdgpu]
 drm_ioctl_kernel+0xf3/0x180 [drm]
 drm_ioctl+0x2cb/0x550 [drm]
 amdgpu_drm_ioctl+0x5e/0xb0 [amdgpu]

kfence-#167 [0x000000008e11c055-0x000000001f676b3e
 ttm_sys_man_alloc+0x35/0x80 [ttm]
 ttm_resource_alloc+0x39/0x50 [ttm]
 ttm_bo_swapout+0x252/0x5a0 [ttm]
 ttm_device_swapout+0x107/0x180 [ttm]
 ttm_global_swapout+0x6f/0x130 [ttm]
 ttm_tt_populate+0xb1/0x2a0 [ttm]
 ttm_bo_handle_move_mem+0x17e/0x1d0 [ttm]
 ttm_mem_evict_first+0x59d/0x9c0 [ttm]
 ttm_bo_mem_space+0x39f/0x400 [ttm]
 ttm_bo_validate+0x13c/0x340 [ttm]
 ttm_bo_init_reserved+0x269/0x540 [ttm]
 amdgpu_bo_create+0x1d1/0xa30 [amdgpu]
 amdgpu_bo_create_user+0x40/0x80 [amdgpu]
 amdgpu_gem_object_create+0x71/0xc0 [amdgpu]
 amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x2f2/0xcd0 [amdgpu]
 kfd_ioctl_alloc_memory_of_gpu+0xe2/0x330 [amdgpu]
 kfd_ioctl+0x461/0x690 [amdgpu]

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 59e0fefb15aa..acfa207cf970 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -54,11 +54,12 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
 {
 	struct drm_mm_node *node;
 
-	if (!res) {
+	if (!res || res->mem_type == TTM_PL_SYSTEM) {
 		cur->start = start;
 		cur->size = size;
 		cur->remaining = size;
 		cur->node = NULL;
+		WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
 		return;
 	}
 

From 1c0539a6fc8a4a4b77278e35d763073890de96b9 Mon Sep 17 00:00:00 2001
From: Yifan Zhang <yifan1.zhang@amd.com>
Date: Thu, 10 Jun 2021 09:55:01 +0800
Subject: [PATCH 356/502] drm/amdgpu: fix the doorbell missing when in CGPG
 issue for renoir.

If GC has entered CGPG, ringing doorbell > first page doesn't wakeup GC.
Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround this issue.

Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 044076ec1d03..6a23c6826e12 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1295,6 +1295,16 @@ static bool is_raven_kicker(struct amdgpu_device *adev)
 		return false;
 }
 
+static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
+{
+	if ((adev->asic_type == CHIP_RENOIR) &&
+	    (adev->gfx.me_fw_version >= 0x000000a5) &&
+	    (adev->gfx.me_feature_version >= 52))
+		return true;
+	else
+		return false;
+}
+
 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
 {
 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
@@ -3675,7 +3685,16 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 	if (ring->use_doorbell) {
 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
 					(adev->doorbell_index.kiq * 2) << 2);
-		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+		/* If GC has entered CGPG, ringing doorbell > first page
+		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
+		 * workaround this issue. And this change has to align with firmware
+		 * update.
+		 */
+		if (check_if_enlarge_doorbell_range(adev))
+			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+					(adev->doorbell.size - 4));
+		else
+			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
 					(adev->doorbell_index.userqueue_end * 2) << 2);
 	}
 

From 028a71775f811e9d60664ba2c248ff95c6cf57cb Mon Sep 17 00:00:00 2001
From: Catherine Sullivan <csully@google.com>
Date: Thu, 29 Jul 2021 08:52:58 -0700
Subject: [PATCH 357/502] gve: Update MAINTAINERS list

The team maintaining the gve driver has undergone some changes,
this updates the MAINTAINERS file accordingly.

Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: Jon Olson <jonolson@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
Signed-off-by: Jeroen de Borst <jeroendb@google.com>
Link: https://lore.kernel.org/r/20210729155258.442650-1-csully@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58afeb12d3b3..17a873153eba 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7858,9 +7858,9 @@ S:	Maintained
 F:	drivers/input/touchscreen/goodix.c
 
 GOOGLE ETHERNET DRIVERS
-M:	Catherine Sullivan <csully@google.com>
-R:	Sagi Shahar <sagis@google.com>
-R:	Jon Olson <jonolson@google.com>
+M:	Jeroen de Borst <jeroendb@google.com>
+R:	Catherine Sullivan <csully@google.com>
+R:	David Awogbemila <awogbemila@google.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	Documentation/networking/device_drivers/ethernet/google/gve.rst

From b2ff70a01a7a8083e749e01e5d3ffda706fe3305 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Thu, 29 Jul 2021 14:53:35 -0700
Subject: [PATCH 358/502] lib/test_string.c: move string selftest in the
 Runtime Testing menu

STRING_SELFTEST is presented in the "Library routines" menu.  Move it in
Kernel hacking > Kernel Testing and Coverage > Runtime Testing together
with other similar tests found in lib/

	--- Runtime Testing
	<*>   Test functions located in the hexdump module at runtime
	<*>   Test string functions (NEW)
	<*>   Test functions located in the string_helpers module at runtime
	<*>   Test strscpy*() family of functions at runtime
	<*>   Test kstrto*() family of functions at runtime
	<*>   Test printf() family of functions at runtime
	<*>   Test scanf() family of functions at runtime

Link: https://lkml.kernel.org/r/20210719185158.190371-1-mcroce@linux.microsoft.com
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Cc: Peter Rosin <peda@axentia.se>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig       | 3 ---
 lib/Kconfig.debug | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/Kconfig b/lib/Kconfig
index d241fe476fda..5c9c0687f76d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -683,9 +683,6 @@ config PARMAN
 config OBJAGG
 	tristate "objagg" if COMPILE_TEST
 
-config STRING_SELFTEST
-	tristate "Test string functions"
-
 endmenu
 
 config GENERIC_IOREMAP
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 831212722924..5ddd575159fb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2180,6 +2180,9 @@ config ASYNC_RAID6_TEST
 config TEST_HEXDUMP
 	tristate "Test functions located in the hexdump module at runtime"
 
+config STRING_SELFTEST
+	tristate "Test string functions at runtime"
+
 config TEST_STRING_HELPERS
 	tristate "Test functions located in the string_helpers module at runtime"
 

From f267aeb6dea5e468793e5b8eb6a9c72c0020d418 Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Thu, 29 Jul 2021 14:53:38 -0700
Subject: [PATCH 359/502] ocfs2: fix zero out valid data

If append-dio feature is enabled, direct-io write and fallocate could
run in parallel to extend file size, fallocate used "orig_isize" to
record i_size before taking "ip_alloc_sem", when
ocfs2_zeroout_partial_cluster() zeroout EOF blocks, i_size maybe already
extended by ocfs2_dio_end_io_write(), that will cause valid data zeroed
out.

Link: https://lkml.kernel.org/r/20210722054923.24389-1-junxiao.bi@oracle.com
Fixes: 6bba4471f0cc ("ocfs2: fix data corruption by fallocate")
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Jun Piao <piaojun@huawei.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 775657943057..53bb46ce3cbb 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1935,7 +1935,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 		goto out_inode_unlock;
 	}
 
-	orig_isize = i_size_read(inode);
 	switch (sr->l_whence) {
 	case 0: /*SEEK_SET*/
 		break;
@@ -1943,7 +1942,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 		sr->l_start += f_pos;
 		break;
 	case 2: /*SEEK_END*/
-		sr->l_start += orig_isize;
+		sr->l_start += i_size_read(inode);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1998,6 +1997,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 		ret = -EINVAL;
 	}
 
+	orig_isize = i_size_read(inode);
 	/* zeroout eof blocks in the cluster. */
 	if (!ret && change_size && orig_isize < size) {
 		ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,

From 9449ad33be8480f538b11a593e2dda2fb33ca06d Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Thu, 29 Jul 2021 14:53:41 -0700
Subject: [PATCH 360/502] ocfs2: issue zeroout to EOF blocks

For punch holes in EOF blocks, fallocate used buffer write to zero the
EOF blocks in last cluster.  But since ->writepage will ignore EOF
pages, those zeros will not be flushed.

This "looks" ok as commit 6bba4471f0cc ("ocfs2: fix data corruption by
fallocate") will zero the EOF blocks when extend the file size, but it
isn't.  The problem happened on those EOF pages, before writeback, those
pages had DIRTY flag set and all buffer_head in them also had DIRTY flag
set, when writeback run by write_cache_pages(), DIRTY flag on the page
was cleared, but DIRTY flag on the buffer_head not.

When next write happened to those EOF pages, since buffer_head already
had DIRTY flag set, it would not mark page DIRTY again.  That made
writeback ignore them forever.  That will cause data corruption.  Even
directio write can't work because it will fail when trying to drop pages
caches before direct io, as it found the buffer_head for those pages
still had DIRTY flag set, then it will fall back to buffer io mode.

To make a summary of the issue, as writeback ingores EOF pages, once any
EOF page is generated, any write to it will only go to the page cache,
it will never be flushed to disk even file size extends and that page is
not EOF page any more.  The fix is to avoid zero EOF blocks with buffer
write.

The following code snippet from qemu-img could trigger the corruption.

  656   open("6b3711ae-3306-4bdd-823c-cf1c0060a095.conv.2", O_RDWR|O_DIRECT|O_CLOEXEC) = 11
  ...
  660   fallocate(11, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 2275868672, 327680 <unfinished ...>
  660   fallocate(11, 0, 2275868672, 327680) = 0
  658   pwrite64(11, "

Link: https://lkml.kernel.org/r/20210722054923.24389-2-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/file.c | 99 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 60 insertions(+), 39 deletions(-)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 53bb46ce3cbb..54d7843c0211 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1529,6 +1529,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
 	}
 }
 
+/*
+ * zero out partial blocks of one cluster.
+ *
+ * start: file offset where zero starts, will be made upper block aligned.
+ * len: it will be trimmed to the end of current cluster if "start + len"
+ *      is bigger than it.
+ */
+static int ocfs2_zeroout_partial_cluster(struct inode *inode,
+					u64 start, u64 len)
+{
+	int ret;
+	u64 start_block, end_block, nr_blocks;
+	u64 p_block, offset;
+	u32 cluster, p_cluster, nr_clusters;
+	struct super_block *sb = inode->i_sb;
+	u64 end = ocfs2_align_bytes_to_clusters(sb, start);
+
+	if (start + len < end)
+		end = start + len;
+
+	start_block = ocfs2_blocks_for_bytes(sb, start);
+	end_block = ocfs2_blocks_for_bytes(sb, end);
+	nr_blocks = end_block - start_block;
+	if (!nr_blocks)
+		return 0;
+
+	cluster = ocfs2_bytes_to_clusters(sb, start);
+	ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
+				&nr_clusters, NULL);
+	if (ret)
+		return ret;
+	if (!p_cluster)
+		return 0;
+
+	offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
+	p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
+	return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
+}
+
 static int ocfs2_zero_partial_clusters(struct inode *inode,
 				       u64 start, u64 len)
 {
@@ -1538,6 +1577,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	unsigned int csize = osb->s_clustersize;
 	handle_t *handle;
+	loff_t isize = i_size_read(inode);
 
 	/*
 	 * The "start" and "end" values are NOT necessarily part of
@@ -1558,6 +1598,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
 	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
 		goto out;
 
+	/* No page cache for EOF blocks, issue zero out to disk. */
+	if (end > isize) {
+		/*
+		 * zeroout eof blocks in last cluster starting from
+		 * "isize" even "start" > "isize" because it is
+		 * complicated to zeroout just at "start" as "start"
+		 * may be not aligned with block size, buffer write
+		 * would be required to do that, but out of eof buffer
+		 * write is not supported.
+		 */
+		ret = ocfs2_zeroout_partial_cluster(inode, isize,
+					end - isize);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+		if (start >= isize)
+			goto out;
+		end = isize;
+	}
 	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -1855,45 +1915,6 @@ out:
 	return ret;
 }
 
-/*
- * zero out partial blocks of one cluster.
- *
- * start: file offset where zero starts, will be made upper block aligned.
- * len: it will be trimmed to the end of current cluster if "start + len"
- *      is bigger than it.
- */
-static int ocfs2_zeroout_partial_cluster(struct inode *inode,
-					u64 start, u64 len)
-{
-	int ret;
-	u64 start_block, end_block, nr_blocks;
-	u64 p_block, offset;
-	u32 cluster, p_cluster, nr_clusters;
-	struct super_block *sb = inode->i_sb;
-	u64 end = ocfs2_align_bytes_to_clusters(sb, start);
-
-	if (start + len < end)
-		end = start + len;
-
-	start_block = ocfs2_blocks_for_bytes(sb, start);
-	end_block = ocfs2_blocks_for_bytes(sb, end);
-	nr_blocks = end_block - start_block;
-	if (!nr_blocks)
-		return 0;
-
-	cluster = ocfs2_bytes_to_clusters(sb, start);
-	ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
-				&nr_clusters, NULL);
-	if (ret)
-		return ret;
-	if (!p_cluster)
-		return 0;
-
-	offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
-	p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
-	return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
-}
-
 /*
  * Parts of this function taken from xfs_change_file_space()
  */

From 30def93565e5ba08676aa2b9083f253fc586dbed Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 29 Jul 2021 14:53:44 -0700
Subject: [PATCH 361/502] mm: memcontrol: fix blocking rstat function called
 from atomic cgroup1 thresholding code

Dan Carpenter reports:

    The patch 2d146aa3aa84: "mm: memcontrol: switch to rstat" from Apr
    29, 2021, leads to the following static checker warning:

	    kernel/cgroup/rstat.c:200 cgroup_rstat_flush()
	    warn: sleeping in atomic context

    mm/memcontrol.c
      3572  static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
      3573  {
      3574          unsigned long val;
      3575
      3576          if (mem_cgroup_is_root(memcg)) {
      3577                  cgroup_rstat_flush(memcg->css.cgroup);
			    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    This is from static analysis and potentially a false positive.  The
    problem is that mem_cgroup_usage() is called from __mem_cgroup_threshold()
    which holds an rcu_read_lock().  And the cgroup_rstat_flush() function
    can sleep.

      3578                  val = memcg_page_state(memcg, NR_FILE_PAGES) +
      3579                          memcg_page_state(memcg, NR_ANON_MAPPED);
      3580                  if (swap)
      3581                          val += memcg_page_state(memcg, MEMCG_SWAP);
      3582          } else {
      3583                  if (!swap)
      3584                          val = page_counter_read(&memcg->memory);
      3585                  else
      3586                          val = page_counter_read(&memcg->memsw);
      3587          }
      3588          return val;
      3589  }

__mem_cgroup_threshold() indeed holds the rcu lock.  In addition, the
thresholding code is invoked during stat changes, and those contexts
have irqs disabled as well.  If the lock breaking occurs inside the
flush function, it will result in a sleep from an atomic context.

Use the irqsafe flushing variant in mem_cgroup_usage() to fix this.

Link: https://lkml.kernel.org/r/20210726150019.251820-1-hannes@cmpxchg.org
Fixes: 2d146aa3aa84 ("mm: memcontrol: switch to rstat")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Chris Down <chris@chrisdown.name>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ae1f5d0cb581..eb8e87c4833f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3574,7 +3574,8 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
 	unsigned long val;
 
 	if (mem_cgroup_is_root(memcg)) {
-		cgroup_rstat_flush(memcg->css.cgroup);
+		/* mem_cgroup_threshold() calls here from irqsafe context */
+		cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
 		val = memcg_page_state(memcg, NR_FILE_PAGES) +
 			memcg_page_state(memcg, NR_ANON_MAPPED);
 		if (swap)

From b5916c025432b7c776b6bb13617485fbc0bd3ebd Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Thu, 29 Jul 2021 14:53:47 -0700
Subject: [PATCH 362/502] mm/migrate: fix NR_ISOLATED corruption on 64-bit

Similar to commit 2da9f6305f30 ("mm/vmscan: fix NR_ISOLATED_FILE
corruption on 64-bit") avoid using unsigned int for nr_pages.  With
unsigned int type the large unsigned int converts to a large positive
signed long.

Symptoms include CMA allocations hanging forever due to
alloc_contig_range->...->isolate_migratepages_block waiting forever in
"while (unlikely(too_many_isolated(pgdat)))".

Link: https://lkml.kernel.org/r/20210728042531.359409-1-aneesh.kumar@linux.ibm.com
Fixes: c5fc5c3ae0c8 ("mm: migrate: account THP NUMA migration counters correctly")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 34a9ad3e0a4f..7e240437e7d9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2068,7 +2068,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 	LIST_HEAD(migratepages);
 	new_page_t *new;
 	bool compound;
-	unsigned int nr_pages = thp_nr_pages(page);
+	int nr_pages = thp_nr_pages(page);
 
 	/*
 	 * PTE mapped THP or HugeTLB page can't reach here so the page could

From f227f0faf63b46a113c4d1aca633c80195622dd2 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Thu, 29 Jul 2021 14:53:50 -0700
Subject: [PATCH 363/502] slub: fix unreclaimable slab stat for bulk free

SLUB uses page allocator for higher order allocations and update
unreclaimable slab stat for such allocations.  At the moment, the bulk
free for SLUB does not share code with normal free code path for these
type of allocations and have missed the stat update.  So, fix the stat
update by common code.  The user visible impact of the bug is the
potential of inconsistent unreclaimable slab stat visible through
meminfo and vmstat.

Link: https://lkml.kernel.org/r/20210728155354.3440560-1-shakeelb@google.com
Fixes: 6a486c0ad4dc ("mm, sl[ou]b: improve memory accounting")
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 090fa14628f9..af984e4990e8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3236,6 +3236,16 @@ struct detached_freelist {
 	struct kmem_cache *s;
 };
 
+static inline void free_nonslab_page(struct page *page)
+{
+	unsigned int order = compound_order(page);
+
+	VM_BUG_ON_PAGE(!PageCompound(page), page);
+	kfree_hook(page_address(page));
+	mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
+	__free_pages(page, order);
+}
+
 /*
  * This function progressively scans the array with free objects (with
  * a limited look ahead) and extract objects belonging to the same
@@ -3272,9 +3282,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
 	if (!s) {
 		/* Handle kalloc'ed objects */
 		if (unlikely(!PageSlab(page))) {
-			BUG_ON(!PageCompound(page));
-			kfree_hook(object);
-			__free_pages(page, compound_order(page));
+			free_nonslab_page(page);
 			p[size] = NULL; /* mark object processed */
 			return size;
 		}
@@ -4250,13 +4258,7 @@ void kfree(const void *x)
 
 	page = virt_to_head_page(x);
 	if (unlikely(!PageSlab(page))) {
-		unsigned int order = compound_order(page);
-
-		BUG_ON(!PageCompound(page));
-		kfree_hook(object);
-		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
-				      -(PAGE_SIZE << order));
-		__free_pages(page, order);
+		free_nonslab_page(page);
 		return;
 	}
 	slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);

From 121dffe20b141c9b27f39d49b15882469cbebae7 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Thu, 29 Jul 2021 14:53:54 -0700
Subject: [PATCH 364/502] mm/memcg: fix NULL pointer dereference in
 memcg_slab_free_hook()

When I use kfree_rcu() to free a large memory allocated by kmalloc_node(),
the following dump occurs.

  BUG: kernel NULL pointer dereference, address: 0000000000000020
  [...]
  Oops: 0000 [#1] SMP
  [...]
  Workqueue: events kfree_rcu_work
  RIP: 0010:__obj_to_index include/linux/slub_def.h:182 [inline]
  RIP: 0010:obj_to_index include/linux/slub_def.h:191 [inline]
  RIP: 0010:memcg_slab_free_hook+0x120/0x260 mm/slab.h:363
  [...]
  Call Trace:
    kmem_cache_free_bulk+0x58/0x630 mm/slub.c:3293
    kfree_bulk include/linux/slab.h:413 [inline]
    kfree_rcu_work+0x1ab/0x200 kernel/rcu/tree.c:3300
    process_one_work+0x207/0x530 kernel/workqueue.c:2276
    worker_thread+0x320/0x610 kernel/workqueue.c:2422
    kthread+0x13d/0x160 kernel/kthread.c:313
    ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294

When kmalloc_node() a large memory, page is allocated, not slab, so when
freeing memory via kfree_rcu(), this large memory should not be used by
memcg_slab_free_hook(), because memcg_slab_free_hook() is is used for
slab.

Using page_objcgs_check() instead of page_objcgs() in
memcg_slab_free_hook() to fix this bug.

Link: https://lkml.kernel.org/r/20210728145655.274476-1-wanghai38@huawei.com
Fixes: 270c6a71460e ("mm: memcontrol/slab: Use helpers to access slab page's memcg_data")
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slab.h b/mm/slab.h
index f997fd5e42c8..58c01a34e5b8 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -346,7 +346,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
 			continue;
 
 		page = virt_to_head_page(p[i]);
-		objcgs = page_objcgs(page);
+		objcgs = page_objcgs_check(page);
 		if (!objcgs)
 			continue;
 

From 852a8a97776a153be2e6c803218eced45f37a19c Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Fri, 30 Jul 2021 11:02:54 +0200
Subject: [PATCH 365/502] ALSA: pcm - fix mmap capability check for the
 snd-dummy driver

The snd-dummy driver (fake_buffer configuration) uses the ops->page
callback for the mmap operations. Allow mmap for this case, too.

Cc: <stable@vger.kernel.org>
Fixes: c4824ae7db41 ("ALSA: pcm: Fix mmap capability check")
Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Link: https://lore.kernel.org/r/20210730090254.612478-1-perex@perex.cz
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/pcm_native.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 6a2971a7e6a1..09c0e2a6489c 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -246,7 +246,7 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream)
 	if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP))
 		return false;
 
-	if (substream->ops->mmap)
+	if (substream->ops->mmap || substream->ops->page)
 		return true;
 
 	switch (substream->dma_buffer.dev.type) {

From 9bac1bd6e6d36459087a728a968e79e37ebcea1a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 30 Jul 2021 18:26:22 -0300
Subject: [PATCH 366/502] Revert "perf map: Fix dso->nsinfo refcounting"

This makes 'perf top' abort in some cases, and the right fix will
involve surgery that is too much to do at this stage, so revert for now
and fix it in the next merge window.

This reverts commit 2d6b74baa7147251c30a46c4996e8cc224aa2dc5.

Cc: Riccardo Mancini <rickyman7@gmail.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Krister Johansen <kjlx@templeofstupid.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/map.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 72e7f3616157..8af693d9678c 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -192,8 +192,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 			if (!(prot & PROT_EXEC))
 				dso__set_loaded(dso);
 		}
-
-		nsinfo__put(dso->nsinfo);
 		dso->nsinfo = nsi;
 
 		if (build_id__is_defined(bid))

From 3a34b13a88caeb2800ab44a4918f230041b37dd9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 30 Jul 2021 15:42:34 -0700
Subject: [PATCH 367/502] pipe: make pipe writes always wake up readers

Since commit 1b6b26ae7053 ("pipe: fix and clarify pipe write wakeup
logic") we have sanitized the pipe write logic, and would only try to
wake up readers if they needed it.

In particular, if the pipe already had data in it before the write,
there was no point in trying to wake up a reader, since any existing
readers must have been aware of the pre-existing data already.  Doing
extraneous wakeups will only cause potential thundering herd problems.

However, it turns out that some Android libraries have misused the EPOLL
interface, and expected "edge triggered" be to "any new write will
trigger it".  Even if there was no edge in sight.

Quoting Sandeep Patil:
 "The commit 1b6b26ae7053 ('pipe: fix and clarify pipe write wakeup
  logic') changed pipe write logic to wakeup readers only if the pipe
  was empty at the time of write. However, there are libraries that
  relied upon the older behavior for notification scheme similar to
  what's described in [1]

  One such library 'realm-core'[2] is used by numerous Android
  applications. The library uses a similar notification mechanism as GNU
  Make but it never drains the pipe until it is full. When Android moved
  to v5.10 kernel, all applications using this library stopped working.

  The library has since been fixed[3] but it will be a while before all
  applications incorporate the updated library"

Our regression rule for the kernel is that if applications break from
new behavior, it's a regression, even if it was because the application
did something patently wrong.  Also note the original report [4] by
Michal Kerrisk about a test for this epoll behavior - but at that point
we didn't know of any actual broken use case.

So add the extraneous wakeup, to approximate the old behavior.

[ I say "approximate", because the exact old behavior was to do a wakeup
  not for each write(), but for each pipe buffer chunk that was filled
  in. The behavior introduced by this change is not that - this is just
  "every write will cause a wakeup, whether necessary or not", which
  seems to be sufficient for the broken library use. ]

It's worth noting that this adds the extraneous wakeup only for the
write side, while the read side still considers the "edge" to be purely
about reading enough from the pipe to allow further writes.

See commit f467a6a66419 ("pipe: fix and clarify pipe read wakeup logic")
for the pipe read case, which remains that "only wake up if the pipe was
full, and we read something from it".

Link: https://lore.kernel.org/lkml/CAHk-=wjeG0q1vgzu4iJhW5juPkTsjTYmiqiMUYAebWW+0bam6w@mail.gmail.com/ [1]
Link: https://github.com/realm/realm-core [2]
Link: https://github.com/realm/realm-core/issues/4666 [3]
Link: https://lore.kernel.org/lkml/CAKgNAkjMBGeAwF=2MKK758BhxvW58wYTgYKB2V-gY1PwXxrH+Q@mail.gmail.com/ [4]
Link: https://lore.kernel.org/lkml/20210729222635.2937453-1-sspatil@android.com/
Reported-by: Sandeep Patil <sspatil@android.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index bfd946a9ad01..9ef4231cce61 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -429,20 +429,20 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 #endif
 
 	/*
-	 * Only wake up if the pipe started out empty, since
-	 * otherwise there should be no readers waiting.
+	 * Epoll nonsensically wants a wakeup whether the pipe
+	 * was already empty or not.
 	 *
 	 * If it wasn't empty we try to merge new data into
 	 * the last buffer.
 	 *
 	 * That naturally merges small writes, but it also
-	 * page-aligs the rest of the writes for large writes
+	 * page-aligns the rest of the writes for large writes
 	 * spanning multiple pages.
 	 */
 	head = pipe->head;
-	was_empty = pipe_empty(head, pipe->tail);
+	was_empty = true;
 	chars = total_len & (PAGE_SIZE-1);
-	if (chars && !was_empty) {
+	if (chars && !pipe_empty(head, pipe->tail)) {
 		unsigned int mask = pipe->ring_size - 1;
 		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
 		int offset = buf->offset + buf->len;

From ff41c28c4b54052942180d8b3f49e75f1445135a Mon Sep 17 00:00:00 2001
From: Kamal Agrawal <kamaagra@codeaurora.org>
Date: Fri, 30 Jul 2021 18:53:06 +0530
Subject: [PATCH 368/502] tracing: Fix NULL pointer dereference in
 start_creating

The event_trace_add_tracer() can fail. In this case, it leads to a crash
in start_creating with below call stack. Handle the error scenario
properly in trace_array_create_dir.

Call trace:
down_write+0x7c/0x204
start_creating.25017+0x6c/0x194
tracefs_create_file+0xc4/0x2b4
init_tracer_tracefs+0x5c/0x940
trace_array_create_dir+0x58/0xb4
trace_array_create+0x1bc/0x2b8
trace_array_get_by_name+0xdc/0x18c

Link: https://lkml.kernel.org/r/1627651386-21315-1-git-send-email-kamaagra@codeaurora.org

Cc: stable@vger.kernel.org
Fixes: 4114fbfd02f1 ("tracing: Enable creating new instance early boot")
Signed-off-by: Kamal Agrawal <kamaagra@codeaurora.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c59dd35a6da5..33899a71fdc1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9135,8 +9135,10 @@ static int trace_array_create_dir(struct trace_array *tr)
 		return -EINVAL;
 
 	ret = event_trace_add_tracer(tr->dir, tr);
-	if (ret)
+	if (ret) {
 		tracefs_remove(tr->dir);
+		return ret;
+	}
 
 	init_tracer_tracefs(tr, tr->dir);
 	__update_tracer_options(tr);

From f828b0bcacef189edbd247e9f48864fc36bfbe33 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Fri, 30 Jul 2021 19:59:50 -0700
Subject: [PATCH 369/502] clk: fix leak on devm_clk_bulk_get_all() unwind

clk_bulk_get_all() allocates an array of struct clk_bulk data for us
(unlike clk_bulk_get()), so we need to free it. Let's use the
clk_bulk_put_all() helper.

kmemleak complains, on an RK3399 Gru/Kevin system:

unreferenced object 0xffffff80045def00 (size 128):
  comm "swapper/0", pid 1, jiffies 4294667682 (age 86.394s)
  hex dump (first 32 bytes):
    44 32 60 fe fe ff ff ff 00 00 00 00 00 00 00 00  D2`.............
    48 32 60 fe fe ff ff ff 00 00 00 00 00 00 00 00  H2`.............
  backtrace:
    [<00000000742860d6>] __kmalloc+0x22c/0x39c
    [<00000000b0493f2c>] clk_bulk_get_all+0x64/0x188
    [<00000000325f5900>] devm_clk_bulk_get_all+0x58/0xa8
    [<00000000175b9bc5>] dwc3_probe+0x8ac/0xb5c
    [<000000009169e2f9>] platform_drv_probe+0x9c/0xbc
    [<000000005c51e2ee>] really_probe+0x13c/0x378
    [<00000000c47b1f24>] driver_probe_device+0x84/0xc0
    [<00000000f870fcfb>] __device_attach_driver+0x94/0xb0
    [<000000004d1b92ae>] bus_for_each_drv+0x8c/0xd8
    [<00000000481d60c3>] __device_attach+0xc4/0x150
    [<00000000a163bd36>] device_initial_probe+0x1c/0x28
    [<00000000accb6bad>] bus_probe_device+0x3c/0x9c
    [<000000001a199f89>] device_add+0x218/0x3cc
    [<000000001bd84952>] of_device_add+0x40/0x50
    [<000000009c658c29>] of_platform_device_create_pdata+0xac/0x100
    [<0000000021c69ba4>] of_platform_bus_create+0x190/0x224

Fixes: f08c2e2865f6 ("clk: add managed version of clk_bulk_get_all")
Cc: Dong Aisheng <aisheng.dong@nxp.com>
Cc: stable@vger.kernel.org
Signed-off-by: Brian Norris <briannorris@chromium.org>
Link: https://lore.kernel.org/r/20210731025950.2238582-1-briannorris@chromium.org
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-devres.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
index be160764911b..f9d5b7334341 100644
--- a/drivers/clk/clk-devres.c
+++ b/drivers/clk/clk-devres.c
@@ -92,13 +92,20 @@ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks,
 }
 EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional);
 
+static void devm_clk_bulk_release_all(struct device *dev, void *res)
+{
+	struct clk_bulk_devres *devres = res;
+
+	clk_bulk_put_all(devres->num_clks, devres->clks);
+}
+
 int __must_check devm_clk_bulk_get_all(struct device *dev,
 				       struct clk_bulk_data **clks)
 {
 	struct clk_bulk_devres *devres;
 	int ret;
 
-	devres = devres_alloc(devm_clk_bulk_release,
+	devres = devres_alloc(devm_clk_bulk_release_all,
 			      sizeof(*devres), GFP_KERNEL);
 	if (!devres)
 		return -ENOMEM;

From c500bee1c5b2f1d59b1081ac879d73268ab0ff17 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 1 Aug 2021 17:04:17 -0700
Subject: [PATCH 370/502] Linux 5.14-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6b555f64df06..27a072cffcb9 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Opossums on Parade
 
 # *DOCUMENTATION*

From 7199ddede9f0f2f68d41e6928e1c6c4bca9c39c0 Mon Sep 17 00:00:00 2001
From: Juergen Borleis <jbe@pengutronix.de>
Date: Thu, 29 Jul 2021 09:18:21 +0200
Subject: [PATCH 371/502] dmaengine: imx-dma: configure the generic DMA type to
 make it work

Commit dea7a9fbb009 ("dmaengine: imx-dma: remove dma_slave_config
direction usage") changes the method from a "configuration when called"
to an "configuration when used". Due to this, only the cyclic DMA type
gets configured correctly, while the generic DMA type is left
non-configured.

Without this additional call, the struct imxdma_channel::word_size member
is stuck at DMA_SLAVE_BUSWIDTH_UNDEFINED and imxdma_prep_slave_sg() always
returns NULL.

Signed-off-by: Juergen Borleis <jbe@pengutronix.de>
Fixes: dea7a9fbb009 ("dmaengine: imx-dma: remove dma_slave_config direction usage")
Link: https://lore.kernel.org/r/20210729071821.9857-1-jbe@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/imx-dma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 7f116bbcfad2..2ddc31e64db0 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -812,6 +812,8 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		dma_length += sg_dma_len(sg);
 	}
 
+	imxdma_config_write(chan, &imxdmac->config, direction);
+
 	switch (imxdmac->word_size) {
 	case DMA_SLAVE_BUSWIDTH_4_BYTES:
 		if (sg_dma_len(sgl) & 3 || sgl->dma_address & 3)

From eda80d7c9c4db0f55f130e38c682e19b58d5add7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 1 Aug 2021 13:38:01 +0200
Subject: [PATCH 372/502] ALSA: memalloc: Fix regression with
 SNDRV_DMA_TYPE_CONTINUOUS

The recent code refactoring made the mmap of continuous pages to be
done via the own helper snd_dma_continuous_mmap() with
remap_pfn_range().  There I overlooked that dmab->addr isn't set for
the allocation with SNDRV_DMA_TYPE_CONTINUOUS.  This resulted always
in an error at mmap with this buffer type on the system such as
Intel SST Baytrail driver.

This patch fixes the regression by passing the correct address.

Fixes: 30b7ba6972d5 ("ALSA: core: Add continuous and vmalloc mmap ops")
Reported-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/8d6674da-7d7b-803e-acc9-7de6cb1223fa@redhat.com
Link: https://lore.kernel.org/r/20210801113801.31290-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/memalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index 83b79edfa52d..439a358ecfe9 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -215,7 +215,7 @@ static int snd_dma_continuous_mmap(struct snd_dma_buffer *dmab,
 				   struct vm_area_struct *area)
 {
 	return remap_pfn_range(area, area->vm_start,
-			       dmab->addr >> PAGE_SHIFT,
+			       page_to_pfn(virt_to_page(dmab->area)),
 			       area->vm_end - area->vm_start,
 			       area->vm_page_prot);
 }

From 1159e25c137422bdc48ee96e3fb014bd942092c6 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <pkushwaha@marvell.com>
Date: Thu, 29 Jul 2021 14:43:06 +0300
Subject: [PATCH 373/502] qede: fix crash in rmmod qede while automatic debug
 collection

A crash has been observed if rmmod is done while automatic debug
collection in progress. It is due to a race  condition between
both of them.

To fix stop the sp_task during unload to avoid running qede_sp_task
even if they are schedule during removal process.

Signed-off-by: Alok Prasad <palok@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede.h      | 1 +
 drivers/net/ethernet/qlogic/qede/qede_main.c | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 2e62a2c4eb63..5630008f38b7 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -501,6 +501,7 @@ struct qede_fastpath {
 #define QEDE_SP_HW_ERR                  4
 #define QEDE_SP_ARFS_CONFIG             5
 #define QEDE_SP_AER			7
+#define QEDE_SP_DISABLE			8
 
 #ifdef CONFIG_RFS_ACCEL
 int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 01ac1e93d27a..7c6064baeba2 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1009,6 +1009,13 @@ static void qede_sp_task(struct work_struct *work)
 	struct qede_dev *edev = container_of(work, struct qede_dev,
 					     sp_task.work);
 
+	/* Disable execution of this deferred work once
+	 * qede removal is in progress, this stop any future
+	 * scheduling of sp_task.
+	 */
+	if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags))
+		return;
+
 	/* The locking scheme depends on the specific flag:
 	 * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
 	 * ensure that ongoing flows are ended and new ones are not started.
@@ -1300,6 +1307,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 	qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY));
 
 	if (mode != QEDE_REMOVE_RECOVERY) {
+		set_bit(QEDE_SP_DISABLE, &edev->sp_flags);
 		unregister_netdev(ndev);
 
 		cancel_delayed_work_sync(&edev->sp_task);

From d51c5907e9809a803b276883d203f45849abd4d6 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 29 Jul 2021 15:48:20 +0200
Subject: [PATCH 374/502] net, gro: Set inner transport header offset in
 tcp/udp GRO hook

GSO expects inner transport header offset to be valid when
skb->encapsulation flag is set. GSO uses this value to calculate the length
of an individual segment of a GSO packet in skb_gso_transport_seglen().

However, tcp/udp gro_complete callbacks don't update the
skb->inner_transport_header when processing an encapsulated TCP/UDP
segment. As a result a GRO skb has ->inner_transport_header set to a value
carried over from earlier skb processing.

This can have mild to tragic consequences. From miscalculating the GSO
segment length to triggering a page fault [1], when trying to read TCP/UDP
header at an address past the skb->data page.

The latter scenario leads to an oops report like so:

  BUG: unable to handle page fault for address: ffff9fa7ec00d008
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 123f201067 P4D 123f201067 PUD 123f209067 PMD 0
  Oops: 0000 [#1] SMP NOPTI
  CPU: 44 PID: 0 Comm: swapper/44 Not tainted 5.4.53-cloudflare-2020.7.21 #1
  Hardware name: HYVE EDGE-METAL-GEN10/HS-1811DLite1, BIOS V2.15 02/21/2020
  RIP: 0010:skb_gso_transport_seglen+0x44/0xa0
  Code: c0 41 83 e0 11 f6 87 81 00 00 00 20 74 30 0f b7 87 aa 00 00 00 0f [...]
  RSP: 0018:ffffad8640bacbb8 EFLAGS: 00010202
  RAX: 000000000000feda RBX: ffff9fcc8d31bc00 RCX: ffff9fa7ec00cffc
  RDX: ffff9fa7ebffdec0 RSI: 000000000000feda RDI: 0000000000000122
  RBP: 00000000000005c4 R08: 0000000000000001 R09: 0000000000000000
  R10: ffff9fe588ae3800 R11: ffff9fe011fc92f0 R12: ffff9fcc8d31bc00
  R13: ffff9fe0119d4300 R14: 00000000000005c4 R15: ffff9fba57d70900
  FS:  0000000000000000(0000) GS:ffff9fe68df00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: ffff9fa7ec00d008 CR3: 0000003e99b1c000 CR4: 0000000000340ee0
  Call Trace:
   <IRQ>
   skb_gso_validate_network_len+0x11/0x70
   __ip_finish_output+0x109/0x1c0
   ip_sublist_rcv_finish+0x57/0x70
   ip_sublist_rcv+0x2aa/0x2d0
   ? ip_rcv_finish_core.constprop.0+0x390/0x390
   ip_list_rcv+0x12b/0x14f
   __netif_receive_skb_list_core+0x2a9/0x2d0
   netif_receive_skb_list_internal+0x1b5/0x2e0
   napi_complete_done+0x93/0x140
   veth_poll+0xc0/0x19f [veth]
   ? mlx5e_napi_poll+0x221/0x610 [mlx5_core]
   net_rx_action+0x1f8/0x790
   __do_softirq+0xe1/0x2bf
   irq_exit+0x8e/0xc0
   do_IRQ+0x58/0xe0
   common_interrupt+0xf/0xf
   </IRQ>

The bug can be observed in a simple setup where we send IP/GRE/IP/TCP
packets into a netns over a veth pair. Inside the netns, packets are
forwarded to dummy device:

  trafgen -> [veth A]--[veth B] -forward-> [dummy]

For veth B to GRO aggregate packets on receive, it needs to have an XDP
program attached (for example, a trivial XDP_PASS). Additionally, for UDP,
we need to enable GSO_UDP_L4 feature on the device:

  ip netns exec A ethtool -K AB rx-udp-gro-forwarding on

The last component is an artificial delay to increase the chances of GRO
batching happening:

  ip netns exec A tc qdisc add dev AB root \
     netem delay 200us slot 5ms 10ms packets 2 bytes 64k

With such a setup in place, the bug can be observed by tracing the skb
outer and inner offsets when GSO skb is transmitted from the dummy device:

tcp:

FUNC              DEV   SKB_LEN  NH  TH ENC INH ITH GSO_SIZE GSO_TYPE
ip_finish_output  dumB     2830 270 290   1 294 254     1383 (tcpv4,gre,)
                                                ^^^
udp:

FUNC              DEV   SKB_LEN  NH  TH ENC INH ITH GSO_SIZE GSO_TYPE
ip_finish_output  dumB     2818 270 290   1 294 254     1383 (gre,udp_l4,)
                                                ^^^

Fix it by updating the inner transport header offset in tcp/udp
gro_complete callbacks, similar to how {inet,ipv6}_gro_complete callbacks
update the inner network header offset, when skb->encapsulation flag is
set.

[1] https://lore.kernel.org/netdev/CAKxSbF01cLpZem2GFaUaifh0S-5WYViZemTicAg7FCHOnh6kug@mail.gmail.com/

Fixes: bf296b125b21 ("tcp: Add GRO support")
Fixes: f993bc25e519 ("net: core: handle encapsulation offloads when computing segment lengths")
Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
Reported-by: Alex Forster <aforster@cloudflare.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_offload.c | 3 +++
 net/ipv4/udp_offload.c | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index e09147ac9a99..fc61cd3fea65 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -298,6 +298,9 @@ int tcp_gro_complete(struct sk_buff *skb)
 	if (th->cwr)
 		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 
+	if (skb->encapsulation)
+		skb->inner_transport_header = skb->transport_header;
+
 	return 0;
 }
 EXPORT_SYMBOL(tcp_gro_complete);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 9dde1e5fb449..1380a6b6f4ff 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -624,6 +624,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb)
 
 	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
 	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+
+	if (skb->encapsulation)
+		skb->inner_transport_header = skb->transport_header;
+
 	return 0;
 }
 

From 85b1ebfea2b0d8797266bcc6f04b6cc87e38290a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 30 Jul 2021 08:54:08 +0100
Subject: [PATCH 375/502] interconnect: Fix undersized devress_alloc allocation

The expression sizeof(**ptr) for the void **ptr is just 1 rather than
the size of a pointer. Fix this by using sizeof(*ptr).

Addresses-Coverity: ("Wrong sizeof argument")
Fixes: e145d9a184f2 ("interconnect: Add devm_of_icc_get() as exported API for users")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20210730075408.19945-1-colin.king@canonical.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 1b2c564eaa99..7887941730db 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -403,7 +403,7 @@ struct icc_path *devm_of_icc_get(struct device *dev, const char *name)
 {
 	struct icc_path **ptr, *path;
 
-	ptr = devres_alloc(devm_icc_release, sizeof(**ptr), GFP_KERNEL);
+	ptr = devres_alloc(devm_icc_release, sizeof(*ptr), GFP_KERNEL);
 	if (!ptr)
 		return ERR_PTR(-ENOMEM);
 

From ebca25ead0711729e0aeeec45062e7ac4df3e158 Mon Sep 17 00:00:00 2001
From: Yannick Vignon <yannick.vignon@nxp.com>
Date: Fri, 30 Jul 2021 18:53:21 +0200
Subject: [PATCH 376/502] net/sched: taprio: Fix init procedure

Commit 13511704f8d759 ("net: taprio offload: enforce qdisc to netdev queue mapping")
resulted in duplicate entries in the qdisc hash.
While this did not impact the overall operation of the qdisc and taprio
code paths, it did result in an infinite loop when dumping the qdisc
properties, at least on one target (NXP LS1028 ARDB).
Removing the duplicate call to qdisc_hash_add() solves the problem.

Fixes: 13511704f8d759 ("net: taprio offload: enforce qdisc to netdev queue mapping")
Signed-off-by: Yannick Vignon <yannick.vignon@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_taprio.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 07b30d0601d7..9c79374457a0 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1739,8 +1739,6 @@ static void taprio_attach(struct Qdisc *sch)
 		if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
 			qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 			old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
-			if (ntx < dev->real_num_tx_queues)
-				qdisc_hash_add(qdisc, false);
 		} else {
 			old = dev_graft_qdisc(qdisc->dev_queue, sch);
 			qdisc_refcount_inc(sch);

From 0d5c3954b35eddff0da0436c31e8d721eceb7dc2 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 1 Aug 2021 20:00:23 -0700
Subject: [PATCH 377/502] spi: mediatek: Fix fifo transfer

Commit 3a70dd2d0503 ("spi: mediatek: fix fifo rx mode") claims that
fifo RX mode was never handled, and adds the presumably missing code
to the FIFO transfer function. However, the claim that receive data
was not handled is incorrect. It was handled as part of interrupt
handling after the transfer was complete. The code added with the above
mentioned commit reads data from the receive FIFO before the transfer
is started, which is wrong. This results in an actual transfer error
on a Hayato Chromebook.

Remove the code trying to handle receive data before the transfer is
started to fix the problem.

Fixes: 3a70dd2d0503 ("spi: mediatek: fix fifo rx mode")
Cc: Peter Hess <peter.hess@ph-home.de>
Cc: Frank Wunderlich <frank-w@public-files.de>
Cc: Tzung-Bi Shih <tzungbi@google.com>
Cc: Hsin-Yi Wang <hsinyi@google.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Hsin-Yi Wang <hsinyi@google.com>
Tested-by: Tzung-Bi Shih <tzungbi@google.com>
Link: https://lore.kernel.org/r/20210802030023.1748777-1-linux@roeck-us.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mt65xx.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 68dca8ceb3ad..7914255521c3 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -426,24 +426,15 @@ static int mtk_spi_fifo_transfer(struct spi_master *master,
 	mtk_spi_prepare_transfer(master, xfer);
 	mtk_spi_setup_packet(master);
 
-	cnt = xfer->len / 4;
-	if (xfer->tx_buf)
+	if (xfer->tx_buf) {
+		cnt = xfer->len / 4;
 		iowrite32_rep(mdata->base + SPI_TX_DATA_REG, xfer->tx_buf, cnt);
-
-	if (xfer->rx_buf)
-		ioread32_rep(mdata->base + SPI_RX_DATA_REG, xfer->rx_buf, cnt);
-
-	remainder = xfer->len % 4;
-	if (remainder > 0) {
-		reg_val = 0;
-		if (xfer->tx_buf) {
+		remainder = xfer->len % 4;
+		if (remainder > 0) {
+			reg_val = 0;
 			memcpy(&reg_val, xfer->tx_buf + (cnt * 4), remainder);
 			writel(reg_val, mdata->base + SPI_TX_DATA_REG);
 		}
-		if (xfer->rx_buf) {
-			reg_val = readl(mdata->base + SPI_RX_DATA_REG);
-			memcpy(xfer->rx_buf + (cnt * 4), &reg_val, remainder);
-		}
 	}
 
 	mtk_spi_enable_transfer(master);

From 40e159403896f7d55c98f858d0b20fee1d941fa4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 2 Aug 2021 12:21:30 +0100
Subject: [PATCH 378/502] mhi: Fix networking tree build.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mhi.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 944aa3aa3035..5e08468854db 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -719,8 +719,13 @@ void mhi_device_put(struct mhi_device *mhi_dev);
  *                            host and device execution environments match and
  *                            channels are in a DISABLED state.
  * @mhi_dev: Device associated with the channels
+ * @flags: MHI channel flags
  */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev);
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev,
+			     unsigned int flags);
+
+/* Automatically allocate and queue inbound buffers */
+#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0)
 
 /**
  * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer.

From 47091f473b364c98207c4def197a0ae386fc9af1 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Sat, 26 Jun 2021 02:01:03 +0200
Subject: [PATCH 379/502] ARM: dts: nomadik: Fix up interrupt controller node
 names

Once the new schema interrupt-controller/arm,vic.yaml is added, we get
the below warnings:

	arch/arm/boot/dts/ste-nomadik-nhk15.dt.yaml:
	intc@10140000: $nodename:0: 'intc@10140000' does not match
	'^interrupt-controller(@[0-9a-f,]+)*$'

Fix the node names for the interrupt controller to conform
to the standard node name interrupt-controller@..

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20210617210825.3064367-2-sudeep.holla@arm.com
Link: https://lore.kernel.org/r/20210626000103.830184-1-linus.walleij@linaro.org'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
index c9b906432341..1815361fe73c 100644
--- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
+++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
@@ -755,14 +755,14 @@
 			status = "disabled";
 		};
 
-		vica: intc@10140000 {
+		vica: interrupt-controller@10140000 {
 			compatible = "arm,versatile-vic";
 			interrupt-controller;
 			#interrupt-cells = <1>;
 			reg = <0x10140000 0x20>;
 		};
 
-		vicb: intc@10140020 {
+		vicb: interrupt-controller@10140020 {
 			compatible = "arm,versatile-vic";
 			interrupt-controller;
 			#interrupt-cells = <1>;

From a4282f66d90e93aacfe1b19509fd5851bf95be68 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Thu, 22 Jul 2021 02:26:16 +0300
Subject: [PATCH 380/502] soc/tegra: Make regulator couplers depend on
 CONFIG_REGULATOR

The regulator coupler drivers now use regulator-driver API function that
isn't available during compile-testing. Make regulator coupler drivers
dependent on CONFIG_REGULATOR in Kconfig.

Fixes: 03978d42ed0d ("soc/tegra: regulators: Bump voltages on system reboot")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/soc/tegra/Kconfig | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
index 20ace654553a..8b53ed1cc67e 100644
--- a/drivers/soc/tegra/Kconfig
+++ b/drivers/soc/tegra/Kconfig
@@ -15,7 +15,7 @@ config ARCH_TEGRA_2x_SOC
 	select PL310_ERRATA_769419 if CACHE_L2X0
 	select SOC_TEGRA_FLOWCTRL
 	select SOC_TEGRA_PMC
-	select SOC_TEGRA20_VOLTAGE_COUPLER
+	select SOC_TEGRA20_VOLTAGE_COUPLER if REGULATOR
 	select TEGRA_TIMER
 	help
 	  Support for NVIDIA Tegra AP20 and T20 processors, based on the
@@ -29,7 +29,7 @@ config ARCH_TEGRA_3x_SOC
 	select PL310_ERRATA_769419 if CACHE_L2X0
 	select SOC_TEGRA_FLOWCTRL
 	select SOC_TEGRA_PMC
-	select SOC_TEGRA30_VOLTAGE_COUPLER
+	select SOC_TEGRA30_VOLTAGE_COUPLER if REGULATOR
 	select TEGRA_TIMER
 	help
 	  Support for NVIDIA Tegra T30 processor family, based on the
@@ -155,7 +155,9 @@ config SOC_TEGRA_POWERGATE_BPMP
 config SOC_TEGRA20_VOLTAGE_COUPLER
 	bool "Voltage scaling support for Tegra20 SoCs"
 	depends on ARCH_TEGRA_2x_SOC || COMPILE_TEST
+	depends on REGULATOR
 
 config SOC_TEGRA30_VOLTAGE_COUPLER
 	bool "Voltage scaling support for Tegra30 SoCs"
 	depends on ARCH_TEGRA_3x_SOC || COMPILE_TEST
+	depends on REGULATOR

From 7f94b69ece515ac82defa60ef7cba2cf26180216 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 21 Jul 2021 23:13:43 +0200
Subject: [PATCH 381/502] ARM: ixp4xx: fix compile-testing soc drivers

Randconfig builds on the ixp4xx ethernet driver showed that the qmgr and
npe drivers are not actually built even when compile testing is enabled:

ERROR: modpost: "qmgr_stat_empty" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!
ERROR: modpost: "qmgr_enable_irq" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!
ERROR: modpost: "qmgr_set_irq" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!
ERROR: modpost: "__qmgr_request_queue" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!
ERROR: modpost: "npe_send_recv_message" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!
ERROR: modpost: "npe_recv_message" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!
ERROR: modpost: "npe_load_firmware" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!
ERROR: modpost: "npe_running" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!
ERROR: modpost: "qmgr_disable_irq" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!
ERROR: modpost: "qmgr_stat_below_low_watermark" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined!

Fix it by always entering the drivers/soc/ixp4xx/ directory, and fix the
resulting compile test failures by removing the #include statements
that prevent building on most other platforms.

Fixes: 7a6c9dbb36a4 ("soc: ixp4xx: Protect IXP4xx SoC drivers by ARCH_IXP4XX || COMPILE_TEST")
Fixes: fcf2d8978cd5 ("ARM: ixp4xx: Move NPE and QMGR to drivers/soc")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20210721211412.3537004-1-arnd@kernel.org'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/soc/Makefile             | 2 +-
 drivers/soc/ixp4xx/ixp4xx-npe.c  | 1 -
 drivers/soc/ixp4xx/ixp4xx-qmgr.c | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index f678e4d9e585..a05e9fbcd3e0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_MACH_DOVE)		+= dove/
 obj-y				+= fsl/
 obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-y				+= imx/
-obj-$(CONFIG_ARCH_IXP4XX)	+= ixp4xx/
+obj-y				+= ixp4xx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-$(CONFIG_LITEX_SOC_CONTROLLER) += litex/
 obj-y				+= mediatek/
diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c
index 7bd19354982a..fea50e04d5a1 100644
--- a/drivers/soc/ixp4xx/ixp4xx-npe.c
+++ b/drivers/soc/ixp4xx/ixp4xx-npe.c
@@ -21,7 +21,6 @@
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/soc/ixp4xx/npe.h>
-#include <mach/hardware.h>
 #include <linux/soc/ixp4xx/cpu.h>
 
 #define DEBUG_MSG			0
diff --git a/drivers/soc/ixp4xx/ixp4xx-qmgr.c b/drivers/soc/ixp4xx/ixp4xx-qmgr.c
index 7149510b307e..c6bf6ef257c0 100644
--- a/drivers/soc/ixp4xx/ixp4xx-qmgr.c
+++ b/drivers/soc/ixp4xx/ixp4xx-qmgr.c
@@ -12,7 +12,6 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/soc/ixp4xx/qmgr.h>
-#include <mach/hardware.h>
 #include <linux/soc/ixp4xx/cpu.h>
 
 static struct qmgr_regs __iomem *qmgr_regs;

From 796a8c85b1216618258e08b463d3bef0d7123760 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 21 Jul 2021 17:16:04 +0200
Subject: [PATCH 382/502] ARM: ixp4xx: goramo_mlr depends on old PCI driver

When this driver is disabled, the board file fails to build,
so add a dependency:

arch/arm/mach-ixp4xx/goramo_mlr.c: In function 'gmlr_pci_preinit':
arch/arm/mach-ixp4xx/goramo_mlr.c:472:9: error: implicit declaration of function 'ixp4xx_pci_preinit'; did you mean 'iop3xx_pci_preinit'? [-Werror=implicit-function-declaration]
  472 |         ixp4xx_pci_preinit();
      |         ^~~~~~~~~~~~~~~~~~
      |         iop3xx_pci_preinit
arch/arm/mach-ixp4xx/goramo_mlr.c: In function 'gmlr_pci_postinit':
arch/arm/mach-ixp4xx/goramo_mlr.c:481:22: error: implicit declaration of function 'ixp4xx_pci_read' [-Werror=implicit-function-declaration]
  481 |                 if (!ixp4xx_pci_read(addr, NP_CMD_CONFIGREAD, &value)) {
      |                      ^~~~~~~~~~~~~~~
arch/arm/mach-ixp4xx/goramo_mlr.c:231:35: error: 'IXP4XX_UART1_BASE_PHYS' undeclared here (not in a function)
  231 |                 .start          = IXP4XX_UART1_BASE_PHYS,
      |                                   ^~~~~~~~~~~~~~~~~~~~~~
arch/arm/mach-ixp4xx/goramo_mlr.c: In function 'gmlr_init':
arch/arm/mach-ixp4xx/goramo_mlr.c:376:9: error: implicit declaration of function 'ixp4xx_sys_init' [-Werror=implicit-function-declaration]
  376 |         ixp4xx_sys_init();
      |         ^~~~~~~~~~~~~~~

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: soc@kernel.org
Link: https://lore.kernel.org/r/20210721151620.2373500-1-arnd@kernel.org'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-ixp4xx/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig
index bf14d65120b9..34a1c7742088 100644
--- a/arch/arm/mach-ixp4xx/Kconfig
+++ b/arch/arm/mach-ixp4xx/Kconfig
@@ -91,6 +91,7 @@ config MACH_IXDP465
 
 config MACH_GORAMO_MLR
 	bool "GORAMO Multi Link Router"
+	depends on IXP4XX_PCI_LEGACY
 	help
 	  Say 'Y' here if you want your kernel to support GORAMO
 	  MultiLink router.

From cb81698fddbcc9a3ee75857e99dfc29caa96135b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 30 Jul 2021 20:18:10 +0300
Subject: [PATCH 383/502] net: dsa: sja1105: fix static FDB writes for SJA1110

The blamed commit made FDB access on SJA1110 functional only as far as
dumping the existing entries goes, but anything having to do with an
entry's index (adding, deleting) is still broken.

There are in fact 2 problems, all caused by improperly inheriting the
code from SJA1105P/Q/R/S:
- An entry size is SJA1110_SIZE_L2_LOOKUP_ENTRY (24) bytes and not
  SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY (20) bytes
- The "index" field within an FDB entry is at bits 10:1 for SJA1110 and
  not 15:6 as in SJA1105P/Q/R/S

This patch moves the packing function for the cmd->index outside of
sja1105pqrs_common_l2_lookup_cmd_packing() and into the device specific
functions sja1105pqrs_l2_lookup_cmd_packing and
sja1110_l2_lookup_cmd_packing.

Fixes: 74e7feff0e22 ("net: dsa: sja1105: fix dynamic access to L2 Address Lookup table for SJA1110")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/dsa/sja1105/sja1105_dynamic_config.c  | 27 ++++++++++---------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 56fead68ea9f..147709131c13 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -304,6 +304,15 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 			hostcmd = SJA1105_HOSTCMD_INVALIDATE;
 	}
 	sja1105_packing(p, &hostcmd, 25, 23, size, op);
+}
+
+static void
+sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				  enum packing_op op)
+{
+	int entry_size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
+
+	sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size);
 
 	/* Hack - The hardware takes the 'index' field within
 	 * struct sja1105_l2_lookup_entry as the index on which this command
@@ -313,26 +322,18 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	 * such that our API doesn't need to ask for a full-blown entry
 	 * structure when e.g. a delete is requested.
 	 */
-	sja1105_packing(buf, &cmd->index, 15, 6,
-			SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op);
-}
-
-static void
-sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
-				  enum packing_op op)
-{
-	int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
-
-	return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+	sja1105_packing(buf, &cmd->index, 15, 6, entry_size, op);
 }
 
 static void
 sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 			      enum packing_op op)
 {
-	int size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
+	int entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
 
-	return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+	sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size);
+
+	sja1105_packing(buf, &cmd->index, 10, 1, entry_size, op);
 }
 
 /* The switch is so retarded that it makes our command/entry abstraction

From e11e865bf84e3c6ea91563ff3e858cfe0e184bd2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 30 Jul 2021 20:18:11 +0300
Subject: [PATCH 384/502] net: dsa: sja1105: overwrite dynamic FDB entries with
 static ones in .port_fdb_add

The SJA1105 switch family leaves it up to software to decide where
within the FDB to install a static entry, and to concatenate destination
ports for already existing entries (the FDB is also used for multicast
entries), it is not as simple as just saying "please add this entry".

This means we first need to search for an existing FDB entry before
adding a new one. The driver currently manages to fool itself into
thinking that if an FDB entry already exists, there is nothing to be
done. But that FDB entry might be dynamically learned, case in which it
should be replaced with a static entry, but instead it is left alone.

This patch checks the LOCKEDS ("locked/static") bit from found FDB
entries, and lets the code "goto skip_finding_an_index;" if the FDB
entry was not static. So we also need to move the place where we set
LOCKEDS = true, to cover the new case where a dynamic FDB entry existed
but was dynamic.

Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management")
Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index e2dc997580a8..cc4a22ee1474 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1333,7 +1333,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
 		 * mask? If yes, we need to do nothing. If not, we need
 		 * to rewrite the entry by adding this port to it.
 		 */
-		if (l2_lookup.destports & BIT(port))
+		if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds)
 			return 0;
 		l2_lookup.destports |= BIT(port);
 	} else {
@@ -1364,6 +1364,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
 						     index, NULL, false);
 		}
 	}
+	l2_lookup.lockeds = true;
 	l2_lookup.index = sja1105et_fdb_index(bin, way);
 
 	rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
@@ -1434,10 +1435,10 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
 	rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
 					 SJA1105_SEARCH, &l2_lookup);
 	if (rc == 0) {
-		/* Found and this port is already in the entry's
+		/* Found a static entry and this port is already in the entry's
 		 * port mask => job done
 		 */
-		if (l2_lookup.destports & BIT(port))
+		if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds)
 			return 0;
 		/* l2_lookup.index is populated by the switch in case it
 		 * found something.
@@ -1460,10 +1461,11 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
 		dev_err(ds->dev, "FDB is full, cannot add entry.\n");
 		return -EINVAL;
 	}
-	l2_lookup.lockeds = true;
 	l2_lookup.index = i;
 
 skip_finding_an_index:
+	l2_lookup.lockeds = true;
+
 	rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
 					  l2_lookup.index, &l2_lookup,
 					  true);

From 6c5fc159e0927531707895709eee1f8bfa04289f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 30 Jul 2021 20:18:12 +0300
Subject: [PATCH 385/502] net: dsa: sja1105: invalidate dynamic FDB entries
 learned concurrently with statically added ones

The procedure to add a static FDB entry in sja1105 is concurrent with
dynamic learning performed on all bridge ports and the CPU port.

The switch looks up the FDB from left to right, and also learns
dynamically from left to right, so it is possible that between the
moment when we pick up a free slot to install an FDB entry, another slot
to the left of that one becomes free due to an address ageing out, and
that other slot is then immediately used by the switch to learn
dynamically the same address as we're trying to add statically.

The result is that we succeeded to add our static FDB entry, but it is
being shadowed by a dynamic FDB entry to its left, and the switch will
behave as if our static FDB entry did not exist.

We cannot really prevent this from happening unless we make the entire
process to add a static FDB entry a huge critical section where address
learning is temporarily disabled on _all_ ports, and then re-enabled
according to the configuration done by sja1105_port_set_learning.
However, that is kind of disruptive for the operation of the network.

What we can do alternatively is to simply read back the FDB for dynamic
entries located before our newly added static one, and delete them.
This will guarantee that our static FDB entry is now operational. It
will still not guarantee that there aren't dynamic FDB entries to the
_right_ of that static FDB entry, but at least those entries will age
out by themselves since they aren't hit, and won't bother anyone.

Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management")
Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 57 +++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index cc4a22ee1474..5a4c7789ca43 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1318,10 +1318,11 @@ static int sja1105et_is_fdb_entry_in_bin(struct sja1105_private *priv, int bin,
 int sja1105et_fdb_add(struct dsa_switch *ds, int port,
 		      const unsigned char *addr, u16 vid)
 {
-	struct sja1105_l2_lookup_entry l2_lookup = {0};
+	struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp;
 	struct sja1105_private *priv = ds->priv;
 	struct device *dev = ds->dev;
 	int last_unused = -1;
+	int start, end, i;
 	int bin, way, rc;
 
 	bin = sja1105et_fdb_hash(priv, addr, vid);
@@ -1373,6 +1374,29 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
 	if (rc < 0)
 		return rc;
 
+	/* Invalidate a dynamically learned entry if that exists */
+	start = sja1105et_fdb_index(bin, 0);
+	end = sja1105et_fdb_index(bin, way);
+
+	for (i = start; i < end; i++) {
+		rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+						 i, &tmp);
+		if (rc == -ENOENT)
+			continue;
+		if (rc)
+			return rc;
+
+		if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid)
+			continue;
+
+		rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+						  i, NULL, false);
+		if (rc)
+			return rc;
+
+		break;
+	}
+
 	return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
 }
 
@@ -1414,7 +1438,7 @@ int sja1105et_fdb_del(struct dsa_switch *ds, int port,
 int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
 			const unsigned char *addr, u16 vid)
 {
-	struct sja1105_l2_lookup_entry l2_lookup = {0};
+	struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp;
 	struct sja1105_private *priv = ds->priv;
 	int rc, i;
 
@@ -1472,6 +1496,35 @@ skip_finding_an_index:
 	if (rc < 0)
 		return rc;
 
+	/* The switch learns dynamic entries and looks up the FDB left to
+	 * right. It is possible that our addition was concurrent with the
+	 * dynamic learning of the same address, so now that the static entry
+	 * has been installed, we are certain that address learning for this
+	 * particular address has been turned off, so the dynamic entry either
+	 * is in the FDB at an index smaller than the static one, or isn't (it
+	 * can also be at a larger index, but in that case it is inactive
+	 * because the static FDB entry will match first, and the dynamic one
+	 * will eventually age out). Search for a dynamically learned address
+	 * prior to our static one and invalidate it.
+	 */
+	tmp = l2_lookup;
+
+	rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+					 SJA1105_SEARCH, &tmp);
+	if (rc < 0) {
+		dev_err(ds->dev,
+			"port %d failed to read back entry for %pM vid %d: %pe\n",
+			port, addr, vid, ERR_PTR(rc));
+		return rc;
+	}
+
+	if (tmp.index < l2_lookup.index) {
+		rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+						  tmp.index, NULL, false);
+		if (rc < 0)
+			return rc;
+	}
+
 	return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
 }
 

From 728db843df88753aeb7224314807a203afa8eb32 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 30 Jul 2021 20:18:13 +0300
Subject: [PATCH 386/502] net: dsa: sja1105: ignore the FDB entry for unknown
 multicast when adding a new address

Currently, when sja1105pqrs_fdb_add() is called for a host-joined IPv6
MDB entry such as 33:33:00:00:00:6a, the search for that address will
return the FDB entry for SJA1105_UNKNOWN_MULTICAST, which has a
destination MAC of 01:00:00:00:00:00 and a mask of 01:00:00:00:00:00.
It returns that entry because, well, it matches, in the sense that
unknown multicast is supposed by design to match it...

But the issue is that we then proceed to overwrite this entry with the
one for our precise host-joined multicast address, and the unknown
multicast entry is no longer there - unknown multicast is now flooded to
the same group of ports as broadcast, which does not look up the FDB.

To solve this problem, we should ignore searches that return the unknown
multicast address as the match, and treat them as "no match" which will
result in the entry being installed to hardware.

For this to work properly, we need to put the result of the FDB search
in a temporary variable in order to avoid overwriting the l2_lookup
entry we want to program. The l2_lookup entry returned by the search
might not have the same set of DESTPORTS and not even the same MACADDR
as the entry we're trying to add.

Fixes: 4d9423549501 ("net: dsa: sja1105: offload bridge port flags to device")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 5a4c7789ca43..5d8739b30d8c 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1456,14 +1456,19 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
 	}
 	l2_lookup.destports = BIT(port);
 
+	tmp = l2_lookup;
+
 	rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
-					 SJA1105_SEARCH, &l2_lookup);
-	if (rc == 0) {
+					 SJA1105_SEARCH, &tmp);
+	if (rc == 0 && tmp.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) {
 		/* Found a static entry and this port is already in the entry's
 		 * port mask => job done
 		 */
-		if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds)
+		if ((tmp.destports & BIT(port)) && tmp.lockeds)
 			return 0;
+
+		l2_lookup = tmp;
+
 		/* l2_lookup.index is populated by the switch in case it
 		 * found something.
 		 */

From 589918df93226a1e5f104306c185b6dcf2bd8051 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 30 Jul 2021 20:18:14 +0300
Subject: [PATCH 387/502] net: dsa: sja1105: be stateless with FDB entries on
 SJA1105P/Q/R/S/SJA1110 too

Similar but not quite the same with what was done in commit b11f0a4c0c81
("net: dsa: sja1105: be stateless when installing FDB entries") for
SJA1105E/T, it is desirable to drop the priv->vlan_aware check and
simply go ahead and install FDB entries in the VLAN that was given by
the bridge.

As opposed to SJA1105E/T, in SJA1105P/Q/R/S and SJA1110, the FDB is a
maskable TCAM, and we are installing VLAN-unaware FDB entries with the
VLAN ID masked off. However, such FDB entries might completely obscure
VLAN-aware entries where the VLAN ID is included in the search mask,
because the switch looks up the FDB from left to right and picks the
first entry which results in a masked match. So it depends on whether
the bridge installs first the VLAN-unaware or the VLAN-aware FDB entries.

Anyway, if we had a VLAN-unaware FDB entry towards one set of DESTPORTS
and a VLAN-aware one towards other set of DESTPORTS, the result is that
the packets in VLAN-aware mode will be forwarded towards the DESTPORTS
specified by the VLAN-unaware entry.

To solve this, simply do not use the masked matching ability of the FDB
for VLAN ID, and always match precisely on it. In VLAN-unaware mode, we
configure the switch for shared VLAN learning, so the VLAN ID will be
ignored anyway during lookup, so it is redundant to mask it off in the
TCAM.

This patch conflicts with net-next commit 0fac6aa098ed ("net: dsa: sja1105:
delete the best_effort_vlan_filtering mode") which changed this line:
	if (priv->vlan_state != SJA1105_VLAN_UNAWARE) {
into:
	if (priv->vlan_aware) {

When merging with net-next, the lines added by this patch should take
precedence in the conflict resolution (i.e. the "if" condition should be
deleted in both cases).

Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 5d8739b30d8c..335b608bad11 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1447,13 +1447,8 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
 	l2_lookup.vlanid = vid;
 	l2_lookup.iotag = SJA1105_S_TAG;
 	l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-	if (priv->vlan_state != SJA1105_VLAN_UNAWARE) {
-		l2_lookup.mask_vlanid = VLAN_VID_MASK;
-		l2_lookup.mask_iotag = BIT(0);
-	} else {
-		l2_lookup.mask_vlanid = 0;
-		l2_lookup.mask_iotag = 0;
-	}
+	l2_lookup.mask_vlanid = VLAN_VID_MASK;
+	l2_lookup.mask_iotag = BIT(0);
 	l2_lookup.destports = BIT(port);
 
 	tmp = l2_lookup;
@@ -1545,13 +1540,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
 	l2_lookup.vlanid = vid;
 	l2_lookup.iotag = SJA1105_S_TAG;
 	l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-	if (priv->vlan_state != SJA1105_VLAN_UNAWARE) {
-		l2_lookup.mask_vlanid = VLAN_VID_MASK;
-		l2_lookup.mask_iotag = BIT(0);
-	} else {
-		l2_lookup.mask_vlanid = 0;
-		l2_lookup.mask_iotag = 0;
-	}
+	l2_lookup.mask_vlanid = VLAN_VID_MASK;
+	l2_lookup.mask_iotag = BIT(0);
 	l2_lookup.destports = BIT(port);
 
 	rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,

From 47c2c0c2312118a478f738503781de1d1a6020d2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 30 Jul 2021 20:18:15 +0300
Subject: [PATCH 388/502] net: dsa: sja1105: match FDB entries regardless of
 inner/outer VLAN tag

On SJA1105P/Q/R/S and SJA1110, the L2 Lookup Table entries contain a
maskable "inner/outer tag" bit which means:
- when set to 1: match single-outer and double tagged frames
- when set to 0: match untagged and single-inner tagged frames
- when masked off: match all frames regardless of the type of tag

This driver does not make any meaningful distinction between inner tags
(matches on TPID) and outer tags (matches on TPID2). In fact, all VLAN
table entries are installed as SJA1110_VLAN_D_TAG, which means that they
match on both inner and outer tags.

So it does not make sense that we install FDB entries with the IOTAG bit
set to 1.

In VLAN-unaware mode, we set both TPID and TPID2 to 0xdadb, so the
switch will see frames as outer-tagged or double-tagged (never inner).
So the FDB entries will match if IOTAG is set to 1.

In VLAN-aware mode, we set TPID to 0x8100 and TPID2 to 0x88a8. So the
switch will see untagged and 802.1Q-tagged packets as inner-tagged, and
802.1ad-tagged packets as outer-tagged. So untagged and 802.1Q-tagged
packets will not match FDB entries if IOTAG is set to 1, but 802.1ad
tagged packets will. Strange.

To fix this, simply mask off the IOTAG bit from FDB entries, and make
them match regardless of whether the VLAN tag is inner or outer.

Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 335b608bad11..8667c9754330 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1445,10 +1445,8 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
 	/* Search for an existing entry in the FDB table */
 	l2_lookup.macaddr = ether_addr_to_u64(addr);
 	l2_lookup.vlanid = vid;
-	l2_lookup.iotag = SJA1105_S_TAG;
 	l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
 	l2_lookup.mask_vlanid = VLAN_VID_MASK;
-	l2_lookup.mask_iotag = BIT(0);
 	l2_lookup.destports = BIT(port);
 
 	tmp = l2_lookup;
@@ -1538,10 +1536,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
 
 	l2_lookup.macaddr = ether_addr_to_u64(addr);
 	l2_lookup.vlanid = vid;
-	l2_lookup.iotag = SJA1105_S_TAG;
 	l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
 	l2_lookup.mask_vlanid = VLAN_VID_MASK;
-	l2_lookup.mask_iotag = BIT(0);
 	l2_lookup.destports = BIT(port);
 
 	rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,

From 4c156084daa8ee70978e4b150b5eb5fc7b1f15be Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Thu, 29 Jul 2021 11:16:44 +0800
Subject: [PATCH 389/502] selinux: correct the return value when loads initial
 sids

It should not return 0 when SID 0 is assigned to isids.
This patch fixes it.

Cc: stable@vger.kernel.org
Fixes: e3e0b582c321a ("selinux: remove unused initial SIDs and improve handling")
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
[PM: remove changelog from description]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/policydb.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index defc5ef35c66..0ae1b718194a 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -874,7 +874,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
 	rc = sidtab_init(s);
 	if (rc) {
 		pr_err("SELinux:  out of memory on SID table init\n");
-		goto out;
+		return rc;
 	}
 
 	head = p->ocontexts[OCON_ISID];
@@ -885,7 +885,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
 		if (sid == SECSID_NULL) {
 			pr_err("SELinux:  SID 0 was assigned a context.\n");
 			sidtab_destroy(s);
-			goto out;
+			return -EINVAL;
 		}
 
 		/* Ignore initial SIDs unused by this kernel. */
@@ -897,12 +897,10 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
 			pr_err("SELinux:  unable to load initial SID %s.\n",
 			       name);
 			sidtab_destroy(s);
-			goto out;
+			return rc;
 		}
 	}
-	rc = 0;
-out:
-	return rc;
+	return 0;
 }
 
 int policydb_class_isvalid(struct policydb *p, unsigned int class)

From a5e63c7d38d548b8dab6c6205e0b6af76899dbf5 Mon Sep 17 00:00:00 2001
From: Steve Bennett <steveb@workware.net.au>
Date: Sat, 31 Jul 2021 08:57:50 +1000
Subject: [PATCH 390/502] net: phy: micrel: Fix detection of ksz87xx switch

The logic for discerning between KSZ8051 and KSZ87XX PHYs is incorrect
such that the that KSZ87XX switch is not identified correctly.

ksz8051_ksz8795_match_phy_device() uses the parameter ksz_phy_id
to discriminate whether it was called from ksz8051_match_phy_device()
or from ksz8795_match_phy_device() but since PHY_ID_KSZ87XX is the
same value as PHY_ID_KSZ8051, this doesn't work.

Instead use a bool to discriminate the caller.

Without this patch, the KSZ8795 switch port identifies as:

ksz8795-switch spi3.1 ade1 (uninitialized): PHY [dsa-0.1:03] driver [Generic PHY]

With the patch, it identifies correctly:

ksz8795-switch spi3.1 ade1 (uninitialized): PHY [dsa-0.1:03] driver [Micrel KSZ87XX Switch]

Fixes: 8b95599c55ed24b36cf4 ("net: phy: micrel: Discern KSZ8051 and KSZ8795 PHYs")
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Reviewed-by: Marek Vasut <marex@denx.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 4d53886f7d51..53bdd673ae56 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -401,11 +401,11 @@ static int ksz8041_config_aneg(struct phy_device *phydev)
 }
 
 static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
-					    const u32 ksz_phy_id)
+					    const bool ksz_8051)
 {
 	int ret;
 
-	if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id)
+	if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051)
 		return 0;
 
 	ret = phy_read(phydev, MII_BMSR);
@@ -418,7 +418,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
 	 * the switch does not.
 	 */
 	ret &= BMSR_ERCAP;
-	if (ksz_phy_id == PHY_ID_KSZ8051)
+	if (ksz_8051)
 		return ret;
 	else
 		return !ret;
@@ -426,7 +426,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
 
 static int ksz8051_match_phy_device(struct phy_device *phydev)
 {
-	return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051);
+	return ksz8051_ksz8795_match_phy_device(phydev, true);
 }
 
 static int ksz8081_config_init(struct phy_device *phydev)
@@ -535,7 +535,7 @@ static int ksz8061_config_init(struct phy_device *phydev)
 
 static int ksz8795_match_phy_device(struct phy_device *phydev)
 {
-	return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX);
+	return ksz8051_ksz8795_match_phy_device(phydev, false);
 }
 
 static int ksz9021_load_values_from_of(struct phy_device *phydev,

From 7fe74dfd41c428afb24e2e615470832fa997ff14 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Sat, 31 Jul 2021 14:38:01 +0800
Subject: [PATCH 391/502] net: natsemi: Fix missing pci_disable_device() in
 probe and remove

Replace pci_enable_device() with pcim_enable_device(),
pci_disable_device() and pci_release_regions() will be
called in release automatically.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/natsemi/natsemi.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index 51b4b25d15ad..84f7dbe9edff 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -819,7 +819,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 		printk(version);
 #endif
 
-	i = pci_enable_device(pdev);
+	i = pcim_enable_device(pdev);
 	if (i) return i;
 
 	/* natsemi has a non-standard PM control register
@@ -852,7 +852,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 	ioaddr = ioremap(iostart, iosize);
 	if (!ioaddr) {
 		i = -ENOMEM;
-		goto err_ioremap;
+		goto err_pci_request_regions;
 	}
 
 	/* Work around the dropped serial bit. */
@@ -974,9 +974,6 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
  err_register_netdev:
 	iounmap(ioaddr);
 
- err_ioremap:
-	pci_release_regions(pdev);
-
  err_pci_request_regions:
 	free_netdev(dev);
 	return i;
@@ -3241,7 +3238,6 @@ static void natsemi_remove1(struct pci_dev *pdev)
 
 	NATSEMI_REMOVE_FILE(pdev, dspcfg_workaround);
 	unregister_netdev (dev);
-	pci_release_regions (pdev);
 	iounmap(ioaddr);
 	free_netdev (dev);
 }

From 6387f65e2acb9a63044bd64464401771b8cf1acc Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 31 Jul 2021 07:39:17 -0700
Subject: [PATCH 392/502] net: sparx5: fix compiletime_assert for GCC 4.9

Stephen reports sparx5 broke GCC 4.9 build.
Move the compiletime_assert() out of the static function.
Compile-tested only, no object code changes.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/microchip/sparx5/sparx5_netdev.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
index 9d485a9d1f1f..1a240e6bddd0 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
@@ -13,7 +13,19 @@
  */
 #define VSTAX 73
 
-static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
+#define ifh_encode_bitfield(ifh, value, pos, _width)			\
+	({								\
+		u32 width = (_width);					\
+									\
+		/* Max width is 5 bytes - 40 bits. In worst case this will
+		 * spread over 6 bytes - 48 bits
+		 */							\
+		compiletime_assert(width <= 40,				\
+				   "Unsupported width, must be <= 40");	\
+		__ifh_encode_bitfield((ifh), (value), (pos), width);	\
+	})
+
+static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
 {
 	u8 *ifh_hdr = ifh;
 	/* Calculate the Start IFH byte position of this IFH bit position */
@@ -22,11 +34,6 @@ static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
 	u32 bit  = (pos % 8);
 	u64 encode = GENMASK(bit + width - 1, bit) & (value << bit);
 
-	/* Max width is 5 bytes - 40 bits. In worst case this will
-	 * spread over 6 bytes - 48 bits
-	 */
-	compiletime_assert(width <= 40, "Unsupported width, must be <= 40");
-
 	/* The b0-b7 goes into the start IFH byte */
 	if (encode & 0xFF)
 		ifh_hdr[byte] |= (u8)((encode & 0xFF));

From 66e0da21728343bd3e75230a53d909e045fb9dd7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 31 Jul 2021 07:40:07 -0700
Subject: [PATCH 393/502] docs: operstates: fix typo

TVL -> TLV

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/operstates.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst
index 9c918f7cb0e8..f6b9cce5b201 100644
--- a/Documentation/networking/operstates.rst
+++ b/Documentation/networking/operstates.rst
@@ -111,7 +111,7 @@ it as lower layer.
 
 Note that for certain kind of soft-devices, which are not managing any
 real hardware, it is possible to set this bit from userspace.  One
-should use TVL IFLA_CARRIER to do so.
+should use TLV IFLA_CARRIER to do so.
 
 netif_carrier_ok() can be used to query that bit.
 

From 7a7b8635b622add64d98cff84bf3ee71eac36237 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Sat, 31 Jul 2021 07:40:52 -0700
Subject: [PATCH 394/502] docs: operstates: document IF_OPER_TESTING

IF_OPER_TESTING is in fact used today.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/operstates.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst
index f6b9cce5b201..1ee2141e8ef1 100644
--- a/Documentation/networking/operstates.rst
+++ b/Documentation/networking/operstates.rst
@@ -73,7 +73,9 @@ IF_OPER_LOWERLAYERDOWN (3):
  state (f.e. VLAN).
 
 IF_OPER_TESTING (4):
- Unused in current kernel.
+ Interface is in testing mode, for example executing driver self-tests
+ or media (cable) test. It can't be used for normal traffic until tests
+ complete.
 
 IF_OPER_DORMANT (5):
  Interface is L1 up, but waiting for an external event, f.e. for a

From 1c69d7cf4a8b6b6cfd920a1e809f1cd33ae4369c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 2 Aug 2021 07:30:29 -0700
Subject: [PATCH 395/502] Revert "mhi: Fix networking tree build."

This reverts commit 40e159403896f7d55c98f858d0b20fee1d941fa4.

Looks like this commit breaks the build for me.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mhi.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 5e08468854db..944aa3aa3035 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -719,13 +719,8 @@ void mhi_device_put(struct mhi_device *mhi_dev);
  *                            host and device execution environments match and
  *                            channels are in a DISABLED state.
  * @mhi_dev: Device associated with the channels
- * @flags: MHI channel flags
  */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev,
-			     unsigned int flags);
-
-/* Automatically allocate and queue inbound buffers */
-#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0)
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev);
 
 /**
  * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer.

From 9b87f43537acfa24b95c236beba0f45901356eb2 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
Date: Fri, 16 Jul 2021 12:00:47 +0200
Subject: [PATCH 396/502] gpio: tqmx86: really make IRQ optional

The tqmx86 MFD driver was passing IRQ 0 for "no IRQ" in the past. This
causes warnings with newer kernels.

Prepare the gpio-tqmx86 driver for the fixed MFD driver by handling a
missing IRQ properly.

Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller")
Signed-off-by: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-tqmx86.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index 5022e0ad0fae..0f5d17f343f1 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -238,8 +238,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 	struct resource *res;
 	int ret, irq;
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
+	irq = platform_get_irq_optional(pdev, 0);
+	if (irq < 0 && irq != -ENXIO)
 		return irq;
 
 	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
@@ -278,7 +278,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 
 	pm_runtime_enable(&pdev->dev);
 
-	if (irq) {
+	if (irq > 0) {
 		struct irq_chip *irq_chip = &gpio->irq_chip;
 		u8 irq_status;
 

From d6793ca97b76642b77629dd0783ec64782a50bdb Mon Sep 17 00:00:00 2001
From: Aharon Landau <aharonl@nvidia.com>
Date: Tue, 27 Jul 2021 10:16:06 +0300
Subject: [PATCH 397/502] RDMA/mlx5: Delay emptying a cache entry when a new MR
 is added to it recently

Fixing a typo that causes a cache entry to shrink immediately after adding
to it new MRs if the entry size exceeds the high limit.  In doing so, the
cache misses its purpose to prevent the creation of new mkeys on the
runtime by using the cached ones.

Fixes: b9358bdbc713 ("RDMA/mlx5: Fix locking in MR cache work queue")
Link: https://lore.kernel.org/r/fcb546986be346684a016f5ca23a0567399145fa.1627370131.git.leonro@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3263851ea574..3f1c5a4f158b 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -531,8 +531,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 		 */
 		spin_unlock_irq(&ent->lock);
 		need_delay = need_resched() || someone_adding(cache) ||
-			     time_after(jiffies,
-					READ_ONCE(cache->last_add) + 300 * HZ);
+			     !time_after(jiffies,
+					 READ_ONCE(cache->last_add) + 300 * HZ);
 		spin_lock_irq(&ent->lock);
 		if (ent->disabled)
 			goto out;

From db4657afd10e45855ac1d8437fcc9a86bd3d741d Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Date: Thu, 29 Jul 2021 14:26:22 -0400
Subject: [PATCH 398/502] RDMA/cma: Revert INIT-INIT patch

The net/sunrpc/xprtrdma module creates its QP using rdma_create_qp() and
immediately post receives, implicitly assuming the QP is in the INIT state
and thus valid for ib_post_recv().

The patch noted in Fixes: removed the RESET->INIT modifiy from
rdma_create_qp(), breaking NFS rdma for verbs providers that fail the
ib_post_recv() for a bad state.

This situation was proven using kprobes in rvt_post_recv() and
rvt_modify_qp(). The traces showed that the rvt_post_recv() failed before
ANY modify QP and that the current state was RESET.

Fix by reverting the patch below.

Fixes: dc70f7c3ed34 ("RDMA/cma: Remove unnecessary INIT->INIT transition")
Link: https://lore.kernel.org/r/1627583182-81330-1-git-send-email-mike.marciniszyn@cornelisnetworks.com
Cc: Haakon Bugge <haakon.bugge@oracle.com>
Cc: Chuck Lever III <chuck.lever@oracle.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/cma.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 515a7e95a421..5d3b8b8d163d 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -926,12 +926,25 @@ static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 	return ret;
 }
 
+static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+	if (ret)
+		return ret;
+
+	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+}
+
 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 		   struct ib_qp_init_attr *qp_init_attr)
 {
 	struct rdma_id_private *id_priv;
 	struct ib_qp *qp;
-	int ret = 0;
+	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	if (id->device != pd->device) {
@@ -948,6 +961,8 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 
 	if (id->qp_type == IB_QPT_UD)
 		ret = cma_init_ud_qp(id_priv, qp);
+	else
+		ret = cma_init_conn_qp(id_priv, qp);
 	if (ret)
 		goto out_destroy;
 

From e2a05339fa1188b6b37540f4611893ac4c534fa2 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 29 Jul 2021 17:00:38 -0500
Subject: [PATCH 399/502] RDMA/rxe: Use the correct size of wqe when processing
 SRQ

The memcpy() that copies a WQE from a SRQ the QP uses an incorrect size.
The size should have been the size of the rxe_send_wqe struct not the size
of a pointer to it. The result is that IO operations using a SRQ on the
responder side will fail.

Fixes: ec0fa2445c18 ("RDMA/rxe: Fix over copying in get_srq_wqe")
Link: https://lore.kernel.org/r/20210729220039.18549-2-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 3743dc39b60c..360ec67cb9e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -318,7 +318,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
 		pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
 		return RESPST_ERR_MALFORMED_WQE;
 	}
-	size = sizeof(wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
+	size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
 	memcpy(&qp->resp.srq_wqe, wqe, size);
 
 	qp->resp.wqe = &qp->resp.srq_wqe.wqe;

From ef4b96a5773d7f6568363b3d0c3c3f371fb690bd Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 29 Jul 2021 17:00:39 -0500
Subject: [PATCH 400/502] RDMA/rxe: Restore setting tot_len in the IPv4 header

An earlier patch removed setting of tot_len in IPv4 headers because it was
also set in ip_local_out. However, this change resulted in an incorrect
ICRC being computed because the tot_len field is not masked out. This
patch restores that line. This fixes the bug reported by Zhu Yanjun.  This
bug affects anyone using rxe which is currently broken.

Fixes: 230bb836ee88 ("RDMA/rxe: Fix redundant call to ip_send_check")
Link: https://lore.kernel.org/r/20210729220039.18549-3-rpearsonhpe@gmail.com
Reported-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Reviewed-and-tested-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index dec92928a1cd..5ac27f28ace1 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -259,6 +259,7 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb,
 
 	iph->version	=	IPVERSION;
 	iph->ihl	=	sizeof(struct iphdr) >> 2;
+	iph->tot_len	=	htons(skb->len);
 	iph->frag_off	=	df;
 	iph->protocol	=	proto;
 	iph->tos	=	tos;

From e89afb51f97ae03ee246c1fd0b47e3e491266aef Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Tue, 15 Jun 2021 14:23:34 -0400
Subject: [PATCH 401/502] drm/vmwgfx: Fix a 64bit regression on svga3

Register accesses are always 4bytes, accidently this was changed to
a void pointer whwqich badly breaks 64bit archs when running on top
of svga3.

Fixes: 2cd80dbd3551 ("drm/vmwgfx: Add basic support for SVGA3")
Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Martin Krastev <krastevm@vmware.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210615182336.995192-3-zackr@vmware.com
(cherry picked from commit 87360168759879d68550b0c052bbcc2a0339ff74)
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index d1cef3b69e9d..5652d982b1ce 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -492,7 +492,7 @@ struct vmw_private {
 	resource_size_t vram_start;
 	resource_size_t vram_size;
 	resource_size_t prim_bb_mem;
-	void __iomem *rmmio;
+	u32 __iomem *rmmio;
 	u32 *fifo_mem;
 	resource_size_t fifo_mem_size;
 	uint32_t fb_max_width;

From 0541a6293298fb52789de389dfb27ef54df81f73 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 2 Aug 2021 02:17:30 +0300
Subject: [PATCH 402/502] net: bridge: validate the NUD_PERMANENT bit when
 adding an extern_learn FDB entry

Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:

1. Entries pointing towards the bridge device that are not local/permanent:

ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static

2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:

ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent

Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.

This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).

That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller show that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.

Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:

	struct net_device *dev = info.is_local ? br->dev : dst->dev;

info.is_local == false, dst == NULL.

After this patch, the invalid entry added by the first command is
rejected:

ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.

and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:

ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0

Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/bridge/br.c         |  3 ++-
 net/bridge/br_fdb.c     | 30 ++++++++++++++++++++++++------
 net/bridge/br_private.h |  2 +-
 3 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/net/bridge/br.c b/net/bridge/br.c
index ef743f94254d..bbab9984f24e 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -166,7 +166,8 @@ static int br_switchdev_event(struct notifier_block *unused,
 	case SWITCHDEV_FDB_ADD_TO_BRIDGE:
 		fdb_info = ptr;
 		err = br_fdb_external_learn_add(br, p, fdb_info->addr,
-						fdb_info->vid, false);
+						fdb_info->vid,
+						fdb_info->is_local, false);
 		if (err) {
 			err = notifier_from_errno(err);
 			break;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index a16191dcaed1..835cec1e5a03 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -1019,7 +1019,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
 
 static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
 			struct net_bridge_port *p, const unsigned char *addr,
-			u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[])
+			u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[],
+			struct netlink_ext_ack *extack)
 {
 	int err = 0;
 
@@ -1038,7 +1039,15 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
 		rcu_read_unlock();
 		local_bh_enable();
 	} else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
-		err = br_fdb_external_learn_add(br, p, addr, vid, true);
+		if (!p && !(ndm->ndm_state & NUD_PERMANENT)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "FDB entry towards bridge must be permanent");
+			return -EINVAL;
+		}
+
+		err = br_fdb_external_learn_add(br, p, addr, vid,
+						ndm->ndm_state & NUD_PERMANENT,
+						true);
 	} else {
 		spin_lock_bh(&br->hash_lock);
 		err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
@@ -1110,9 +1119,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		}
 
 		/* VID was specified, so use it. */
-		err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb);
+		err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb,
+				   extack);
 	} else {
-		err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb);
+		err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb,
+				   extack);
 		if (err || !vg || !vg->num_vlans)
 			goto out;
 
@@ -1124,7 +1135,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			if (!br_vlan_should_use(v))
 				continue;
 			err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
-					   nfea_tb);
+					   nfea_tb, extack);
 			if (err)
 				goto out;
 		}
@@ -1264,7 +1275,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
 }
 
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
-			      const unsigned char *addr, u16 vid,
+			      const unsigned char *addr, u16 vid, bool is_local,
 			      bool swdev_notify)
 {
 	struct net_bridge_fdb_entry *fdb;
@@ -1281,6 +1292,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 
 		if (swdev_notify)
 			flags |= BIT(BR_FDB_ADDED_BY_USER);
+
+		if (is_local)
+			flags |= BIT(BR_FDB_LOCAL);
+
 		fdb = fdb_create(br, p, addr, vid, flags);
 		if (!fdb) {
 			err = -ENOMEM;
@@ -1307,6 +1322,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 		if (swdev_notify)
 			set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
 
+		if (is_local)
+			set_bit(BR_FDB_LOCAL, &fdb->flags);
+
 		if (modified)
 			fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
 	}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2b48b204205e..aa64d8d63ca3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -711,7 +711,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
 int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
 void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
-			      const unsigned char *addr, u16 vid,
+			      const unsigned char *addr, u16 vid, bool is_local,
 			      bool swdev_notify);
 int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
 			      const unsigned char *addr, u16 vid,

From 8861452b2097bb0b5d0081a1c137fb3870b0a31f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 8 Nov 2019 09:43:06 +0100
Subject: [PATCH 403/502] soc: ixp4xx: fix printing resources

When compile-testing with 64-bit resource_size_t, gcc reports an invalid
printk format string:

In file included from include/linux/dma-mapping.h:7,
                 from drivers/soc/ixp4xx/ixp4xx-npe.c:15:
drivers/soc/ixp4xx/ixp4xx-npe.c: In function 'ixp4xx_npe_probe':
drivers/soc/ixp4xx/ixp4xx-npe.c:694:18: error: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t' {aka 'long long unsigned int'} [-Werror=format=]
    dev_info(dev, "NPE%d at 0x%08x-0x%08x not available\n",

Use the special %pR format string to print the resources.

Fixes: 0b458d7b10f8 ("soc: ixp4xx: npe: Pass addresses as resources")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/soc/ixp4xx/ixp4xx-npe.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c
index fea50e04d5a1..f490c4ca51f5 100644
--- a/drivers/soc/ixp4xx/ixp4xx-npe.c
+++ b/drivers/soc/ixp4xx/ixp4xx-npe.c
@@ -693,8 +693,8 @@ static int ixp4xx_npe_probe(struct platform_device *pdev)
 
 		if (!(ixp4xx_read_feature_bits() &
 		      (IXP4XX_FEATURE_RESET_NPEA << i))) {
-			dev_info(dev, "NPE%d at 0x%08x-0x%08x not available\n",
-				 i, res->start, res->end);
+			dev_info(dev, "NPE%d at %pR not available\n",
+				 i, res);
 			continue; /* NPE already disabled or not present */
 		}
 		npe->regs = devm_ioremap_resource(dev, res);
@@ -702,13 +702,12 @@ static int ixp4xx_npe_probe(struct platform_device *pdev)
 			return PTR_ERR(npe->regs);
 
 		if (npe_reset(npe)) {
-			dev_info(dev, "NPE%d at 0x%08x-0x%08x does not reset\n",
-				 i, res->start, res->end);
+			dev_info(dev, "NPE%d at %pR does not reset\n",
+				 i, res);
 			continue;
 		}
 		npe->valid = 1;
-		dev_info(dev, "NPE%d at 0x%08x-0x%08x registered\n",
-			 i, res->start, res->end);
+		dev_info(dev, "NPE%d at %pR registered\n", i, res);
 		found++;
 	}
 

From a8eee86317f11e97990d755d4615c1c0db203d08 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 3 Aug 2021 10:12:34 +0200
Subject: [PATCH 404/502] soc: ixp4xx/qmgr: fix invalid __iomem access

Sparse reports a compile time warning when dereferencing an
__iomem pointer:

drivers/soc/ixp4xx/ixp4xx-qmgr.c:149:37: warning: dereference of noderef expression
drivers/soc/ixp4xx/ixp4xx-qmgr.c:153:40: warning: dereference of noderef expression
drivers/soc/ixp4xx/ixp4xx-qmgr.c:154:40: warning: dereference of noderef expression
drivers/soc/ixp4xx/ixp4xx-qmgr.c:174:38: warning: dereference of noderef expression
drivers/soc/ixp4xx/ixp4xx-qmgr.c:174:44: warning: dereference of noderef expression

Use __raw_readl() here for consistency with the rest of the file.
This should really get converted to some proper accessor, as the
__raw functions are not meant to be used in drivers, but the driver
has used these since the start, so for the moment, let's only fix
the warning.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: d4c9e9fc9751 ("IXP42x: Add QMgr support for IXP425 rev. A0 processors.")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/soc/ixp4xx/ixp4xx-qmgr.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/soc/ixp4xx/ixp4xx-qmgr.c b/drivers/soc/ixp4xx/ixp4xx-qmgr.c
index c6bf6ef257c0..9154c7029b05 100644
--- a/drivers/soc/ixp4xx/ixp4xx-qmgr.c
+++ b/drivers/soc/ixp4xx/ixp4xx-qmgr.c
@@ -146,12 +146,12 @@ static irqreturn_t qmgr_irq1_a0(int irq, void *pdev)
 	/* ACK - it may clear any bits so don't rely on it */
 	__raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[0]);
 
-	en_bitmap = qmgr_regs->irqen[0];
+	en_bitmap = __raw_readl(&qmgr_regs->irqen[0]);
 	while (en_bitmap) {
 		i = __fls(en_bitmap); /* number of the last "low" queue */
 		en_bitmap &= ~BIT(i);
-		src = qmgr_regs->irqsrc[i >> 3];
-		stat = qmgr_regs->stat1[i >> 3];
+		src = __raw_readl(&qmgr_regs->irqsrc[i >> 3]);
+		stat = __raw_readl(&qmgr_regs->stat1[i >> 3]);
 		if (src & 4) /* the IRQ condition is inverted */
 			stat = ~stat;
 		if (stat & BIT(src & 3)) {
@@ -171,7 +171,8 @@ static irqreturn_t qmgr_irq2_a0(int irq, void *pdev)
 	/* ACK - it may clear any bits so don't rely on it */
 	__raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[1]);
 
-	req_bitmap = qmgr_regs->irqen[1] & qmgr_regs->statne_h;
+	req_bitmap = __raw_readl(&qmgr_regs->irqen[1]) &
+		     __raw_readl(&qmgr_regs->statne_h);
 	while (req_bitmap) {
 		i = __fls(req_bitmap); /* number of the last "high" queue */
 		req_bitmap &= ~BIT(i);

From e30e8d46cf605d216a799a28c77b8a41c328613a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 2 Aug 2021 11:42:00 +0100
Subject: [PATCH 405/502] arm64: fix compat syscall return truncation

Due to inconsistencies in the way we manipulate compat GPRs, we have a
few issues today:

* For audit and tracing, where error codes are handled as a (native)
  long, negative error codes are expected to be sign-extended to the
  native 64-bits, or they may fail to be matched correctly. Thus a
  syscall which fails with an error may erroneously be identified as
  failing.

* For ptrace, *all* compat return values should be sign-extended for
  consistency with 32-bit arm, but we currently only do this for
  negative return codes.

* As we may transiently set the upper 32 bits of some compat GPRs while
  in the kernel, these can be sampled by perf, which is somewhat
  confusing. This means that where a syscall returns a pointer above 2G,
  this will be sign-extended, but will not be mistaken for an error as
  error codes are constrained to the inclusive range [-4096, -1] where
  no user pointer can exist.

To fix all of these, we must consistently use helpers to get/set the
compat GPRs, ensuring that we never write the upper 32 bits of the
return code, and always sign-extend when reading the return code.  This
patch does so, with the following changes:

* We re-organise syscall_get_return_value() to always sign-extend for
  compat tasks, and reimplement syscall_get_error() atop. We update
  syscall_trace_exit() to use syscall_get_return_value().

* We consistently use syscall_set_return_value() to set the return
  value, ensureing the upper 32 bits are never set unexpectedly.

* As the core audit code currently uses regs_return_value() rather than
  syscall_get_return_value(), we special-case this for
  compat_user_mode(regs) such that this will do the right thing. Going
  forward, we should try to move the core audit code over to
  syscall_get_return_value().

Cc: <stable@vger.kernel.org>
Reported-by: He Zhe <zhe.he@windriver.com>
Reported-by: weiyuchen <weiyuchen3@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210802104200.21390-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/ptrace.h  | 12 +++++++++++-
 arch/arm64/include/asm/syscall.h | 21 +++++++++++----------
 arch/arm64/kernel/ptrace.c       |  2 +-
 arch/arm64/kernel/signal.c       |  3 ++-
 arch/arm64/kernel/syscall.c      |  9 +++------
 5 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index e58bca832dff..41b332c054ab 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -320,7 +320,17 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 
 static inline unsigned long regs_return_value(struct pt_regs *regs)
 {
-	return regs->regs[0];
+	unsigned long val = regs->regs[0];
+
+	/*
+	 * Audit currently uses regs_return_value() instead of
+	 * syscall_get_return_value(). Apply the same sign-extension here until
+	 * audit is updated to use syscall_get_return_value().
+	 */
+	if (compat_user_mode(regs))
+		val = sign_extend64(val, 31);
+
+	return val;
 }
 
 static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index cfc0672013f6..03e20895453a 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -29,24 +29,25 @@ static inline void syscall_rollback(struct task_struct *task,
 	regs->regs[0] = regs->orig_x0;
 }
 
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	unsigned long val = regs->regs[0];
+
+	if (is_compat_thread(task_thread_info(task)))
+		val = sign_extend64(val, 31);
+
+	return val;
+}
 
 static inline long syscall_get_error(struct task_struct *task,
 				     struct pt_regs *regs)
 {
-	unsigned long error = regs->regs[0];
-
-	if (is_compat_thread(task_thread_info(task)))
-		error = sign_extend64(error, 31);
+	unsigned long error = syscall_get_return_value(task, regs);
 
 	return IS_ERR_VALUE(error) ? error : 0;
 }
 
-static inline long syscall_get_return_value(struct task_struct *task,
-					    struct pt_regs *regs)
-{
-	return regs->regs[0];
-}
-
 static inline void syscall_set_return_value(struct task_struct *task,
 					    struct pt_regs *regs,
 					    int error, long val)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 499b6b2f9757..b381a1ee9ea7 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1862,7 +1862,7 @@ void syscall_trace_exit(struct pt_regs *regs)
 	audit_syscall_exit(regs);
 
 	if (flags & _TIF_SYSCALL_TRACEPOINT)
-		trace_sys_exit(regs, regs_return_value(regs));
+		trace_sys_exit(regs, syscall_get_return_value(current, regs));
 
 	if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index f8192f4ae0b8..23036334f4dc 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -29,6 +29,7 @@
 #include <asm/unistd.h>
 #include <asm/fpsimd.h>
 #include <asm/ptrace.h>
+#include <asm/syscall.h>
 #include <asm/signal32.h>
 #include <asm/traps.h>
 #include <asm/vdso.h>
@@ -890,7 +891,7 @@ static void do_signal(struct pt_regs *regs)
 		     retval == -ERESTART_RESTARTBLOCK ||
 		     (retval == -ERESTARTSYS &&
 		      !(ksig.ka.sa.sa_flags & SA_RESTART)))) {
-			regs->regs[0] = -EINTR;
+			syscall_set_return_value(current, regs, -EINTR, 0);
 			regs->pc = continue_addr;
 		}
 
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 263d6c1a525f..50a0f1a38e84 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -54,10 +54,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
 		ret = do_ni_syscall(regs, scno);
 	}
 
-	if (is_compat_task())
-		ret = lower_32_bits(ret);
-
-	regs->regs[0] = ret;
+	syscall_set_return_value(current, regs, 0, ret);
 
 	/*
 	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
@@ -115,7 +112,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 		 * syscall. do_notify_resume() will send a signal to userspace
 		 * before the syscall is restarted.
 		 */
-		regs->regs[0] = -ERESTARTNOINTR;
+		syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0);
 		return;
 	}
 
@@ -136,7 +133,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 		 * anyway.
 		 */
 		if (scno == NO_SYSCALL)
-			regs->regs[0] = -ENOSYS;
+			syscall_set_return_value(current, regs, -ENOSYS, 0);
 		scno = syscall_trace_enter(regs);
 		if (scno == NO_SYSCALL)
 			goto trace_exit;

From 64ee84c75b5f75132eec97f2c7a201a056d53698 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 1 Aug 2021 14:35:25 +0900
Subject: [PATCH 406/502] arm64: move warning about toolchains to archprepare

Commit 987fdfec2410 ("arm64: move --fix-cortex-a53-843419 linker test to
Kconfig") fixed the false-positive warning in the installation step.

Yet, there are some cases where this false-positive is shown. For example,
you can see it when you cross 987fdfec2410 during git-bisect.

  $ git checkout 987fdfec2410^
    [ snip ]
  $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- defconfig all
    [ snip ]
  $ git checkout v5.13
    [ snip]
  $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- defconfig all
    [ snip ]
  arch/arm64/Makefile:25: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum

In the stale include/config/auto.config, CONFIG_ARM64_ERRATUM_843419=y
is set without CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419, so the warning
is displayed while parsing the Makefiles.

Make will restart with the updated include/config/auto.config, hence
CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419 will be set eventually, but
this warning is a surprise for users.

Commit 25896d073d8a ("x86/build: Fix compiler support check for
CONFIG_RETPOLINE") addressed a similar issue.

Move $(warning ...) out of the parse stage of Makefiles.

The same applies to CONFIG_ARM64_USE_LSE_ATOMICS.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20210801053525.105235-1-masahiroy@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Makefile | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7bc37d0a1b68..7b668db43261 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -21,19 +21,11 @@ LDFLAGS_vmlinux		+= -shared -Bsymbolic -z notext \
 endif
 
 ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
-  ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y)
-$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
-  else
+  ifeq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y)
 LDFLAGS_vmlinux	+= --fix-cortex-a53-843419
   endif
 endif
 
-ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y)
-  ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y)
-$(warning LSE atomics not supported by binutils)
-  endif
-endif
-
 cc_has_k_constraint := $(call try-run,echo				\
 	'int main(void) {						\
 		asm volatile("and w0, w0, %w0" :: "K" (4294967295));	\
@@ -176,6 +168,17 @@ vdso_install:
 
 archprepare:
 	$(Q)$(MAKE) $(build)=arch/arm64/tools kapi
+ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
+  ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y)
+	@echo "warning: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum" >&2
+  endif
+endif
+ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS),y)
+  ifneq ($(CONFIG_ARM64_LSE_ATOMICS),y)
+	@echo "warning: LSE atomics not supported by binutils" >&2
+  endif
+endif
+
 
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:

From f9c4ff2ab9fe433d44ebbc2e3c2368a49df44798 Mon Sep 17 00:00:00 2001
From: Barry Song <song.bao.hua@hisilicon.com>
Date: Sat, 31 Jul 2021 00:51:31 +1200
Subject: [PATCH 407/502] arm64: fix the doc of RANDOMIZE_MODULE_REGION_FULL

Obviously kaslr is setting the module region to 2GB rather than 4GB since
commit b2eed9b588112 ("arm64/kernel: kaslr: reduce module randomization
range to 2 GB"). So fix the size of region in Kconfig.
On the other hand, even though RANDOMIZE_MODULE_REGION_FULL is not set,
module_alloc() can fall back to a 2GB window if ARM64_MODULE_PLTS is set.
In this case, veneers are still needed. !RANDOMIZE_MODULE_REGION_FULL
doesn't necessarily mean veneers are not needed.
So fix the doc to be more precise to avoid any confusion to the readers
of the code.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@arm.com>
Cc: Qi Liu <liuqi115@huawei.com>
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20210730125131.13724-1-song.bao.hua@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig        | 9 ++++++---
 arch/arm64/kernel/kaslr.c | 4 +++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b5b13a932561..fdcd54d39c1e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1800,11 +1800,11 @@ config RANDOMIZE_BASE
 	  If unsure, say N.
 
 config RANDOMIZE_MODULE_REGION_FULL
-	bool "Randomize the module region over a 4 GB range"
+	bool "Randomize the module region over a 2 GB range"
 	depends on RANDOMIZE_BASE
 	default y
 	help
-	  Randomizes the location of the module region inside a 4 GB window
+	  Randomizes the location of the module region inside a 2 GB window
 	  covering the core kernel. This way, it is less likely for modules
 	  to leak information about the location of core kernel data structures
 	  but it does imply that function calls between modules and the core
@@ -1812,7 +1812,10 @@ config RANDOMIZE_MODULE_REGION_FULL
 
 	  When this option is not set, the module region will be randomized over
 	  a limited range that contains the [_stext, _etext] interval of the
-	  core kernel, so branch relocations are always in range.
+	  core kernel, so branch relocations are almost always in range unless
+	  ARM64_MODULE_PLTS is enabled and the region is exhausted. In this
+	  particular case of region exhaustion, modules might be able to fall
+	  back to a larger 2GB area.
 
 config CC_HAVE_STACKPROTECTOR_SYSREG
 	def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index cfa2cfde3019..418b2bba1521 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -162,7 +162,9 @@ u64 __init kaslr_early_init(void)
 		 * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
 		 * _stext) . This guarantees that the resulting region still
 		 * covers [_stext, _etext], and that all relative branches can
-		 * be resolved without veneers.
+		 * be resolved without veneers unless this region is exhausted
+		 * and we fall back to a larger 2GB window in module_alloc()
+		 * when ARM64_MODULE_PLTS is enabled.
 		 */
 		module_range = MODULES_VSIZE - (u64)(_etext - _stext);
 		module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;

From 8d5903f457145e3fcd858578b065d667822d99ac Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 2 Aug 2021 17:48:44 +0100
Subject: [PATCH 408/502] arm64: stacktrace: fix comment

Due to a copy-paste error, we describe struct stackframe::pc as a
snapshot of the `fp` field rather than the `lr` field.

Fix the comment.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210802164845.45506-2-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/stacktrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 1801399204d7..8aebc00c1718 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -35,7 +35,7 @@ struct stack_info {
  * accounting information necessary for robust unwinding.
  *
  * @fp:          The fp value in the frame record (or the real fp)
- * @pc:          The fp value in the frame record (or the real lr)
+ * @pc:          The lr value in the frame record (or the real lr)
  *
  * @stacks_done: Stacks which have been entirely unwound, for which it is no
  *               longer valid to unwind to.

From 0c32706dac1b0a72713184246952ab0f54327c21 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 2 Aug 2021 17:48:45 +0100
Subject: [PATCH 409/502] arm64: stacktrace: avoid tracing arch_stack_walk()

When the function_graph tracer is in use, arch_stack_walk() may unwind
the stack incorrectly, erroneously reporting itself, missing the final
entry which is being traced, and reporting all traced entries between
these off-by-one from where they should be.

When ftrace hooks a function return, the original return address is
saved to the fgraph ret_stack, and the return address  in the LR (or the
function's frame record) is replaced with `return_to_handler`.

When arm64's unwinder encounter frames returning to `return_to_handler`,
it finds the associated original return address from the fgraph ret
stack, assuming the most recent `ret_to_hander` entry on the stack
corresponds to the most recent entry in the fgraph ret stack, and so on.

When arch_stack_walk() is used to dump the current task's stack, it
starts from the caller of arch_stack_walk(). However, arch_stack_walk()
can be traced, and so may push an entry on to the fgraph ret stack,
leaving the fgraph ret stack offset by one from the expected position.

This can be seen when dumping the stack via /proc/self/stack, where
enabling the graph tracer results in an unexpected
`stack_trace_save_tsk` entry at the start of the trace, and `el0_svc`
missing form the end of the trace.

This patch fixes this by marking arch_stack_walk() as notrace, as we do
for all other functions on the path to ftrace_graph_get_ret_stack().
While a few helper functions are not marked notrace, their calls/returns
are balanced, and will have no observable effect when examining the
fgraph ret stack.

It is possible for an exeption boundary to cause a similar offset if the
return address of the interrupted context was in the LR. Fixing those
cases will require some more substantial rework, and is left for
subsequent patches.

Before:

| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c
| # echo function_graph > /sys/kernel/tracing/current_tracer
| # cat /proc/self/stack
| [<0>] stack_trace_save_tsk+0xa4/0x110
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c

After:

| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c
| # echo function_graph > /sys/kernel/tracing/current_tracer
| # cat /proc/self/stack
| [<0>] proc_pid_stack+0xc4/0x140
| [<0>] proc_single_show+0x6c/0x120
| [<0>] seq_read_iter+0x240/0x4e0
| [<0>] seq_read+0xe8/0x140
| [<0>] vfs_read+0xb8/0x1e4
| [<0>] ksys_read+0x74/0x100
| [<0>] __arm64_sys_read+0x28/0x3c
| [<0>] invoke_syscall+0x50/0x120
| [<0>] el0_svc_common.constprop.0+0xc4/0xd4
| [<0>] do_el0_svc+0x30/0x9c
| [<0>] el0_svc+0x2c/0x54
| [<0>] el0t_64_sync_handler+0x1a8/0x1b0
| [<0>] el0t_64_sync+0x198/0x19c

Cc: <stable@vger.kernel.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviwed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210802164845.45506-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/stacktrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b83c8d911930..8982a2b78acf 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -218,7 +218,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
 
 #ifdef CONFIG_STACKTRACE
 
-noinline void arch_stack_walk(stack_trace_consume_fn consume_entry,
+noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
 			      void *cookie, struct task_struct *task,
 			      struct pt_regs *regs)
 {

From ce78ffa3ef1681065ba451cfd545da6126f5ca88 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 3 Aug 2021 11:14:03 +0100
Subject: [PATCH 410/502] net: really fix the build...

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/bus/mhi/core/internal.h  |  2 +-
 drivers/bus/mhi/core/main.c      |  9 ++++++---
 drivers/net/mhi/net.c            |  2 +-
 drivers/net/wwan/mhi_wwan_ctrl.c |  2 +-
 include/linux/mhi.h              |  7 ++++++-
 net/qrtr/mhi.c                   | 16 +++++++++++++++-
 6 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h
index 5b9ea66b92dc..bc239a11aa69 100644
--- a/drivers/bus/mhi/core/internal.h
+++ b/drivers/bus/mhi/core/internal.h
@@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl,
 		      struct image_info *img_info);
 void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl);
 int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
-			struct mhi_chan *mhi_chan);
+			struct mhi_chan *mhi_chan, unsigned int flags);
 int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
 		       struct mhi_chan *mhi_chan);
 void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c
index fc9196f11cb7..84448233f64c 100644
--- a/drivers/bus/mhi/core/main.c
+++ b/drivers/bus/mhi/core/main.c
@@ -1430,7 +1430,7 @@ exit_unprepare_channel:
 }
 
 int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
-			struct mhi_chan *mhi_chan)
+			struct mhi_chan *mhi_chan, unsigned int flags)
 {
 	int ret = 0;
 	struct device *dev = &mhi_chan->mhi_dev->dev;
@@ -1455,6 +1455,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
 	if (ret)
 		goto error_pm_state;
 
+	if (mhi_chan->dir == DMA_FROM_DEVICE)
+		mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS);
+	
 	/* Pre-allocate buffer for xfer ring */
 	if (mhi_chan->pre_alloc) {
 		int nr_el = get_nr_avail_ring_elements(mhi_cntrl,
@@ -1610,7 +1613,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan)
 }
 
 /* Move channel to start state */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags)
 {
 	int ret, dir;
 	struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
@@ -1621,7 +1624,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
 		if (!mhi_chan)
 			continue;
 
-		ret = mhi_prepare_channel(mhi_cntrl, mhi_chan);
+		ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags);
 		if (ret)
 			goto error_open_chan;
 	}
diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
index e60e38c1f09d..11be6bcdd551 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi/net.c
@@ -335,7 +335,7 @@ static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
 	u64_stats_init(&mhi_netdev->stats.tx_syncp);
 
 	/* Start MHI channels */
-	err = mhi_prepare_for_transfer(mhi_dev);
+	err = mhi_prepare_for_transfer(mhi_dev, 0);
 	if (err)
 		goto out_err;
 
diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c
index 1bc6b69aa530..1e18420ce404 100644
--- a/drivers/net/wwan/mhi_wwan_ctrl.c
+++ b/drivers/net/wwan/mhi_wwan_ctrl.c
@@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port)
 	int ret;
 
 	/* Start mhi device's channel(s) */
-	ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev);
+	ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 944aa3aa3035..5e08468854db 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -719,8 +719,13 @@ void mhi_device_put(struct mhi_device *mhi_dev);
  *                            host and device execution environments match and
  *                            channels are in a DISABLED state.
  * @mhi_dev: Device associated with the channels
+ * @flags: MHI channel flags
  */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev);
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev,
+			     unsigned int flags);
+
+/* Automatically allocate and queue inbound buffers */
+#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0)
 
 /**
  * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer.
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index fa611678af05..1dc955ca57d3 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -15,6 +15,7 @@ struct qrtr_mhi_dev {
 	struct qrtr_endpoint ep;
 	struct mhi_device *mhi_dev;
 	struct device *dev;
+	struct completion ready;
 };
 
 /* From MHI to QRTR */
@@ -50,6 +51,10 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
 	struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
 	int rc;
 
+	rc = wait_for_completion_interruptible(&qdev->ready);
+	if (rc)
+		goto free_skb;
+
 	if (skb->sk)
 		sock_hold(skb->sk);
 
@@ -79,7 +84,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
 	int rc;
 
 	/* start channels */
-	rc = mhi_prepare_for_transfer(mhi_dev);
+	rc = mhi_prepare_for_transfer(mhi_dev, 0);
 	if (rc)
 		return rc;
 
@@ -96,6 +101,15 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
 	if (rc)
 		return rc;
 
+	/* start channels */
+	rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);
+	if (rc) {
+		qrtr_endpoint_unregister(&qdev->ep);
+		dev_set_drvdata(&mhi_dev->dev, NULL);
+		return rc;
+	}
+
+	complete_all(&qdev->ready);
 	dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
 
 	return 0;

From 2e2f1e8d0450c561c0c936b4b67e8b5a95975fb7 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 30 Jul 2021 14:26:22 +0200
Subject: [PATCH 411/502] KVM: x86: hyper-v: Check access to hypercall before
 reading XMM registers

In case guest doesn't have access to the particular hypercall we can avoid
reading XMM registers.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210730122625.112848-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index b07592ca92f0..cb7e045905a5 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2173,9 +2173,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
 	hc.rep = !!(hc.rep_cnt || hc.rep_idx);
 
-	if (hc.fast && is_xmm_fast_hypercall(&hc))
-		kvm_hv_hypercall_read_xmm(&hc);
-
 	trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
 			       hc.ingpa, hc.outgpa);
 
@@ -2184,6 +2181,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		goto hypercall_complete;
 	}
 
+	if (hc.fast && is_xmm_fast_hypercall(&hc))
+		kvm_hv_hypercall_read_xmm(&hc);
+
 	switch (hc.code) {
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
 		if (unlikely(hc.rep)) {

From f5714bbb5b3120b33dfbf3d81ffc0b98ae4cd4c1 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 30 Jul 2021 14:26:23 +0200
Subject: [PATCH 412/502] KVM: x86: Introduce trace_kvm_hv_hypercall_done()

Hypercall failures are unusual with potentially far going consequences
so it would be useful to see their results when tracing.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210730122625.112848-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c |  1 +
 arch/x86/kvm/trace.h  | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index cb7e045905a5..2945b93dbadd 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 
 static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
 {
+	trace_kvm_hv_hypercall_done(result);
 	kvm_hv_hypercall_set_result(vcpu, result);
 	++vcpu->stat.hypercalls;
 	return kvm_skip_emulated_instruction(vcpu);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b484141ea15b..03ebe368333e 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall,
 		  __entry->outgpa)
 );
 
+TRACE_EVENT(kvm_hv_hypercall_done,
+	TP_PROTO(u64 result),
+	TP_ARGS(result),
+
+	TP_STRUCT__entry(
+		__field(__u64, result)
+	),
+
+	TP_fast_assign(
+		__entry->result	= result;
+	),
+
+	TP_printk("result 0x%llx", __entry->result)
+);
+
 /*
  * Tracepoint for Xen hypercall.
  */

From 4e62aa96d6e55c1b2a4e841f1f8601eae81e81ae Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 30 Jul 2021 14:26:24 +0200
Subject: [PATCH 413/502] KVM: x86: hyper-v: Check if guest is allowed to use
 XMM registers for hypercall input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TLFS states that "Availability of the XMM fast hypercall interface is
indicated via the “Hypervisor Feature Identification” CPUID Leaf
(0x40000003, see section 2.4.4) ... Any attempt to use this interface
when the hypervisor does not indicate availability will result in a #UD
fault."

Implement the check for 'strict' mode (KVM_CAP_HYPERV_ENFORCE_CPUID).

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210730122625.112848-4-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 2945b93dbadd..0b38f944c6b6 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2140,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 	struct kvm_hv_hcall hc;
 	u64 ret = HV_STATUS_SUCCESS;
 
@@ -2177,13 +2178,21 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
 			       hc.ingpa, hc.outgpa);
 
-	if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) {
+	if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) {
 		ret = HV_STATUS_ACCESS_DENIED;
 		goto hypercall_complete;
 	}
 
-	if (hc.fast && is_xmm_fast_hypercall(&hc))
+	if (hc.fast && is_xmm_fast_hypercall(&hc)) {
+		if (unlikely(hv_vcpu->enforce_cpuid &&
+			     !(hv_vcpu->cpuid_cache.features_edx &
+			       HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) {
+			kvm_queue_exception(vcpu, UD_VECTOR);
+			return 1;
+		}
+
 		kvm_hv_hypercall_read_xmm(&hc);
+	}
 
 	switch (hc.code) {
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:

From 2476b5a1b16ced78a80629da8ff87538d5c95073 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 30 Jul 2021 14:26:25 +0200
Subject: [PATCH 414/502] KVM: selftests: Test access to XMM fast hypercalls

Check that #UD is raised if bit 16 is clear in
HYPERV_CPUID_FEATURES.EDX and an 'XMM fast' hypercall is issued.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210730122625.112848-5-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/hyperv.h     |  5 ++-
 .../selftests/kvm/x86_64/hyperv_features.c    | 41 +++++++++++++++++--
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index 412eaee7884a..b66910702c0a 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -117,7 +117,7 @@
 #define HV_X64_GUEST_DEBUGGING_AVAILABLE		BIT(1)
 #define HV_X64_PERF_MONITOR_AVAILABLE			BIT(2)
 #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE	BIT(3)
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE		BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE		BIT(4)
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE		BIT(5)
 #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE		BIT(8)
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE		BIT(10)
@@ -182,4 +182,7 @@
 #define HV_STATUS_INVALID_CONNECTION_ID		18
 #define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT		BIT(16)
+
 #endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index af27c7e829c1..91d88aaa9899 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val)
 }
 
 static int nr_gp;
+static int nr_ud;
 
 static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
 			    vm_vaddr_t output_address)
@@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs)
 		regs->rip = (uint64_t)&wrmsr_end;
 }
 
+static void guest_ud_handler(struct ex_regs *regs)
+{
+	nr_ud++;
+	regs->rip += 3;
+}
+
 struct msr_data {
 	uint32_t idx;
 	bool available;
@@ -90,6 +97,7 @@ struct msr_data {
 struct hcall_data {
 	uint64_t control;
 	uint64_t expect;
+	bool ud_expected;
 };
 
 static void guest_msr(struct msr_data *msr)
@@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr)
 static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 {
 	int i = 0;
+	u64 res, input, output;
 
 	wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
 	wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
 
 	while (hcall->control) {
-		GUEST_ASSERT(hypercall(hcall->control, pgs_gpa,
-				       pgs_gpa + 4096) == hcall->expect);
+		nr_ud = 0;
+		if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+			input = pgs_gpa;
+			output = pgs_gpa + 4096;
+		} else {
+			input = output = 0;
+		}
+
+		res = hypercall(hcall->control, input, output);
+		if (hcall->ud_expected)
+			GUEST_ASSERT(nr_ud == 1);
+		else
+			GUEST_ASSERT(res == hcall->expect);
+
 		GUEST_SYNC(i++);
 	}
 
@@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
 			recomm.ebx = 0xfff;
 			hcall->expect = HV_STATUS_SUCCESS;
 			break;
-
 		case 17:
+			/* XMM fast hypercall */
+			hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+			hcall->ud_expected = true;
+			break;
+		case 18:
+			feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
+			hcall->ud_expected = false;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+
+		case 19:
 			/* END */
 			hcall->control = 0;
 			break;
@@ -625,6 +656,10 @@ int main(void)
 	/* Test hypercalls */
 	vm = vm_create_default(VCPU_ID, 0, guest_hcall);
 
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
 	/* Hypercall input/output */
 	hcall_page = vm_vaddr_alloc_pages(vm, 2);
 	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());

From ae954bbc451d267f7d60d7b49db811d5a68ebd7b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 1 Aug 2021 02:25:31 -0400
Subject: [PATCH 415/502] sctp: move the active_key update after sh_keys is
 added

In commit 58acd1009226 ("sctp: update active_key for asoc when old key is
being replaced"), sctp_auth_asoc_init_active_key() is called to update
the active_key right after the old key is deleted and before the new key
is added, and it caused that the active_key could be found with the key_id.

In Ying Xu's testing, the BUG_ON in sctp_auth_asoc_init_active_key() was
triggered:

  [ ] kernel BUG at net/sctp/auth.c:416!
  [ ] RIP: 0010:sctp_auth_asoc_init_active_key.part.8+0xe7/0xf0 [sctp]
  [ ] Call Trace:
  [ ]  sctp_auth_set_key+0x16d/0x1b0 [sctp]
  [ ]  sctp_setsockopt.part.33+0x1ba9/0x2bd0 [sctp]
  [ ]  __sys_setsockopt+0xd6/0x1d0
  [ ]  __x64_sys_setsockopt+0x20/0x30
  [ ]  do_syscall_64+0x5b/0x1a0

So fix it by moving the active_key update after sh_keys is added.

Fixes: 58acd1009226 ("sctp: update active_key for asoc when old key is being replaced")
Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/auth.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index fe74c5f95630..db6b7373d16c 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -857,14 +857,18 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
 	memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
 	cur_key->key = key;
 
-	if (replace) {
-		list_del_init(&shkey->key_list);
-		sctp_auth_shkey_release(shkey);
-		if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
-			sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+	if (!replace) {
+		list_add(&cur_key->key_list, sh_keys);
+		return 0;
 	}
+
+	list_del_init(&shkey->key_list);
+	sctp_auth_shkey_release(shkey);
 	list_add(&cur_key->key_list, sh_keys);
 
+	if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
+		sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+
 	return 0;
 }
 

From f41e57af926ad840d114439d34cafc0533bf25f0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 2 Aug 2021 17:21:53 +0200
Subject: [PATCH 416/502] net: sparx5: fix bitmask on 32-bit targets

I saw the build failure that was fixed in commit 6387f65e2acb ("net:
sparx5: fix compiletime_assert for GCC 4.9") and noticed another
issue that was introduced in the same patch: Using GENMASK() to
create a 64-bit mask does not work on 32-bit architectures.

This probably won't ever happen on this driver since it's specific
to a 64-bit SoC, but it's better to write it portably, so use
GENMASK_ULL() instead.

Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
index 1a240e6bddd0..cb68eaaac881 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
@@ -32,7 +32,7 @@ static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
 	u32 byte = (35 - (pos / 8));
 	/* Calculate the Start bit position in the Start IFH byte */
 	u32 bit  = (pos % 8);
-	u64 encode = GENMASK(bit + width - 1, bit) & (value << bit);
+	u64 encode = GENMASK_ULL(bit + width - 1, bit) & (value << bit);
 
 	/* The b0-b7 goes into the start IFH byte */
 	if (encode & 0xFF)

From 9c9c6d0ab08acfe41c9f7efa72c4ad3f133a266b Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Wed, 28 Jul 2021 16:34:11 -0700
Subject: [PATCH 417/502] drm/i915: Correct SFC_DONE register offset

The register offset for SFC_DONE was missing a '0' at the end, causing
us to read from a non-existent register address.  We only use this
register in error state dumps so the mistake hasn't caused any real
problems, but fixing it will hopefully make the error state dumps a bit
more useful for debugging.

Fixes: e50dbdbfd9fb ("drm/i915/tgl: Add SFC instdone to error state")
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210728233411.2365788-1-matthew.d.roper@intel.com
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
(cherry picked from commit 82929a2140eb99f1f1d21855f3f580e70d7abdd8)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 94fde5ca26ae..41186c1f771e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -422,7 +422,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   GEN12_HCP_SFC_LOCK_ACK_BIT		REG_BIT(1)
 #define   GEN12_HCP_SFC_USAGE_BIT			REG_BIT(0)
 
-#define GEN12_SFC_DONE(n)		_MMIO(0x1cc00 + (n) * 0x100)
+#define GEN12_SFC_DONE(n)		_MMIO(0x1cc000 + (n) * 0x1000)
 #define GEN12_SFC_DONE_MAX		4
 
 #define RING_PP_DIR_BASE(base)		_MMIO((base) + 0x228)

From 1354d830cb8f9be966cc07fc61368af27ffb7c4a Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Wed, 21 Jul 2021 10:23:54 -0500
Subject: [PATCH 418/502] drm/i915: Call i915_globals_exit() if
 pci_register_device() fails

In the unlikely event that pci_register_device() fails, we were tearing
down our PMU setup but not globals.  This leaves a bunch of memory slabs
lying around.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Fixes: 32eb6bcfdda9 ("drm/i915: Make request allocation caches global")
[danvet: Fix conflicts against removal of the globals_flush
infrastructure.]
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-3-jason@jlekstrand.net
(cherry picked from commit db484889d1ff0645e07e360d3e3ad306c0515821)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Fixed small conflict while cherry picking]
---
 drivers/gpu/drm/i915/i915_globals.c | 2 +-
 drivers/gpu/drm/i915/i915_pci.c     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index 77f1911c463b..2db90e770616 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -148,7 +148,7 @@ static void __exit __i915_globals_flush(void)
 	atomic_dec(&active);
 }
 
-void __exit i915_globals_exit(void)
+void i915_globals_exit(void)
 {
 	GEM_BUG_ON(atomic_read(&active));
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 83b500bb170c..2880ec57c97d 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1195,6 +1195,7 @@ static int __init i915_init(void)
 	err = pci_register_driver(&i915_pci_driver);
 	if (err) {
 		i915_pmu_exit();
+		i915_globals_exit();
 		return err;
 	}
 

From 97367c97226aab8b298ada954ce12659ee3ad2a4 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 3 Aug 2021 13:43:12 +0200
Subject: [PATCH 419/502] ALSA: seq: Fix racy deletion of subscriber

It turned out that the current implementation of the port subscription
is racy.  The subscription contains two linked lists, and we have to
add to or delete from both lists.  Since both connection and
disconnection procedures perform the same order for those two lists
(i.e. src list, then dest list), when a deletion happens during a
connection procedure, the src list may be deleted before the dest list
addition completes, and this may lead to a use-after-free or an Oops,
even though the access to both lists are protected via mutex.

The simple workaround for this race is to change the access order for
the disconnection, namely, dest list, then src list.  This assures
that the connection has been established when disconnecting, and also
the concurrent deletion can be avoided.

Reported-and-tested-by: folkert <folkert@vanheusden.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210801182754.GP890690@belle.intranet.vanheusden.com
Link: https://lore.kernel.org/r/20210803114312.2536-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_ports.c | 39 ++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c
index b9c2ce2b8d5a..84d78630463e 100644
--- a/sound/core/seq/seq_ports.c
+++ b/sound/core/seq/seq_ports.c
@@ -514,10 +514,11 @@ static int check_and_subscribe_port(struct snd_seq_client *client,
 	return err;
 }
 
-static void delete_and_unsubscribe_port(struct snd_seq_client *client,
-					struct snd_seq_client_port *port,
-					struct snd_seq_subscribers *subs,
-					bool is_src, bool ack)
+/* called with grp->list_mutex held */
+static void __delete_and_unsubscribe_port(struct snd_seq_client *client,
+					  struct snd_seq_client_port *port,
+					  struct snd_seq_subscribers *subs,
+					  bool is_src, bool ack)
 {
 	struct snd_seq_port_subs_info *grp;
 	struct list_head *list;
@@ -525,7 +526,6 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client,
 
 	grp = is_src ? &port->c_src : &port->c_dest;
 	list = is_src ? &subs->src_list : &subs->dest_list;
-	down_write(&grp->list_mutex);
 	write_lock_irq(&grp->list_lock);
 	empty = list_empty(list);
 	if (!empty)
@@ -535,6 +535,18 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client,
 
 	if (!empty)
 		unsubscribe_port(client, port, grp, &subs->info, ack);
+}
+
+static void delete_and_unsubscribe_port(struct snd_seq_client *client,
+					struct snd_seq_client_port *port,
+					struct snd_seq_subscribers *subs,
+					bool is_src, bool ack)
+{
+	struct snd_seq_port_subs_info *grp;
+
+	grp = is_src ? &port->c_src : &port->c_dest;
+	down_write(&grp->list_mutex);
+	__delete_and_unsubscribe_port(client, port, subs, is_src, ack);
 	up_write(&grp->list_mutex);
 }
 
@@ -590,27 +602,30 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector,
 			    struct snd_seq_client_port *dest_port,
 			    struct snd_seq_port_subscribe *info)
 {
-	struct snd_seq_port_subs_info *src = &src_port->c_src;
+	struct snd_seq_port_subs_info *dest = &dest_port->c_dest;
 	struct snd_seq_subscribers *subs;
 	int err = -ENOENT;
 
-	down_write(&src->list_mutex);
+	/* always start from deleting the dest port for avoiding concurrent
+	 * deletions
+	 */
+	down_write(&dest->list_mutex);
 	/* look for the connection */
-	list_for_each_entry(subs, &src->list_head, src_list) {
+	list_for_each_entry(subs, &dest->list_head, dest_list) {
 		if (match_subs_info(info, &subs->info)) {
-			atomic_dec(&subs->ref_count); /* mark as not ready */
+			__delete_and_unsubscribe_port(dest_client, dest_port,
+						      subs, false,
+						      connector->number != dest_client->number);
 			err = 0;
 			break;
 		}
 	}
-	up_write(&src->list_mutex);
+	up_write(&dest->list_mutex);
 	if (err < 0)
 		return err;
 
 	delete_and_unsubscribe_port(src_client, src_port, subs, true,
 				    connector->number != src_client->number);
-	delete_and_unsubscribe_port(dest_client, dest_port, subs, false,
-				    connector->number != dest_client->number);
 	kfree(subs);
 	return 0;
 }

From c87a4c542b5a796f795fec2b7a909c7d3067b11c Mon Sep 17 00:00:00 2001
From: Bijie Xu <bijie.xu@corigine.com>
Date: Tue, 3 Aug 2021 11:40:18 +0200
Subject: [PATCH 420/502] net: flow_offload: correct comments mismatch with
 code

Correct mismatch between the name of flow_offload_has_one_action()
and its kdoc entry.

Found using ./scripts/kernel-doc -Werror -none include/net/flow_offload.h

Signed-off-by: Bijie Xu <bijie.xu@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 69c9eabf8325..f3c2841566a0 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -293,7 +293,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action)
 }
 
 /**
- * flow_action_has_one_action() - check if exactly one action is present
+ * flow_offload_has_one_action() - check if exactly one action is present
  * @action: tc filter flow offload action
  *
  * Returns true if exactly one action is present.

From 0161d151f3e36306219f5aa6f5f6b3877038afd3 Mon Sep 17 00:00:00 2001
From: Bijie Xu <bijie.xu@corigine.com>
Date: Tue, 3 Aug 2021 11:40:19 +0200
Subject: [PATCH 421/502] net: sched: provide missing kdoc for tcf_pkt_info and
 tcf_ematch_ops

Provide missing kdoc of fields of struct tcf_pkt_info and tcf_ematch_ops.

Found using ./scripts/kernel-doc -none -Werror include/net/pkt_cls.h

Signed-off-by: Bijie Xu <bijie.xu@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ec7823921bd2..298a8d10168b 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -337,6 +337,9 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
 
 /**
  * struct tcf_pkt_info - packet information
+ *
+ * @ptr: start of the pkt data
+ * @nexthdr: offset of the next header
  */
 struct tcf_pkt_info {
 	unsigned char *		ptr;
@@ -355,6 +358,7 @@ struct tcf_ematch_ops;
  * @ops: the operations lookup table of the corresponding ematch module
  * @datalen: length of the ematch specific configuration data
  * @data: ematch specific data
+ * @net: the network namespace
  */
 struct tcf_ematch {
 	struct tcf_ematch_ops * ops;

From 9fdc5d85a8fe684cdf24dc31c6bc4a727decfe87 Mon Sep 17 00:00:00 2001
From: Fei Qin <fei.qin@corigine.com>
Date: Tue, 3 Aug 2021 12:39:11 +0200
Subject: [PATCH 422/502] nfp: update ethtool reporting of pauseframe control

Pauseframe control is set to symmetric mode by default on the NFP.
Pause frames can not be configured through ethtool now, but ethtool can
report the supported mode.

Fixes: 265aeb511bd5 ("nfp: add support for .get_link_ksettings()")
Signed-off-by: Fei Qin <fei.qin@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 1b482446536d..8803faadd302 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -286,6 +286,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
 
 	/* Init to unknowns */
 	ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
 	cmd->base.port = PORT_OTHER;
 	cmd->base.speed = SPEED_UNKNOWN;
 	cmd->base.duplex = DUPLEX_UNKNOWN;

From 4039146777a91e1576da2bf38e0d8a1061a1ae47 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Tue, 3 Aug 2021 12:00:16 +0200
Subject: [PATCH 423/502] net: ipv6: fix returned variable type in
 ip6_skb_dst_mtu

The patch fixing the returned value of ip6_skb_dst_mtu (int -> unsigned
int) was rebased between its initial review and the version applied. In
the meantime fade56410c22 was applied, which added a new variable (int)
used as the returned value. This lead to a mismatch between the function
prototype and the variable used as the return value.

Fixes: 40fc3054b458 ("net: ipv6: fix return value of ip6_skb_dst_mtu")
Cc: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 625a38ccb5d9..0bf09a9bca4e 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -265,7 +265,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
 static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb)
 {
-	int mtu;
+	unsigned int mtu;
 
 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 				inet6_sk(skb->sk) : NULL;

From ecd92e2167c30faa18df21e3ec3dbec510ddebaa Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Sat, 31 Jul 2021 13:13:40 +0200
Subject: [PATCH 424/502] s390: update defconfigs

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/configs/debug_defconfig | 2 +-
 arch/s390/configs/defconfig       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 7de253f766e8..b88184019af9 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -335,7 +335,7 @@ CONFIG_L2TP_DEBUGFS=m
 CONFIG_L2TP_V3=y
 CONFIG_L2TP_IP=m
 CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
+CONFIG_BRIDGE=y
 CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index b671642967ba..1667a3cdcf0a 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -325,7 +325,7 @@ CONFIG_L2TP_DEBUGFS=m
 CONFIG_L2TP_V3=y
 CONFIG_L2TP_IP=m
 CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
+CONFIG_BRIDGE=y
 CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y

From c2ec772b87408259cb01209a22fb4e1ae7d346de Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 30 Jul 2021 16:38:07 +0200
Subject: [PATCH 425/502] cpuidle: teo: Fix alternative idle state lookup

There are three mistakes in the loop in teo_select() that is looking
for an alternative candidate idle state.  First, it should walk all
of the idle states shallower than the current candidate one,
including all of the disabled ones, but it terminates after the first
enabled idle state.  Second, it should not terminate its last step
if idle state 0 is disabled (which is related to the first issue).
Finally, it may return the current alternative candidate idle state
prematurely if the time span criterion is not met by the idle state
under consideration at the moment.

To address the issues mentioned above, make the loop in question walk
all of the idle states shallower than the current candidate idle state
all the way down to idle state 0 and rearrange the checks in it.

Fixes: 77577558f25d ("cpuidle: teo: Rework most recent idle duration values treatment")
Reported-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 44 ++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 7b91060e82f6..8c59050ba419 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -397,32 +397,46 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		intercept_sum = 0;
 		recent_sum = 0;
 
-		for (i = idx - 1; i >= idx0; i--) {
+		for (i = idx - 1; i >= 0; i--) {
 			struct teo_bin *bin = &cpu_data->state_bins[i];
 			s64 span_ns;
 
 			intercept_sum += bin->intercepts;
 			recent_sum += bin->recent;
 
-			if (dev->states_usage[i].disable)
-				continue;
-
 			span_ns = teo_middle_of_bin(i, drv);
-			if (!teo_time_ok(span_ns)) {
-				/*
-				 * The current state is too shallow, so select
-				 * the first enabled deeper state.
-				 */
-				duration_ns = last_enabled_span_ns;
-				idx = last_enabled_idx;
-				break;
-			}
 
 			if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
 			    (!alt_intercepts ||
 			     2 * intercept_sum > idx_intercept_sum)) {
-				idx = i;
-				duration_ns = span_ns;
+				if (teo_time_ok(span_ns) &&
+				    !dev->states_usage[i].disable) {
+					idx = i;
+					duration_ns = span_ns;
+				} else {
+					/*
+					 * The current state is too shallow or
+					 * disabled, so take the first enabled
+					 * deeper state with suitable time span.
+					 */
+					idx = last_enabled_idx;
+					duration_ns = last_enabled_span_ns;
+				}
+				break;
+			}
+
+			if (dev->states_usage[i].disable)
+				continue;
+
+			if (!teo_time_ok(span_ns)) {
+				/*
+				 * The current state is too shallow, but if an
+				 * alternative candidate state has been found,
+				 * it may still turn out to be a better choice.
+				 */
+				if (last_enabled_idx != idx)
+					continue;
+
 				break;
 			}
 

From 4adae7dd10db10f20f51833dc11b3cf7a342ad38 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 30 Jul 2021 16:38:52 +0200
Subject: [PATCH 426/502] cpuidle: teo: Rename two local variables in
 teo_select()

Rename two local variables in teo_select() so that their names better
reflect their purpose.

No functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 8c59050ba419..d9262db79cae 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -382,8 +382,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
 	alt_recent = idx_recent_sum > NR_RECENT / 2;
 	if (alt_recent || alt_intercepts) {
-		s64 last_enabled_span_ns = duration_ns;
-		int last_enabled_idx = idx;
+		s64 first_suitable_span_ns = duration_ns;
+		int first_suitable_idx = idx;
 
 		/*
 		 * Look for the deepest idle state whose target residency had
@@ -419,8 +419,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 					 * disabled, so take the first enabled
 					 * deeper state with suitable time span.
 					 */
-					idx = last_enabled_idx;
-					duration_ns = last_enabled_span_ns;
+					idx = first_suitable_idx;
+					duration_ns = first_suitable_span_ns;
 				}
 				break;
 			}
@@ -434,14 +434,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				 * alternative candidate state has been found,
 				 * it may still turn out to be a better choice.
 				 */
-				if (last_enabled_idx != idx)
+				if (first_suitable_idx != idx)
 					continue;
 
 				break;
 			}
 
-			last_enabled_span_ns = span_ns;
-			last_enabled_idx = i;
+			first_suitable_span_ns = span_ns;
+			first_suitable_idx = i;
 		}
 	}
 

From 6511a8b5b7a65037340cd8ee91a377811effbc83 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 3 Aug 2021 18:14:44 +0200
Subject: [PATCH 427/502] Revert "ACPICA: Fix memory leak caused by _CID repair
 function"

Revert commit c27bac0314131 ("ACPICA: Fix memory leak caused by _CID
repair function") which is reported to cause a boot issue on Acer
Swift 3 (SF314-51).

Reported-by: Adrien Precigout <dev@asdrip.fr>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/nsrepair2.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 38e10ab976e6..14b71b41e845 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -379,13 +379,6 @@ acpi_ns_repair_CID(struct acpi_evaluate_info *info,
 
 			(*element_ptr)->common.reference_count =
 			    original_ref_count;
-
-			/*
-			 * The original_element holds a reference from the package object
-			 * that represents _HID. Since a new element was created by _HID,
-			 * remove the reference from the _CID package.
-			 */
-			acpi_ut_remove_reference(original_element);
 		}
 
 		element_ptr++;

From 8b436a99cd708bd158231a0630ffa49b1d6175e4 Mon Sep 17 00:00:00 2001
From: Yangyang Li <liyangyang20@huawei.com>
Date: Mon, 2 Aug 2021 14:56:14 +0800
Subject: [PATCH 428/502] RDMA/hns: Fix the double unlock problem of poll_sem

If hns_roce_cmd_use_events() fails then it means that the poll_sem is not
obtained, but the poll_sem is released in hns_roce_cmd_use_polling(), this
will cause an unlock problem.

This is the static checker warning:
	drivers/infiniband/hw/hns/hns_roce_main.c:926 hns_roce_init()
	error: double unlocked '&hr_dev->cmd.poll_sem' (orig line 879)

Event mode and polling mode are mutually exclusive and resources are
separated, so there is no need to process polling mode resources in event
mode.

The initial mode of cmd is polling mode, so even if cmd fails to switch to
event mode, it is not necessary to switch to polling mode.

Fixes: a389d016c030 ("RDMA/hns: Enable all CMDQ context")
Fixes: 3d50503b3b33 ("RDMA/hns: Optimize cmd init and mode selection for hip08")
Link: https://lore.kernel.org/r/1627887374-20019-1-git-send-email-liangwenpeng@huawei.com
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_cmd.c  | 7 +++----
 drivers/infiniband/hw/hns/hns_roce_main.c | 4 +---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 8f68cc3ff193..84f3f2b5f097 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -213,8 +213,10 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
 
 	hr_cmd->context =
 		kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL);
-	if (!hr_cmd->context)
+	if (!hr_cmd->context) {
+		hr_dev->cmd_mod = 0;
 		return -ENOMEM;
+	}
 
 	for (i = 0; i < hr_cmd->max_cmds; ++i) {
 		hr_cmd->context[i].token = i;
@@ -228,7 +230,6 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
 	spin_lock_init(&hr_cmd->context_lock);
 
 	hr_cmd->use_events = 1;
-	down(&hr_cmd->poll_sem);
 
 	return 0;
 }
@@ -239,8 +240,6 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev)
 
 	kfree(hr_cmd->context);
 	hr_cmd->use_events = 0;
-
-	up(&hr_cmd->poll_sem);
 }
 
 struct hns_roce_cmd_mailbox *
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 078a97193f0e..cc6eab14a222 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -873,11 +873,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
 
 	if (hr_dev->cmd_mod) {
 		ret = hns_roce_cmd_use_events(hr_dev);
-		if (ret) {
+		if (ret)
 			dev_warn(dev,
 				 "Cmd event  mode failed, set back to poll!\n");
-			hns_roce_cmd_use_polling(hr_dev);
-		}
 	}
 
 	ret = hns_roce_init_hem(hr_dev);

From abc7285d89ffd089739a1a3059ddd843dd019637 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Mon, 2 Aug 2021 16:19:14 -0700
Subject: [PATCH 429/502] mptcp: drop unused rcu member in mptcp_pm_addr_entry

kfree_rcu() had been removed from pm_netlink.c, so this rcu field in
struct mptcp_pm_addr_entry became useless. Let's drop it.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Link: https://lore.kernel.org/r/20210802231914.54709-1-mathew.j.martineau@linux.intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/pm_netlink.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d2591ebf01d9..56263c2c4014 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry {
 	struct mptcp_addr_info	addr;
 	u8			flags;
 	int			ifindex;
-	struct rcu_head		rcu;
 	struct socket		*lsk;
 };
 

From e3ea110d6e796146920e1be0108464ebcf283ef7 Mon Sep 17 00:00:00 2001
From: Harshavardhan Unnibhavi <harshanavkis@gmail.com>
Date: Mon, 2 Aug 2021 19:35:06 +0200
Subject: [PATCH 430/502] VSOCK: handle VIRTIO_VSOCK_OP_CREDIT_REQUEST

The original implementation of the virtio-vsock driver does not
handle a VIRTIO_VSOCK_OP_CREDIT_REQUEST as required by the
virtio-vsock specification. The vsock device emulated by
vhost-vsock and the virtio-vsock driver never uses this request,
which was probably why nobody noticed it. However, another
implementation of the device may use this request type.

Hence, this commit introduces a way to handle an explicit credit
request by responding with a corresponding credit update as
required by the virtio-vsock specification.

Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko")
Signed-off-by: Harshavardhan Unnibhavi <harshanavkis@gmail.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/r/20210802173506.2383-1-harshanavkis@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/vmw_vsock/virtio_transport_common.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 169ba8b72a63..081e7ae93cb1 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1079,6 +1079,9 @@ virtio_transport_recv_connected(struct sock *sk,
 		virtio_transport_recv_enqueue(vsk, pkt);
 		sk->sk_data_ready(sk);
 		return err;
+	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
+		virtio_transport_send_credit_update(vsk);
+		break;
 	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
 		sk->sk_write_space(sk);
 		break;

From d1a58c013a5837451e3213e7a426d350fa524ead Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 3 Aug 2021 08:37:46 +0200
Subject: [PATCH 431/502] net: dsa: qca: ar9331: reorder MDIO write sequence

In case of this switch we work with 32bit registers on top of 16bit
bus. Some registers (for example access to forwarding database) have
trigger bit on the first 16bit half of request and the result +
configuration of request in the second half. Without this patch, we would
trigger database operation and overwrite result in one run.

To make it work properly, we should do the second part of transfer
before the first one is done.

So far, this rule seems to work for all registers on this switch.

Fixes: ec6698c272de ("net: dsa: add support for Atheros AR9331 built-in switch")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://lore.kernel.org/r/20210803063746.3600-1-o.rempel@pengutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/qca/ar9331.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index ca2ad77b71f1..6686192e1883 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -837,16 +837,24 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val)
 		return 0;
 	}
 
-	ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val);
-	if (ret < 0)
-		goto error;
-
+	/* In case of this switch we work with 32bit registers on top of 16bit
+	 * bus. Some registers (for example access to forwarding database) have
+	 * trigger bit on the first 16bit half of request, the result and
+	 * configuration of request in the second half.
+	 * To make it work properly, we should do the second part of transfer
+	 * before the first one is done.
+	 */
 	ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2,
 				  val >> 16);
 	if (ret < 0)
 		goto error;
 
+	ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val);
+	if (ret < 0)
+		goto error;
+
 	return 0;
+
 error:
 	dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n");
 	return ret;

From d09560435cb712c9ec1e62b8a43a79b0af69fe77 Mon Sep 17 00:00:00 2001
From: Qiu Wenbo <qiuwenbo@kylinos.com.cn>
Date: Sun, 4 Jul 2021 16:34:41 +0800
Subject: [PATCH 432/502] riscv: dts: fix memory size for the SiFive HiFive
 Unmatched

The production version of HiFive Unmatched have 16GB memory.

Signed-off-by: Qiu Wenbo <qiuwenbo@kylinos.com.cn>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
index b1c3c596578f..2e4ea84f27e7 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -24,7 +24,7 @@
 
 	memory@80000000 {
 		device_type = "memory";
-		reg = <0x0 0x80000000 0x2 0x00000000>;
+		reg = <0x0 0x80000000 0x4 0x00000000>;
 	};
 
 	soc {

From a18b14d8886614b3c7d290c4cfc33389822b0535 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 6 Jul 2021 09:26:21 -0700
Subject: [PATCH 433/502] riscv: Disable STACKPROTECTOR_PER_TASK if
 GCC_PLUGIN_RANDSTRUCT is enabled

riscv uses the value of TSK_STACK_CANARY to set
stack-protector-guard-offset. With GCC_PLUGIN_RANDSTRUCT enabled, that
value is non-deterministic, and with riscv:allmodconfig often results
in build errors such as

cc1: error: '8120' is not a valid offset in '-mstack-protector-guard-offset='

Enable STACKPROTECTOR_PER_TASK only if GCC_PLUGIN_RANDSTRUCT is disabled
to fix the problem.

Fixes: fea2fed201ee5 ("riscv: Enable per-task stack canaries")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 8fcceb8eda07..31f9e92f1402 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -492,6 +492,7 @@ config CC_HAVE_STACKPROTECTOR_TLS
 
 config STACKPROTECTOR_PER_TASK
 	def_bool y
+	depends on !GCC_PLUGIN_RANDSTRUCT
 	depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS
 
 config PHYS_RAM_BASE

From 5648c073c33d33a0a19d0cb1194a4eb88efe2b71 Mon Sep 17 00:00:00 2001
From: Daniele Palmas <dnlplm@gmail.com>
Date: Tue, 3 Aug 2021 21:47:11 +0200
Subject: [PATCH 434/502] USB: serial: option: add Telit FD980 composition
 0x1056

Add the following Telit FD980 composition 0x1056:

Cfg #1: mass storage
Cfg #2: rndis, tty, adb, tty, tty, tty, tty

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
Link: https://lore.kernel.org/r/20210803194711.3036-1-dnlplm@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 0fbe253dc570..039450069ca4 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1203,6 +1203,8 @@ static const struct usb_device_id option_ids[] = {
 	  .driver_info = NCTRL(2) | RSVD(3) },
 	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1055, 0xff),	/* Telit FN980 (PCIe) */
 	  .driver_info = NCTRL(0) | RSVD(1) },
+	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff),	/* Telit FD980 */
+	  .driver_info = NCTRL(2) | RSVD(3) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
 	  .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),

From 06f5553e0f0c2182268179b93856187d9cb86dd5 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Tue, 3 Aug 2021 18:58:21 +0800
Subject: [PATCH 435/502] net: sched: fix lockdep_set_class() typo error for
 sch->seqlock

According to comment in qdisc_alloc(), sch->seqlock's lockdep
class key should be set to qdisc_tx_busylock, due to possible
type error, sch->busylock's lockdep class key is set to
qdisc_tx_busylock, which is duplicated because sch->busylock's
lockdep class key is already set in qdisc_alloc().

So fix it by replacing sch->busylock with sch->seqlock.

Fixes: 96009c7d500e ("sched: replace __QDISC_STATE_RUNNING bit with a spin lock")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d9ac60ffe927..a8dd06c74e31 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -913,7 +913,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 
 	/* seqlock has the same scope of busylock, for NOLOCK qdisc */
 	spin_lock_init(&sch->seqlock);
-	lockdep_set_class(&sch->busylock,
+	lockdep_set_class(&sch->seqlock,
 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
 
 	seqcount_init(&sch->running);

From 13a9c4ac319a23c792e2e03ac73777b6710132c3 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 3 Aug 2021 15:00:43 +0300
Subject: [PATCH 436/502] net/prestera: Fix devlink groups leakage in error
 flow

Devlink trap group is registered but not released in error flow,
add the missing devlink_trap_groups_unregister() call.

Fixes: 0a9003f45e91 ("net: marvell: prestera: devlink: add traps/groups implementation")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/prestera/prestera_devlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
index d12e21db9fd6..fa7a0682ad1e 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
@@ -530,6 +530,8 @@ err_trap_register:
 		prestera_trap = &prestera_trap_items_arr[i];
 		devlink_traps_unregister(devlink, &prestera_trap->trap, 1);
 	}
+	devlink_trap_groups_unregister(devlink, prestera_trap_groups_arr,
+				       groups_count);
 err_groups_register:
 	kfree(trap_data->trap_items_arr);
 err_trap_items_alloc:

From 3212a99349cee5fb611d3ffcf0e65bc3cd6dcf2f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 4 Aug 2021 11:31:00 +0200
Subject: [PATCH 437/502] USB: serial: pl2303: fix GT type detection

At least some PL2303GT have a bcdDevice of 0x305 instead of 0x100 as the
datasheet claims. Add it to the list of known release numbers for the
HXN (G) type.

Fixes: 894758d0571d ("USB: serial: pl2303: tighten type HXN (G) detection")
Reported-by: Vasily Khoruzhick <anarsoul@gmail.com>
Tested-by: Vasily Khoruzhick <anarsoul@gmail.com>
Cc: stable@vger.kernel.org	# 5.13
Link: https://lore.kernel.org/r/20210804093100.24811-1-johan@kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/pl2303.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 17601e32083e..930b3d50a330 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -432,6 +432,7 @@ static int pl2303_detect_type(struct usb_serial *serial)
 	case 0x200:
 		switch (bcdDevice) {
 		case 0x100:
+		case 0x305:
 			/*
 			 * Assume it's an HXN-type if the device doesn't
 			 * support the old read request value.

From 8a160e2e9aeb8318159b48701ad8a6e22274372d Mon Sep 17 00:00:00 2001
From: Petko Manolov <petkan@nucleusys.com>
Date: Tue, 3 Aug 2021 20:25:23 +0300
Subject: [PATCH 438/502] net: usb: pegasus: Check the return value of
 get_geristers() and friends;

Certain call sites of get_geristers() did not do proper error handling.  This
could be a problem as get_geristers() typically return the data via pointer to a
buffer.  If an error occurred the code is carelessly manipulating the wrong data.

Signed-off-by: Petko Manolov <petkan@nucleusys.com>
Reviewed-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/pegasus.c | 108 ++++++++++++++++++++++++++------------
 1 file changed, 75 insertions(+), 33 deletions(-)

diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 9a907182569c..22353bab76c8 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -132,9 +132,15 @@ static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data)
 static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size,
 			 const void *data)
 {
-	return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS,
+	int ret;
+
+	ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS,
 				    PEGASUS_REQT_WRITE, 0, indx, data, size,
 				    1000, GFP_NOIO);
+	if (ret < 0)
+		netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret);
+
+	return ret;
 }
 
 /*
@@ -145,10 +151,15 @@ static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size,
 static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data)
 {
 	void *buf = &data;
+	int ret;
 
-	return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG,
+	ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG,
 				    PEGASUS_REQT_WRITE, data, indx, buf, 1,
 				    1000, GFP_NOIO);
+	if (ret < 0)
+		netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret);
+
+	return ret;
 }
 
 static int update_eth_regs_async(pegasus_t *pegasus)
@@ -188,10 +199,9 @@ static int update_eth_regs_async(pegasus_t *pegasus)
 
 static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd)
 {
-	int i;
-	__u8 data[4] = { phy, 0, 0, indx };
+	int i, ret;
 	__le16 regdi;
-	int ret = -ETIMEDOUT;
+	__u8 data[4] = { phy, 0, 0, indx };
 
 	if (cmd & PHY_WRITE) {
 		__le16 *t = (__le16 *) & data[1];
@@ -207,12 +217,15 @@ static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd)
 		if (data[0] & PHY_DONE)
 			break;
 	}
-	if (i >= REG_TIMEOUT)
+	if (i >= REG_TIMEOUT) {
+		ret = -ETIMEDOUT;
 		goto fail;
+	}
 	if (cmd & PHY_READ) {
 		ret = get_registers(p, PhyData, 2, &regdi);
+		if (ret < 0)
+			goto fail;
 		*regd = le16_to_cpu(regdi);
-		return ret;
 	}
 	return 0;
 fail:
@@ -235,9 +248,13 @@ static int write_mii_word(pegasus_t *pegasus, __u8 phy, __u8 indx, __u16 *regd)
 static int mdio_read(struct net_device *dev, int phy_id, int loc)
 {
 	pegasus_t *pegasus = netdev_priv(dev);
+	int ret;
 	u16 res;
 
-	read_mii_word(pegasus, phy_id, loc, &res);
+	ret = read_mii_word(pegasus, phy_id, loc, &res);
+	if (ret < 0)
+		return ret;
+
 	return (int)res;
 }
 
@@ -251,10 +268,9 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val)
 
 static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata)
 {
-	int i;
-	__u8 tmp = 0;
+	int ret, i;
 	__le16 retdatai;
-	int ret;
+	__u8 tmp = 0;
 
 	set_register(pegasus, EpromCtrl, 0);
 	set_register(pegasus, EpromOffset, index);
@@ -262,21 +278,25 @@ static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata)
 
 	for (i = 0; i < REG_TIMEOUT; i++) {
 		ret = get_registers(pegasus, EpromCtrl, 1, &tmp);
+		if (ret < 0)
+			goto fail;
 		if (tmp & EPROM_DONE)
 			break;
-		if (ret == -ESHUTDOWN)
-			goto fail;
 	}
-	if (i >= REG_TIMEOUT)
+	if (i >= REG_TIMEOUT) {
+		ret = -ETIMEDOUT;
 		goto fail;
+	}
 
 	ret = get_registers(pegasus, EpromData, 2, &retdatai);
+	if (ret < 0)
+		goto fail;
 	*retdata = le16_to_cpu(retdatai);
 	return ret;
 
 fail:
-	netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__);
-	return -ETIMEDOUT;
+	netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+	return ret;
 }
 
 #ifdef	PEGASUS_WRITE_EEPROM
@@ -324,10 +344,10 @@ static int write_eprom_word(pegasus_t *pegasus, __u8 index, __u16 data)
 	return ret;
 
 fail:
-	netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+	netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
 	return -ETIMEDOUT;
 }
-#endif				/* PEGASUS_WRITE_EEPROM */
+#endif	/* PEGASUS_WRITE_EEPROM */
 
 static inline int get_node_id(pegasus_t *pegasus, u8 *id)
 {
@@ -367,19 +387,21 @@ static void set_ethernet_addr(pegasus_t *pegasus)
 	return;
 err:
 	eth_hw_addr_random(pegasus->net);
-	dev_info(&pegasus->intf->dev, "software assigned MAC address.\n");
+	netif_dbg(pegasus, drv, pegasus->net, "software assigned MAC address.\n");
 
 	return;
 }
 
 static inline int reset_mac(pegasus_t *pegasus)
 {
+	int ret, i;
 	__u8 data = 0x8;
-	int i;
 
 	set_register(pegasus, EthCtrl1, data);
 	for (i = 0; i < REG_TIMEOUT; i++) {
-		get_registers(pegasus, EthCtrl1, 1, &data);
+		ret = get_registers(pegasus, EthCtrl1, 1, &data);
+		if (ret < 0)
+			goto fail;
 		if (~data & 0x08) {
 			if (loopback)
 				break;
@@ -402,22 +424,29 @@ static inline int reset_mac(pegasus_t *pegasus)
 	}
 	if (usb_dev_id[pegasus->dev_index].vendor == VENDOR_ELCON) {
 		__u16 auxmode;
-		read_mii_word(pegasus, 3, 0x1b, &auxmode);
+		ret = read_mii_word(pegasus, 3, 0x1b, &auxmode);
+		if (ret < 0)
+			goto fail;
 		auxmode |= 4;
 		write_mii_word(pegasus, 3, 0x1b, &auxmode);
 	}
 
 	return 0;
+fail:
+	netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+	return ret;
 }
 
 static int enable_net_traffic(struct net_device *dev, struct usb_device *usb)
 {
-	__u16 linkpart;
-	__u8 data[4];
 	pegasus_t *pegasus = netdev_priv(dev);
 	int ret;
+	__u16 linkpart;
+	__u8 data[4];
 
-	read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart);
+	ret = read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart);
+	if (ret < 0)
+		goto fail;
 	data[0] = 0xc8; /* TX & RX enable, append status, no CRC */
 	data[1] = 0;
 	if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL))
@@ -435,11 +464,16 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb)
 	    usb_dev_id[pegasus->dev_index].vendor == VENDOR_LINKSYS2 ||
 	    usb_dev_id[pegasus->dev_index].vendor == VENDOR_DLINK) {
 		u16 auxmode;
-		read_mii_word(pegasus, 0, 0x1b, &auxmode);
+		ret = read_mii_word(pegasus, 0, 0x1b, &auxmode);
+		if (ret < 0)
+			goto fail;
 		auxmode |= 4;
 		write_mii_word(pegasus, 0, 0x1b, &auxmode);
 	}
 
+	return 0;
+fail:
+	netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
 	return ret;
 }
 
@@ -447,9 +481,9 @@ static void read_bulk_callback(struct urb *urb)
 {
 	pegasus_t *pegasus = urb->context;
 	struct net_device *net;
+	u8 *buf = urb->transfer_buffer;
 	int rx_status, count = urb->actual_length;
 	int status = urb->status;
-	u8 *buf = urb->transfer_buffer;
 	__u16 pkt_len;
 
 	if (!pegasus)
@@ -998,8 +1032,7 @@ static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd)
 		data[0] = pegasus->phy;
 		fallthrough;
 	case SIOCDEVPRIVATE + 1:
-		read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]);
-		res = 0;
+		res = read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]);
 		break;
 	case SIOCDEVPRIVATE + 2:
 		if (!capable(CAP_NET_ADMIN))
@@ -1033,22 +1066,25 @@ static void pegasus_set_multicast(struct net_device *net)
 
 static __u8 mii_phy_probe(pegasus_t *pegasus)
 {
-	int i;
+	int i, ret;
 	__u16 tmp;
 
 	for (i = 0; i < 32; i++) {
-		read_mii_word(pegasus, i, MII_BMSR, &tmp);
+		ret = read_mii_word(pegasus, i, MII_BMSR, &tmp);
+		if (ret < 0)
+			goto fail;
 		if (tmp == 0 || tmp == 0xffff || (tmp & BMSR_MEDIA) == 0)
 			continue;
 		else
 			return i;
 	}
-
+fail:
 	return 0xff;
 }
 
 static inline void setup_pegasus_II(pegasus_t *pegasus)
 {
+	int ret;
 	__u8 data = 0xa5;
 
 	set_register(pegasus, Reg1d, 0);
@@ -1060,7 +1096,9 @@ static inline void setup_pegasus_II(pegasus_t *pegasus)
 		set_register(pegasus, Reg7b, 2);
 
 	set_register(pegasus, 0x83, data);
-	get_registers(pegasus, 0x83, 1, &data);
+	ret = get_registers(pegasus, 0x83, 1, &data);
+	if (ret < 0)
+		goto fail;
 
 	if (data == 0xa5)
 		pegasus->chip = 0x8513;
@@ -1075,6 +1113,10 @@ static inline void setup_pegasus_II(pegasus_t *pegasus)
 		set_register(pegasus, Reg81, 6);
 	else
 		set_register(pegasus, Reg81, 2);
+
+	return;
+fail:
+	netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
 }
 
 static void check_carrier(struct work_struct *work)

From bc65bacf239d0bc1d00d92cd535a4031921dd78a Mon Sep 17 00:00:00 2001
From: Petko Manolov <petkan@nucleusys.com>
Date: Tue, 3 Aug 2021 20:25:24 +0300
Subject: [PATCH 439/502] net: usb: pegasus: Remove the changelog and
 DRIVER_VERSION.

These are now deemed redundant.

Signed-off-by: Petko Manolov <petkan@nucleusys.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/pegasus.c | 30 ++----------------------------
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 22353bab76c8..f18b03be1b87 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -1,31 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- *  Copyright (c) 1999-2013 Petko Manolov (petkan@nucleusys.com)
+ *  Copyright (c) 1999-2021 Petko Manolov (petkan@nucleusys.com)
  *
- *	ChangeLog:
- *		....	Most of the time spent on reading sources & docs.
- *		v0.2.x	First official release for the Linux kernel.
- *		v0.3.0	Beutified and structured, some bugs fixed.
- *		v0.3.x	URBifying bulk requests and bugfixing. First relatively
- *			stable release. Still can touch device's registers only
- *			from top-halves.
- *		v0.4.0	Control messages remained unurbified are now URBs.
- *			Now we can touch the HW at any time.
- *		v0.4.9	Control urbs again use process context to wait. Argh...
- *			Some long standing bugs (enable_net_traffic) fixed.
- *			Also nasty trick about resubmiting control urb from
- *			interrupt context used. Please let me know how it
- *			behaves. Pegasus II support added since this version.
- *			TODO: suppressing HCD warnings spewage on disconnect.
- *		v0.4.13	Ethernet address is now set at probe(), not at open()
- *			time as this seems to break dhcpd.
- *		v0.5.0	branch to 2.5.x kernels
- *		v0.5.1	ethtool support added
- *		v0.5.5	rx socket buffers are in a pool and the their allocation
- *			is out of the interrupt routine.
- *		...
- *		v0.9.3	simplified [get|set]_register(s), async update registers
- *			logic revisited, receive skb_pool removed.
  */
 
 #include <linux/sched.h>
@@ -45,7 +21,6 @@
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.9.3 (2013/04/25)"
 #define DRIVER_AUTHOR "Petko Manolov <petkan@nucleusys.com>"
 #define DRIVER_DESC "Pegasus/Pegasus II USB Ethernet driver"
 
@@ -914,7 +889,6 @@ static void pegasus_get_drvinfo(struct net_device *dev,
 	pegasus_t *pegasus = netdev_priv(dev);
 
 	strlcpy(info->driver, driver_name, sizeof(info->driver));
-	strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
 	usb_make_path(pegasus->usb, info->bus_info, sizeof(info->bus_info));
 }
 
@@ -1338,7 +1312,7 @@ static void __init parse_id(char *id)
 
 static int __init pegasus_init(void)
 {
-	pr_info("%s: %s, " DRIVER_DESC "\n", driver_name, DRIVER_VERSION);
+	pr_info("%s: " DRIVER_DESC "\n", driver_name);
 	if (devid)
 		parse_id(devid);
 	return usb_register(&pegasus_driver);

From 85cd39af14f498f791d8aab3fbd64cd175787f1a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 4 Aug 2021 05:28:52 -0400
Subject: [PATCH 440/502] KVM: Do not leak memory for duplicate debugfs
 directories

KVM creates a debugfs directory for each VM in order to store statistics
about the virtual machine.  The directory name is built from the process
pid and a VM fd.  While generally unique, it is possible to keep a
file descriptor alive in a way that causes duplicate directories, which
manifests as these messages:

  [  471.846235] debugfs: Directory '20245-4' with parent 'kvm' already present!

Even though this should not happen in practice, it is more or less
expected in the case of KVM for testcases that call KVM_CREATE_VM and
close the resulting file descriptor repeatedly and in parallel.

When this happens, debugfs_create_dir() returns an error but
kvm_create_vm_debugfs() goes on to allocate stat data structs which are
later leaked.  The slow memory leak was spotted by syzkaller, where it
caused OOM reports.

Since the issue only affects debugfs, do a lookup before calling
debugfs_create_dir, so that the message is downgraded and rate-limited.
While at it, ensure kvm->debugfs_dentry is NULL rather than an error
if it is not created.  This fixes kvm_destroy_vm_debugfs, which was not
checking IS_ERR_OR_NULL correctly.

Cc: stable@vger.kernel.org
Fixes: 536a6f88c49d ("KVM: Create debugfs dir and stat files for each VM")
Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d20fba0fc290..b50dbe269f4b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -892,6 +892,8 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
 
 static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 {
+	static DEFINE_MUTEX(kvm_debugfs_lock);
+	struct dentry *dent;
 	char dir_name[ITOA_MAX_LEN * 2];
 	struct kvm_stat_data *stat_data;
 	const struct _kvm_stats_desc *pdesc;
@@ -903,8 +905,20 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 		return 0;
 
 	snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
-	kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+	mutex_lock(&kvm_debugfs_lock);
+	dent = debugfs_lookup(dir_name, kvm_debugfs_dir);
+	if (dent) {
+		pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name);
+		dput(dent);
+		mutex_unlock(&kvm_debugfs_lock);
+		return 0;
+	}
+	dent = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+	mutex_unlock(&kvm_debugfs_lock);
+	if (IS_ERR(dent))
+		return 0;
 
+	kvm->debugfs_dentry = dent;
 	kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
 					 sizeof(*kvm->debugfs_stat_data),
 					 GFP_KERNEL_ACCOUNT);
@@ -5201,7 +5215,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 	}
 	add_uevent_var(env, "PID=%d", kvm->userspace_pid);
 
-	if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
+	if (kvm->debugfs_dentry) {
 		char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
 
 		if (p) {

From 179c6c27bf487273652efc99acd3ba512a23c137 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 3 Aug 2021 09:27:46 -0700
Subject: [PATCH 441/502] KVM: SVM: Fix off-by-one indexing when nullifying
 last used SEV VMCB

Use the raw ASID, not ASID-1, when nullifying the last used VMCB when
freeing an SEV ASID.  The consumer, pre_sev_run(), indexes the array by
the raw ASID, thus KVM could get a false negative when checking for a
different VMCB if KVM manages to reallocate the same ASID+VMCB combo for
a new VM.

Note, this cannot cause a functional issue _in the current code_, as
pre_sev_run() also checks which pCPU last did VMRUN for the vCPU, and
last_vmentry_cpu is initialized to -1 during vCPU creation, i.e. is
guaranteed to mismatch on the first VMRUN.  However, prior to commit
8a14fe4f0c54 ("kvm: x86: Move last_cpu into kvm_vcpu_arch as
last_vmentry_cpu"), SVM tracked pCPU on its own and zero-initialized the
last_cpu variable.  Thus it's theoretically possible that older versions
of KVM could miss a TLB flush if the first VMRUN is on pCPU0 and the ASID
and VMCB exactly match those of a prior VM.

Fixes: 70cd94e60c73 ("KVM: SVM: VMRUN should use associated ASID when SEV is enabled")
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6710d9ee2e4b..4d0aba185412 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -189,7 +189,7 @@ static void sev_asid_free(struct kvm_sev_info *sev)
 
 	for_each_possible_cpu(cpu) {
 		sd = per_cpu(svm_data, cpu);
-		sd->sev_vmcbs[pos] = NULL;
+		sd->sev_vmcbs[sev->asid] = NULL;
 	}
 
 	mutex_unlock(&sev_bitmap_lock);

From 396492b4c5f249f616002bb5de787d060d2b2974 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 3 Aug 2021 16:14:15 -0700
Subject: [PATCH 442/502] docs: networking: netdevsim rules

There are aspects of netdevsim which are commonly
misunderstood and pointed out in review. Cong
suggest we document them.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/netdev-FAQ.rst | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 91b2cf712801..e26532f49760 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -228,6 +228,23 @@ before posting to the mailing list. The patchwork build bot instance
 gets overloaded very easily and netdev@vger really doesn't need more
 traffic if we can help it.
 
+netdevsim is great, can I extend it for my out-of-tree tests?
+-------------------------------------------------------------
+
+No, `netdevsim` is a test vehicle solely for upstream tests.
+(Please add your tests under tools/testing/selftests/.)
+
+We also give no guarantees that `netdevsim` won't change in the future
+in a way which would break what would normally be considered uAPI.
+
+Is netdevsim considered a "user" of an API?
+-------------------------------------------
+
+Linux kernel has a long standing rule that no API should be added unless
+it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are
+strongly encouraged when adding new APIs, but `netdevsim` in itself
+is **not** considered a use case/user.
+
 Any other tips to help ensure my net/net-next patch gets OK'd?
 --------------------------------------------------------------
 Attention to detail.  Re-read your own work as if you were the

From 6b67d4d63edece1033972214704c04f36c5be89a Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <iivanov@suse.de>
Date: Wed, 4 Aug 2021 11:13:39 +0300
Subject: [PATCH 443/502] net: usb: lan78xx: don't modify phy_device state
 concurrently

Currently phy_device state could be left in inconsistent state shown
by following alert message[1]. This is because phy_read_status could
be called concurrently from lan78xx_delayedwork, phy_state_machine and
__ethtool_get_link. Fix this by making sure that phy_device state is
updated atomically.

[1] lan78xx 1-1.1.1:1.0 eth0: No phy led trigger registered for speed(-1)

Signed-off-by: Ivan T. Ivanov <iivanov@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 25489389ea49..6d092d78e0cb 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1154,7 +1154,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 {
 	struct phy_device *phydev = dev->net->phydev;
 	struct ethtool_link_ksettings ecmd;
-	int ladv, radv, ret;
+	int ladv, radv, ret, link;
 	u32 buf;
 
 	/* clear LAN78xx interrupt status */
@@ -1162,9 +1162,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 	if (unlikely(ret < 0))
 		return -EIO;
 
+	mutex_lock(&phydev->lock);
 	phy_read_status(phydev);
+	link = phydev->link;
+	mutex_unlock(&phydev->lock);
 
-	if (!phydev->link && dev->link_on) {
+	if (!link && dev->link_on) {
 		dev->link_on = false;
 
 		/* reset MAC */
@@ -1177,7 +1180,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 			return -EIO;
 
 		del_timer(&dev->stat_monitor);
-	} else if (phydev->link && !dev->link_on) {
+	} else if (link && !dev->link_on) {
 		dev->link_on = true;
 
 		phy_ethtool_ksettings_get(phydev, &ecmd);
@@ -1466,9 +1469,14 @@ static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata)
 
 static u32 lan78xx_get_link(struct net_device *net)
 {
-	phy_read_status(net->phydev);
+	u32 link;
 
-	return net->phydev->link;
+	mutex_lock(&net->phydev->lock);
+	phy_read_status(net->phydev);
+	link = net->phydev->link;
+	mutex_unlock(&net->phydev->lock);
+
+	return link;
 }
 
 static void lan78xx_get_drvinfo(struct net_device *net,

From f558c2b834ec27e75d37b1c860c139e7b7c3a8e4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 3 Aug 2021 12:45:01 +0200
Subject: [PATCH 444/502] sched/rt: Fix double enqueue caused by
 rt_effective_prio

Double enqueues in rt runqueues (list) have been reported while running
a simple test that spawns a number of threads doing a short sleep/run
pattern while being concurrently setscheduled between rt and fair class.

  WARNING: CPU: 3 PID: 2825 at kernel/sched/rt.c:1294 enqueue_task_rt+0x355/0x360
  CPU: 3 PID: 2825 Comm: setsched__13
  RIP: 0010:enqueue_task_rt+0x355/0x360
  Call Trace:
   __sched_setscheduler+0x581/0x9d0
   _sched_setscheduler+0x63/0xa0
   do_sched_setscheduler+0xa0/0x150
   __x64_sys_sched_setscheduler+0x1a/0x30
   do_syscall_64+0x33/0x40
   entry_SYSCALL_64_after_hwframe+0x44/0xae

  list_add double add: new=ffff9867cb629b40, prev=ffff9867cb629b40,
		       next=ffff98679fc67ca0.
  kernel BUG at lib/list_debug.c:31!
  invalid opcode: 0000 [#1] PREEMPT_RT SMP PTI
  CPU: 3 PID: 2825 Comm: setsched__13
  RIP: 0010:__list_add_valid+0x41/0x50
  Call Trace:
   enqueue_task_rt+0x291/0x360
   __sched_setscheduler+0x581/0x9d0
   _sched_setscheduler+0x63/0xa0
   do_sched_setscheduler+0xa0/0x150
   __x64_sys_sched_setscheduler+0x1a/0x30
   do_syscall_64+0x33/0x40
   entry_SYSCALL_64_after_hwframe+0x44/0xae

__sched_setscheduler() uses rt_effective_prio() to handle proper queuing
of priority boosted tasks that are setscheduled while being boosted.
rt_effective_prio() is however called twice per each
__sched_setscheduler() call: first directly by __sched_setscheduler()
before dequeuing the task and then by __setscheduler() to actually do
the priority change. If the priority of the pi_top_task is concurrently
being changed however, it might happen that the two calls return
different results. If, for example, the first call returned the same rt
priority the task was running at and the second one a fair priority, the
task won't be removed by the rt list (on_list still set) and then
enqueued in the fair runqueue. When eventually setscheduled back to rt
it will be seen as enqueued already and the WARNING/BUG be issued.

Fix this by calling rt_effective_prio() only once and then reusing the
return value. While at it refactor code as well for clarity. Concurrent
priority inheritance handling is still safe and will eventually converge
to a new state by following the inheritance chain(s).

Fixes: 0782e63bc6fe ("sched: Handle priority boosted tasks proper in setscheduler()")
[squashed Peterz changes; added changelog]
Reported-by: Mark Simmons <msimmons@redhat.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210803104501.38333-1-juri.lelli@redhat.com
---
 kernel/sched/core.c | 90 ++++++++++++++++++---------------------------
 1 file changed, 35 insertions(+), 55 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d9ff40f4661..20ffcc044134 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1981,12 +1981,18 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 	dequeue_task(rq, p, flags);
 }
 
-/*
- * __normal_prio - return the priority that is based on the static prio
- */
-static inline int __normal_prio(struct task_struct *p)
+static inline int __normal_prio(int policy, int rt_prio, int nice)
 {
-	return p->static_prio;
+	int prio;
+
+	if (dl_policy(policy))
+		prio = MAX_DL_PRIO - 1;
+	else if (rt_policy(policy))
+		prio = MAX_RT_PRIO - 1 - rt_prio;
+	else
+		prio = NICE_TO_PRIO(nice);
+
+	return prio;
 }
 
 /*
@@ -1998,15 +2004,7 @@ static inline int __normal_prio(struct task_struct *p)
  */
 static inline int normal_prio(struct task_struct *p)
 {
-	int prio;
-
-	if (task_has_dl_policy(p))
-		prio = MAX_DL_PRIO-1;
-	else if (task_has_rt_policy(p))
-		prio = MAX_RT_PRIO-1 - p->rt_priority;
-	else
-		prio = __normal_prio(p);
-	return prio;
+	return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio));
 }
 
 /*
@@ -4099,7 +4097,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 		} else if (PRIO_TO_NICE(p->static_prio) < 0)
 			p->static_prio = NICE_TO_PRIO(0);
 
-		p->prio = p->normal_prio = __normal_prio(p);
+		p->prio = p->normal_prio = p->static_prio;
 		set_load_weight(p, false);
 
 		/*
@@ -6341,6 +6339,18 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
 }
 EXPORT_SYMBOL(default_wake_function);
 
+static void __setscheduler_prio(struct task_struct *p, int prio)
+{
+	if (dl_prio(prio))
+		p->sched_class = &dl_sched_class;
+	else if (rt_prio(prio))
+		p->sched_class = &rt_sched_class;
+	else
+		p->sched_class = &fair_sched_class;
+
+	p->prio = prio;
+}
+
 #ifdef CONFIG_RT_MUTEXES
 
 static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
@@ -6456,22 +6466,19 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 		} else {
 			p->dl.pi_se = &p->dl;
 		}
-		p->sched_class = &dl_sched_class;
 	} else if (rt_prio(prio)) {
 		if (dl_prio(oldprio))
 			p->dl.pi_se = &p->dl;
 		if (oldprio < prio)
 			queue_flag |= ENQUEUE_HEAD;
-		p->sched_class = &rt_sched_class;
 	} else {
 		if (dl_prio(oldprio))
 			p->dl.pi_se = &p->dl;
 		if (rt_prio(oldprio))
 			p->rt.timeout = 0;
-		p->sched_class = &fair_sched_class;
 	}
 
-	p->prio = prio;
+	__setscheduler_prio(p, prio);
 
 	if (queued)
 		enqueue_task(rq, p, queue_flag);
@@ -6824,35 +6831,6 @@ static void __setscheduler_params(struct task_struct *p,
 	set_load_weight(p, true);
 }
 
-/* Actually do priority change: must hold pi & rq lock. */
-static void __setscheduler(struct rq *rq, struct task_struct *p,
-			   const struct sched_attr *attr, bool keep_boost)
-{
-	/*
-	 * If params can't change scheduling class changes aren't allowed
-	 * either.
-	 */
-	if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
-		return;
-
-	__setscheduler_params(p, attr);
-
-	/*
-	 * Keep a potential priority boosting if called from
-	 * sched_setscheduler().
-	 */
-	p->prio = normal_prio(p);
-	if (keep_boost)
-		p->prio = rt_effective_prio(p, p->prio);
-
-	if (dl_prio(p->prio))
-		p->sched_class = &dl_sched_class;
-	else if (rt_prio(p->prio))
-		p->sched_class = &rt_sched_class;
-	else
-		p->sched_class = &fair_sched_class;
-}
-
 /*
  * Check the target process has a UID that matches the current process's:
  */
@@ -6873,10 +6851,8 @@ static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
 				bool user, bool pi)
 {
-	int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
-		      MAX_RT_PRIO - 1 - attr->sched_priority;
-	int retval, oldprio, oldpolicy = -1, queued, running;
-	int new_effective_prio, policy = attr->sched_policy;
+	int oldpolicy = -1, policy = attr->sched_policy;
+	int retval, oldprio, newprio, queued, running;
 	const struct sched_class *prev_class;
 	struct callback_head *head;
 	struct rq_flags rf;
@@ -7074,6 +7050,7 @@ change:
 	p->sched_reset_on_fork = reset_on_fork;
 	oldprio = p->prio;
 
+	newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice);
 	if (pi) {
 		/*
 		 * Take priority boosted tasks into account. If the new
@@ -7082,8 +7059,8 @@ change:
 		 * the runqueue. This will be done when the task deboost
 		 * itself.
 		 */
-		new_effective_prio = rt_effective_prio(p, newprio);
-		if (new_effective_prio == oldprio)
+		newprio = rt_effective_prio(p, newprio);
+		if (newprio == oldprio)
 			queue_flags &= ~DEQUEUE_MOVE;
 	}
 
@@ -7096,7 +7073,10 @@ change:
 
 	prev_class = p->sched_class;
 
-	__setscheduler(rq, p, attr, pi);
+	if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
+		__setscheduler_params(p, attr);
+		__setscheduler_prio(p, newprio);
+	}
 	__setscheduler_uclamp(p, attr);
 
 	if (queued) {

From f4b4b45652578357031fbbef7f7a1b04f6fa2dc3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 29 Jul 2021 11:14:57 +0200
Subject: [PATCH 445/502] perf/x86: Fix out of bound MSR access

On Wed, Jul 28, 2021 at 12:49:43PM -0400, Vince Weaver wrote:
> [32694.087403] unchecked MSR access error: WRMSR to 0x318 (tried to write 0x0000000000000000) at rIP: 0xffffffff8106f854 (native_write_msr+0x4/0x20)
> [32694.101374] Call Trace:
> [32694.103974]  perf_clear_dirty_counters+0x86/0x100

The problem being that it doesn't filter out all fake counters, in
specific the above (erroneously) tries to use FIXED_BTS. Limit the
fixed counters indexes to the hardware supplied number.

Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Like Xu <likexu@tencent.com>
Link: https://lkml.kernel.org/r/YQJxka3dxgdIdebG@hirez.programming.kicks-ass.net
---
 arch/x86/events/core.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 1eb45139fcc6..3092fbf9dbe4 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2489,13 +2489,15 @@ void perf_clear_dirty_counters(void)
 		return;
 
 	for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
-		/* Metrics and fake events don't have corresponding HW counters. */
-		if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
-			continue;
-		else if (i >= INTEL_PMC_IDX_FIXED)
+		if (i >= INTEL_PMC_IDX_FIXED) {
+			/* Metrics and fake events don't have corresponding HW counters. */
+			if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+				continue;
+
 			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
-		else
+		} else {
 			wrmsrl(x86_pmu_event_addr(i), 0);
+		}
 	}
 
 	bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);

From df51fe7ea1c1c2c3bfdb81279712fdd2e4ea6c27 Mon Sep 17 00:00:00 2001
From: Like Xu <likexu@tencent.com>
Date: Mon, 2 Aug 2021 15:08:50 +0800
Subject: [PATCH 446/502] perf/x86/amd: Don't touch the AMD64_EVENTSEL_HOSTONLY
 bit inside the guest

If we use "perf record" in an AMD Milan guest, dmesg reports a #GP
warning from an unchecked MSR access error on MSR_F15H_PERF_CTLx:

  [] unchecked MSR access error: WRMSR to 0xc0010200 (tried to write 0x0000020000110076) at rIP: 0xffffffff8106ddb4 (native_write_msr+0x4/0x20)
  [] Call Trace:
  []  amd_pmu_disable_event+0x22/0x90
  []  x86_pmu_stop+0x4c/0xa0
  []  x86_pmu_del+0x3a/0x140

The AMD64_EVENTSEL_HOSTONLY bit is defined and used on the host,
while the guest perf driver should avoid such use.

Fixes: 1018faa6cf23 ("perf/x86/kvm: Fix Host-Only/Guest-Only counting with SVM disabled")
Signed-off-by: Like Xu <likexu@tencent.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Tested-by: Kim Phillips <kim.phillips@amd.com>
Tested-by: Liam Merwick <liam.merwick@oracle.com>
Link: https://lkml.kernel.org/r/20210802070850.35295-1-likexu@tencent.com
---
 arch/x86/events/perf_event.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 2bf1c7ea2758..2938c902ffbe 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1115,9 +1115,10 @@ void x86_pmu_stop(struct perf_event *event, int flags);
 
 static inline void x86_pmu_disable_event(struct perf_event *event)
 {
+	u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
 	struct hw_perf_event *hwc = &event->hw;
 
-	wrmsrl(hwc->config_base, hwc->config);
+	wrmsrl(hwc->config_base, hwc->config & ~disable_mask);
 
 	if (is_counter_pair(hwc))
 		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);

From bb2baeb214a71cda47d50dce80414016117ddda0 Mon Sep 17 00:00:00 2001
From: Mingwei Zhang <mizhang@google.com>
Date: Mon, 2 Aug 2021 11:09:03 -0700
Subject: [PATCH 447/502] KVM: SVM: improve the code readability for ASID
 management

KVM SEV code uses bitmaps to manage ASID states. ASID 0 was always skipped
because it is never used by VM. Thus, in existing code, ASID value and its
bitmap postion always has an 'offset-by-1' relationship.

Both SEV and SEV-ES shares the ASID space, thus KVM uses a dynamic range
[min_asid, max_asid] to handle SEV and SEV-ES ASIDs separately.

Existing code mixes the usage of ASID value and its bitmap position by
using the same variable called 'min_asid'.

Fix the min_asid usage: ensure that its usage is consistent with its name;
allocate extra size for ASID 0 to ensure that each ASID has the same value
with its bitmap position. Add comments on ASID bitmap allocation to clarify
the size change.

Signed-off-by: Mingwei Zhang <mizhang@google.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Marc Orr <marcorr@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Alper Gun <alpergun@google.com>
Cc: Dionna Glaze <dionnaglaze@google.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Vipin Sharma <vipinsh@google.com>
Cc: Peter Gonda <pgonda@google.com>
Cc: Joerg Roedel <joro@8bytes.org>
Message-Id: <20210802180903.159381-1-mizhang@google.com>
[Fix up sev_asid_free to also index by ASID, as suggested by Sean
 Christopherson, and use nr_asids in sev_cpu_init. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 43 +++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 4d0aba185412..7fbce342eec4 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock);
 unsigned int max_sev_asid;
 static unsigned int min_sev_asid;
 static unsigned long sev_me_mask;
+static unsigned int nr_asids;
 static unsigned long *sev_asid_bitmap;
 static unsigned long *sev_reclaim_asid_bitmap;
 
@@ -78,11 +79,11 @@ struct enc_region {
 /* Called with the sev_bitmap_lock held, or on shutdown  */
 static int sev_flush_asids(int min_asid, int max_asid)
 {
-	int ret, pos, error = 0;
+	int ret, asid, error = 0;
 
 	/* Check if there are any ASIDs to reclaim before performing a flush */
-	pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid);
-	if (pos >= max_asid)
+	asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
+	if (asid > max_asid)
 		return -EBUSY;
 
 	/*
@@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
 
 	/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
 	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
-		   max_sev_asid);
-	bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
+		   nr_asids);
+	bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
 
 	return true;
 }
 
 static int sev_asid_new(struct kvm_sev_info *sev)
 {
-	int pos, min_asid, max_asid, ret;
+	int asid, min_asid, max_asid, ret;
 	bool retry = true;
 	enum misc_res_type type;
 
@@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev)
 	 * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
 	 * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
 	 */
-	min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+	min_asid = sev->es_active ? 1 : min_sev_asid;
 	max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
 again:
-	pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
-	if (pos >= max_asid) {
+	asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
+	if (asid > max_asid) {
 		if (retry && __sev_recycle_asids(min_asid, max_asid)) {
 			retry = false;
 			goto again;
@@ -157,11 +158,11 @@ again:
 		goto e_uncharge;
 	}
 
-	__set_bit(pos, sev_asid_bitmap);
+	__set_bit(asid, sev_asid_bitmap);
 
 	mutex_unlock(&sev_bitmap_lock);
 
-	return pos + 1;
+	return asid;
 e_uncharge:
 	misc_cg_uncharge(type, sev->misc_cg, 1);
 	put_misc_cg(sev->misc_cg);
@@ -179,13 +180,12 @@ static int sev_get_asid(struct kvm *kvm)
 static void sev_asid_free(struct kvm_sev_info *sev)
 {
 	struct svm_cpu_data *sd;
-	int cpu, pos;
+	int cpu;
 	enum misc_res_type type;
 
 	mutex_lock(&sev_bitmap_lock);
 
-	pos = sev->asid - 1;
-	__set_bit(pos, sev_reclaim_asid_bitmap);
+	__set_bit(sev->asid, sev_reclaim_asid_bitmap);
 
 	for_each_possible_cpu(cpu) {
 		sd = per_cpu(svm_data, cpu);
@@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void)
 	min_sev_asid = edx;
 	sev_me_mask = 1UL << (ebx & 0x3f);
 
-	/* Initialize SEV ASID bitmaps */
-	sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+	/*
+	 * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
+	 * even though it's never used, so that the bitmap is indexed by the
+	 * actual ASID.
+	 */
+	nr_asids = max_sev_asid + 1;
+	sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
 	if (!sev_asid_bitmap)
 		goto out;
 
-	sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+	sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
 	if (!sev_reclaim_asid_bitmap) {
 		bitmap_free(sev_asid_bitmap);
 		sev_asid_bitmap = NULL;
@@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void)
 		return;
 
 	/* No need to take sev_bitmap_lock, all VMs have been destroyed. */
-	sev_flush_asids(0, max_sev_asid);
+	sev_flush_asids(1, max_sev_asid);
 
 	bitmap_free(sev_asid_bitmap);
 	bitmap_free(sev_reclaim_asid_bitmap);
@@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
 	if (!sev_enabled)
 		return 0;
 
-	sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL);
+	sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
 	if (!sd->sev_vmcbs)
 		return -ENOMEM;
 

From 13c2c3cfe01952575b1dd5e24d450fcccff93bc0 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 4 Aug 2021 14:20:57 +0300
Subject: [PATCH 448/502] KVM: selftests: fix hyperv_clock test

The test was mistakenly using addr_gpa2hva on a gva and that happened
to work accidentally.  Commit 106a2e766eae ("KVM: selftests: Lower the
min virtual address for misc page allocations") revealed this bug.

Fixes: 2c7f76b4c42b ("selftests: kvm: Add basic Hyper-V clocksources tests", 2021-03-18)
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210804112057.409498-1-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index bab10ae787b6..e0b2bb1339b1 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -215,7 +215,7 @@ int main(void)
 	vcpu_set_hv_cpuid(vm, VCPU_ID);
 
 	tsc_page_gva = vm_vaddr_alloc_page(vm);
-	memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
+	memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
 	TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
 		"TSC page has to be page aligned\n");
 	vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));

From 952835edb4fdad49361d5330da918be8b765b787 Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.ibm.com>
Date: Wed, 4 Aug 2021 17:18:00 +0200
Subject: [PATCH 449/502] s390/dasd: fix use after free in dasd path handling

When new configuration data is obtained after a path event it is stored
in the per path array. The old data needs to be freed.
The first valid configuration data is also referenced in the device
private structure to identify the device.
When the old per path configuration data was freed the device still
pointed to the already freed data leading to a use after free.

Fix by replacing also the device configuration data with the newly
obtained one before the old data gets freed.

Fixes: 460181217a24 ("s390/dasd: Store path configuration data during path handling")
Cc: stable@vger.kernel.org # 5.11+
Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
Link: https://lore.kernel.org/r/20210804151800.4031761-2-sth@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/s390/block/dasd_eckd.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 0de1a463c509..fb5d8152652d 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1004,15 +1004,23 @@ static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len)
 static void dasd_eckd_store_conf_data(struct dasd_device *device,
 				      struct dasd_conf_data *conf_data, int chp)
 {
+	struct dasd_eckd_private *private = device->private;
 	struct channel_path_desc_fmt0 *chp_desc;
 	struct subchannel_id sch_id;
+	void *cdp;
 
-	ccw_device_get_schid(device->cdev, &sch_id);
 	/*
 	 * path handling and read_conf allocate data
 	 * free it before replacing the pointer
+	 * also replace the old private->conf_data pointer
+	 * with the new one if this points to the same data
 	 */
-	kfree(device->path[chp].conf_data);
+	cdp = device->path[chp].conf_data;
+	if (private->conf_data == cdp) {
+		private->conf_data = (void *)conf_data;
+		dasd_eckd_identify_conf_parts(private);
+	}
+	ccw_device_get_schid(device->cdev, &sch_id);
 	device->path[chp].conf_data = conf_data;
 	device->path[chp].cssid = sch_id.cssid;
 	device->path[chp].ssid = sch_id.ssid;
@@ -1020,6 +1028,7 @@ static void dasd_eckd_store_conf_data(struct dasd_device *device,
 	if (chp_desc)
 		device->path[chp].chpid = chp_desc->chpid;
 	kfree(chp_desc);
+	kfree(cdp);
 }
 
 static void dasd_eckd_clear_conf_data(struct dasd_device *device)

From 402e0b8cd00284a25c6eb8c0a43319bc8430b1c7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 4 Aug 2021 11:49:58 +0200
Subject: [PATCH 450/502] n64cart: fix the dma address in n64cart_do_bvec

dma_map_bvec already takes bv_offset into account.

Fixes: 9b2a2bbbb4d0 ("block: Add n64 cart driver")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/n64cart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index 7b4dd10af9ec..c84be0028f63 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -74,7 +74,7 @@ static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos)
 
 	n64cart_wait_dma();
 
-	n64cart_write_reg(PI_DRAM_REG, dma_addr + bv->bv_offset);
+	n64cart_write_reg(PI_DRAM_REG, dma_addr);
 	n64cart_write_reg(PI_CART_REG, (bstart | CART_DOMAIN) & CART_MAX);
 	n64cart_write_reg(PI_WRITE_REG, bv->bv_len - 1);
 

From 83d6c39310b6d11199179f6384c2b0a415389597 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 3 Aug 2021 09:14:35 -0600
Subject: [PATCH 451/502] io-wq: fix race between worker exiting and activating
 free worker

Nadav correctly reports that we have a race between a worker exiting,
and new work being queued. This can lead to work being queued behind
an existing worker that could be sleeping on an event before it can
run to completion, and hence introducing potential big latency gaps
if we hit this race condition:

cpu0					cpu1
----					----
					io_wqe_worker()
					schedule_timeout()
					 // timed out
io_wqe_enqueue()
io_wqe_wake_worker()
// work_flags & IO_WQ_WORK_CONCURRENT
io_wqe_activate_free_worker()
					 io_worker_exit()

Fix this by having the exiting worker go through the normal decrement
of a running worker, which will spawn a new one if needed.

The free worker activation is modified to only return success if we
were able to find a sleeping worker - if not, we keep looking through
the list. If we fail, we create a new worker as per usual.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/io-uring/BFF746C0-FEDE-4646-A253-3021C57C26C9@gmail.com/
Reported-by: Nadav Amit <nadav.amit@gmail.com>
Tested-by: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index cf086b01c6c6..50dc93ffc153 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -130,6 +130,7 @@ struct io_cb_cancel_data {
 };
 
 static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
+static void io_wqe_dec_running(struct io_worker *worker);
 
 static bool io_worker_get(struct io_worker *worker)
 {
@@ -168,26 +169,21 @@ static void io_worker_exit(struct io_worker *worker)
 {
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
-	unsigned flags;
 
 	if (refcount_dec_and_test(&worker->ref))
 		complete(&worker->ref_done);
 	wait_for_completion(&worker->ref_done);
 
-	preempt_disable();
-	current->flags &= ~PF_IO_WORKER;
-	flags = worker->flags;
-	worker->flags = 0;
-	if (flags & IO_WORKER_F_RUNNING)
-		atomic_dec(&acct->nr_running);
-	worker->flags = 0;
-	preempt_enable();
-
 	raw_spin_lock_irq(&wqe->lock);
-	if (flags & IO_WORKER_F_FREE)
+	if (worker->flags & IO_WORKER_F_FREE)
 		hlist_nulls_del_rcu(&worker->nulls_node);
 	list_del_rcu(&worker->all_list);
 	acct->nr_workers--;
+	preempt_disable();
+	io_wqe_dec_running(worker);
+	worker->flags = 0;
+	current->flags &= ~PF_IO_WORKER;
+	preempt_enable();
 	raw_spin_unlock_irq(&wqe->lock);
 
 	kfree_rcu(worker, rcu);
@@ -214,15 +210,19 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
 	struct hlist_nulls_node *n;
 	struct io_worker *worker;
 
-	n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list));
-	if (is_a_nulls(n))
-		return false;
-
-	worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
-	if (io_worker_get(worker)) {
-		wake_up_process(worker->task);
+	/*
+	 * Iterate free_list and see if we can find an idle worker to
+	 * activate. If a given worker is on the free_list but in the process
+	 * of exiting, keep trying.
+	 */
+	hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
+		if (!io_worker_get(worker))
+			continue;
+		if (wake_up_process(worker->task)) {
+			io_worker_release(worker);
+			return true;
+		}
 		io_worker_release(worker);
-		return true;
 	}
 
 	return false;

From a07296453bf2778952a09b6244a695bf7607babb Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 4 Aug 2021 13:41:47 -0700
Subject: [PATCH 452/502] drm/i915: fix i915_globals_exit() section mismatch
 error

Fix modpost Section mismatch error in i915_globals_exit().
Since both an __init function and an __exit function can call
i915_globals_exit(), any function that i915_globals_exit() calls
should not be marked as __init or __exit. I.e., it needs to be
available for either of them.

WARNING: modpost: vmlinux.o(.text+0x8b796a): Section mismatch in reference from the function i915_globals_exit() to the function .exit.text:__i915_globals_flush()
The function i915_globals_exit() references a function in an exit section.
Often the function __i915_globals_flush() has valid usage outside the exit section
and the fix is to remove the __exit annotation of __i915_globals_flush.

ERROR: modpost: Section mismatches detected.
Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them.

Fixes: 1354d830cb8f ("drm/i915: Call i915_globals_exit() if pci_register_device() fails")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210804204147.2070-1-rdunlap@infradead.org
---
 drivers/gpu/drm/i915/i915_globals.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index 2db90e770616..3acb0b6be284 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -138,7 +138,7 @@ void i915_globals_unpark(void)
 	atomic_inc(&active);
 }
 
-static void __exit __i915_globals_flush(void)
+static void  __i915_globals_flush(void)
 {
 	atomic_inc(&active); /* skip shrinking */
 

From 2c05caa7ba8803209769b9e4fe02c38d77ae88d0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 30 Jul 2021 17:19:51 -0400
Subject: [PATCH 453/502] tracing / histogram: Give calculation hist_fields a
 size

When working on my user space applications, I found a bug in the synthetic
event code where the automated synthetic event field was not matching the
event field calculation it was attached to. Looking deeper into it, it was
because the calculation hist_field was not given a size.

The synthetic event fields are matched to their hist_fields either by
having the field have an identical string type, or if that does not match,
then the size and signed values are used to match the fields.

The problem arose when I tried to match a calculation where the fields
were "unsigned int". My tool created a synthetic event of type "u32". But
it failed to match. The string was:

  diff=field1-field2:onmatch(event).trace(synth,$diff)

Adding debugging into the kernel, I found that the size of "diff" was 0.
And since it was given "unsigned int" as a type, the histogram fallback
code used size and signed. The signed matched, but the size of u32 (4) did
not match zero, and the event failed to be created.

This can be worse if the field you want to match is not one of the
acceptable fields for a synthetic event. As event fields can have any type
that is supported in Linux, this can cause an issue. For example, if a
type is an enum. Then there's no way to use that with any calculations.

Have the calculation field simply take on the size of what it is
calculating.

Link: https://lkml.kernel.org/r/20210730171951.59c7743f@oasis.local.home

Cc: Tom Zanussi <zanussi@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: stable@vger.kernel.org
Fixes: 100719dcef447 ("tracing: Add simple expression support to hist triggers")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 34325f41ebc0..362db9b81b8d 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2287,6 +2287,10 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
 
 	expr->operands[0] = operand1;
 	expr->operands[1] = operand2;
+
+	/* The operand sizes should be the same, so just pick one */
+	expr->size = operand1->size;
+
 	expr->operator = field_op;
 	expr->name = expr_str(expr, 0);
 	expr->type = kstrdup(operand1->type, GFP_KERNEL);

From a9d10ca4986571bffc19778742d508cc8dd13e02 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 28 Jul 2021 07:55:43 +0900
Subject: [PATCH 454/502] tracing: Reject string operand in the histogram
 expression

Since the string type can not be the target of the addition / subtraction
operation, it must be rejected. Without this fix, the string type silently
converted to digits.

Link: https://lkml.kernel.org/r/162742654278.290973.1523000673366456634.stgit@devnote2

Cc: stable@vger.kernel.org
Fixes: 100719dcef447 ("tracing: Add simple expression support to hist triggers")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 362db9b81b8d..949ef09dc537 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -65,7 +65,8 @@
 	C(INVALID_SORT_MODIFIER,"Invalid sort modifier"),		\
 	C(EMPTY_SORT_FIELD,	"Empty sort field"),			\
 	C(TOO_MANY_SORT_FIELDS,	"Too many sort fields (Max = 2)"),	\
-	C(INVALID_SORT_FIELD,	"Sort field must be a key or a val"),
+	C(INVALID_SORT_FIELD,	"Sort field must be a key or a val"),	\
+	C(INVALID_STR_OPERAND,	"String type can not be an operand in expression"),
 
 #undef C
 #define C(a, b)		HIST_ERR_##a
@@ -2156,6 +2157,13 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
 		ret = PTR_ERR(operand1);
 		goto free;
 	}
+	if (operand1->flags & HIST_FIELD_FL_STRING) {
+		/* String type can not be the operand of unary operator. */
+		hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+		destroy_hist_field(operand1, 0);
+		ret = -EINVAL;
+		goto free;
+	}
 
 	expr->flags |= operand1->flags &
 		(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
@@ -2257,6 +2265,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
 		operand1 = NULL;
 		goto free;
 	}
+	if (operand1->flags & HIST_FIELD_FL_STRING) {
+		hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str));
+		ret = -EINVAL;
+		goto free;
+	}
 
 	/* rest of string could be another expression e.g. b+c in a+b+c */
 	operand_flags = 0;
@@ -2266,6 +2279,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
 		operand2 = NULL;
 		goto free;
 	}
+	if (operand2->flags & HIST_FIELD_FL_STRING) {
+		hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+		ret = -EINVAL;
+		goto free;
+	}
 
 	ret = check_expr_operands(file->tr, operand1, operand2);
 	if (ret)

From b18b851ba85a5855cb53865fcff3cd2c17b44b0b Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Mon, 2 Aug 2021 14:03:07 -0700
Subject: [PATCH 455/502] scripts/recordmcount.pl: Remove check_objcopy() and
 $can_use_local

When building ARCH=riscv allmodconfig with llvm-objcopy, the objcopy
version warning from this script appears:

WARNING: could not find objcopy version or version is less than 2.17.
        Local function references are disabled.

The check_objcopy() function in scripts/recordmcount.pl is set up to
parse GNU objcopy's version string, not llvm-objcopy's, which triggers
the warning.

Commit 799c43415442 ("kbuild: thin archives make default for all archs")
made binutils 2.20 mandatory and commit ba64beb17493 ("kbuild: check the
minimum assembler version in Kconfig") enforces this at configuration
time so just remove check_objcopy() and $can_use_local instead, assuming
--globalize-symbol is always available.

llvm-objcopy has supported --globalize-symbol since LLVM 7.0.0 in 2018
and the minimum version for building the kernel with LLVM is 10.0.1 so
there is no issue introduced:

Link: https://github.com/llvm/llvm-project/commit/ee5be798dae30d5f9414b01f76ff807edbc881aa
Link: https://lkml.kernel.org/r/20210802210307.3202472-1-nathan@kernel.org

Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Makefile                |  1 -
 scripts/recordmcount.pl | 40 ----------------------------------------
 2 files changed, 41 deletions(-)

diff --git a/Makefile b/Makefile
index e4f5895badb5..d6915f361aa4 100644
--- a/Makefile
+++ b/Makefile
@@ -546,7 +546,6 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \
 PHONY += scripts_basic
 scripts_basic:
 	$(Q)$(MAKE) $(build)=scripts/basic
-	$(Q)rm -f .tmp_quiet_recordmcount
 
 PHONY += outputmakefile
 ifdef building_out_of_srctree
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index c17e48020ec3..8f6b13ae46bf 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -173,39 +173,6 @@ my $mcount_regex;	# Find the call site to mcount (return offset)
 my $mcount_adjust;	# Address adjustment to mcount offset
 my $alignment;		# The .align value to use for $mcount_section
 my $section_type;	# Section header plus possible alignment command
-my $can_use_local = 0; 	# If we can use local function references
-
-# Shut up recordmcount if user has older objcopy
-my $quiet_recordmcount = ".tmp_quiet_recordmcount";
-my $print_warning = 1;
-$print_warning = 0 if ( -f $quiet_recordmcount);
-
-##
-# check_objcopy - whether objcopy supports --globalize-symbols
-#
-#  --globalize-symbols came out in 2.17, we must test the version
-#  of objcopy, and if it is less than 2.17, then we can not
-#  record local functions.
-sub check_objcopy
-{
-    open (IN, "$objcopy --version |") or die "error running $objcopy";
-    while (<IN>) {
-	if (/objcopy.*\s(\d+)\.(\d+)/) {
-	    $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17));
-	    last;
-	}
-    }
-    close (IN);
-
-    if (!$can_use_local && $print_warning) {
-	print STDERR "WARNING: could not find objcopy version or version " .
-	    "is less than 2.17.\n" .
-	    "\tLocal function references are disabled.\n";
-	open (QUIET, ">$quiet_recordmcount");
-	printf QUIET "Disables the warning from recordmcount.pl\n";
-	close QUIET;
-    }
-}
 
 if ($arch =~ /(x86(_64)?)|(i386)/) {
     if ($bits == 64) {
@@ -434,8 +401,6 @@ if ($filename =~ m,^(.*)(\.\S),) {
 my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
 my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
 
-check_objcopy();
-
 #
 # Step 1: find all the local (static functions) and weak symbols.
 #         't' is local, 'w/W' is weak
@@ -473,11 +438,6 @@ sub update_funcs
 
     # is this function static? If so, note this fact.
     if (defined $locals{$ref_func}) {
-
-	# only use locals if objcopy supports globalize-symbols
-	if (!$can_use_local) {
-	    return;
-	}
 	$convert{$ref_func} = 1;
     }
 

From 1c0cec64a7cc545eb49f374a43e9f7190a14defa Mon Sep 17 00:00:00 2001
From: Hui Su <suhui@zeku.com>
Date: Fri, 11 Jun 2021 10:21:07 +0800
Subject: [PATCH 456/502] scripts/tracing: fix the bug that can't parse
 raw_trace_func

Since commit 77271ce4b2c0 ("tracing: Add irq, preempt-count and need resched info
to default trace output"), the default trace output format has been changed to:
          <idle>-0       [009] d.h. 22420.068695: _raw_spin_lock_irqsave <-hrtimer_interrupt
          <idle>-0       [000] ..s. 22420.068695: _nohz_idle_balance <-run_rebalance_domains
          <idle>-0       [011] d.h. 22420.068695: account_process_tick <-update_process_times

origin trace output format:(before v3.2.0)
     # tracer: nop
     #
     #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
     #              | |       |          |         |
          migration/0-6     [000]    50.025810: rcu_note_context_switch <-__schedule
          migration/0-6     [000]    50.025812: trace_rcu_utilization <-rcu_note_context_switch
          migration/0-6     [000]    50.025813: rcu_sched_qs <-rcu_note_context_switch
          migration/0-6     [000]    50.025815: rcu_preempt_qs <-rcu_note_context_switch
          migration/0-6     [000]    50.025817: trace_rcu_utilization <-rcu_note_context_switch
          migration/0-6     [000]    50.025818: debug_lockdep_rcu_enabled <-__schedule
          migration/0-6     [000]    50.025820: debug_lockdep_rcu_enabled <-__schedule

The draw_functrace.py(introduced in v2.6.28) can't parse the new version format trace_func,
So we need modify draw_functrace.py to adapt the new version trace output format.

Link: https://lkml.kernel.org/r/20210611022107.608787-1-suhui@zeku.com

Cc: stable@vger.kernel.org
Fixes: 77271ce4b2c0 tracing: Add irq, preempt-count and need resched info to default trace output
Signed-off-by: Hui Su <suhui@zeku.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 scripts/tracing/draw_functrace.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py
index 74f8aadfd4cb..7011fbe003ff 100755
--- a/scripts/tracing/draw_functrace.py
+++ b/scripts/tracing/draw_functrace.py
@@ -17,7 +17,7 @@ Usage:
 	$ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func
 	Wait some times but not too much, the script is a bit slow.
 	Break the pipe (Ctrl + Z)
-	$ scripts/draw_functrace.py < raw_trace_func > draw_functrace
+	$ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace
 	Then you have your drawn trace in draw_functrace
 """
 
@@ -103,10 +103,10 @@ def parseLine(line):
 	line = line.strip()
 	if line.startswith("#"):
 		raise CommentLineException
-	m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line)
+	m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line)
 	if m is None:
 		raise BrokenLineException
-	return (m.group(1), m.group(2), m.group(3))
+	return (m.group(2), m.group(3), m.group(4))
 
 
 def main():

From d5aaad6f83420efb8357ac8e11c868708b22d0a9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 4 Aug 2021 14:46:09 -0700
Subject: [PATCH 457/502] KVM: x86/mmu: Fix per-cpu counter corruption on
 32-bit builds

Take a signed 'long' instead of an 'unsigned long' for the number of
pages to add/subtract to the total number of pages used by the MMU.  This
fixes a zero-extension bug on 32-bit kernels that effectively corrupts
the per-cpu counter used by the shrinker.

Per-cpu counters take a signed 64-bit value on both 32-bit and 64-bit
kernels, whereas kvm_mod_used_mmu_pages() takes an unsigned long and thus
an unsigned 32-bit value on 32-bit kernels.  As a result, the value used
to adjust the per-cpu counter is zero-extended (unsigned -> signed), not
sign-extended (signed -> signed), and so KVM's intended -1 gets morphed to
4294967295 and effectively corrupts the counter.

This was found by a staggering amount of sheer dumb luck when running
kvm-unit-tests on a 32-bit KVM build.  The shrinker just happened to kick
in while running tests and do_shrink_slab() logged an error about trying
to free a negative number of objects.  The truly lucky part is that the
kernel just happened to be a slightly stale build, as the shrinker no
longer yells about negative objects as of commit 18bb473e5031 ("mm:
vmscan: shrink deferred objects proportional to priority").

 vmscan: shrink_slab: mmu_shrink_scan+0x0/0x210 [kvm] negative objects to delete nr=-858993460

Fixes: bc8a3d8925a8 ("kvm: mmu: Fix overflow on kvm mmu page limit calculation")
Cc: stable@vger.kernel.org
Cc: Ben Gardon <bgardon@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210804214609.1096003-1-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 66f7f5bc3482..c4f4fa23320e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt)
  * aggregate version in order to make the slab shrinker
  * faster
  */
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
 {
 	kvm->arch.n_used_mmu_pages += nr;
 	percpu_counter_add(&kvm_total_used_mmu_pages, nr);

From 8da0e55c7988ef9f08a708c38e5c75ecd8862cf8 Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Thu, 5 Aug 2021 01:25:22 +0200
Subject: [PATCH 458/502] USB: serial: ftdi_sio: add device ID for Auto-M3
 OP-COM v2

The Auto-M3 OP-COM v2 is a OBD diagnostic device using a FTD232 for the
USB connection.

Signed-off-by: David Bauer <mail@david-bauer.net>
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ftdi_sio.c     | 1 +
 drivers/usb/serial/ftdi_sio_ids.h | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 4a1f3a95d017..33bbb3470ca3 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -219,6 +219,7 @@ static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_MTXORB_6_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_R2000KU_TRUE_RNG) },
 	{ USB_DEVICE(FTDI_VID, FTDI_VARDAAN_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_AUTO_M3_OP_COM_V2_PID) },
 	{ USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0100_PID) },
 	{ USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0101_PID) },
 	{ USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0102_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index add602bebd82..755858ca20ba 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -159,6 +159,9 @@
 /* Vardaan Enterprises Serial Interface VEUSB422R3 */
 #define FTDI_VARDAAN_PID	0xF070
 
+/* Auto-M3 Ltd. - OP-COM USB V2 - OBD interface Adapter */
+#define FTDI_AUTO_M3_OP_COM_V2_PID	0x4f50
+
 /*
  * Xsens Technologies BV products (http://www.xsens.com).
  */

From d25d85061bd856d6be221626605319154f9b5043 Mon Sep 17 00:00:00 2001
From: Wesley Cheng <wcheng@codeaurora.org>
Date: Thu, 29 Jul 2021 00:33:14 -0700
Subject: [PATCH 459/502] usb: dwc3: gadget: Use list_replace_init() before
 traversing lists

The list_for_each_entry_safe() macro saves the current item (n) and
the item after (n+1), so that n can be safely removed without
corrupting the list.  However, when traversing the list and removing
items using gadget giveback, the DWC3 lock is briefly released,
allowing other routines to execute.  There is a situation where, while
items are being removed from the cancelled_list using
dwc3_gadget_ep_cleanup_cancelled_requests(), the pullup disable
routine is running in parallel (due to UDC unbind).  As the cleanup
routine removes n, and the pullup disable removes n+1, once the
cleanup retakes the DWC3 lock, it references a request who was already
removed/handled.  With list debug enabled, this leads to a panic.
Ensure all instances of the macro are replaced where gadget giveback
is used.

Example call stack:

Thread#1:
__dwc3_gadget_ep_set_halt() - CLEAR HALT
  -> dwc3_gadget_ep_cleanup_cancelled_requests()
    ->list_for_each_entry_safe()
    ->dwc3_gadget_giveback(n)
      ->dwc3_gadget_del_and_unmap_request()- n deleted[cancelled_list]
      ->spin_unlock
      ->Thread#2 executes
      ...
    ->dwc3_gadget_giveback(n+1)
      ->Already removed!

Thread#2:
dwc3_gadget_pullup()
  ->waiting for dwc3 spin_lock
  ...
  ->Thread#1 released lock
  ->dwc3_stop_active_transfers()
    ->dwc3_remove_requests()
      ->fetches n+1 item from cancelled_list (n removed by Thread#1)
      ->dwc3_gadget_giveback()
        ->dwc3_gadget_del_and_unmap_request()- n+1
deleted[cancelled_list]
        ->spin_unlock

Fix this condition by utilizing list_replace_init(), and traversing
through a local copy of the current elements in the endpoint lists.
This will also set the parent list as empty, so if another thread is
also looping through the list, it will be empty on the next iteration.

Fixes: d4f1afe5e896 ("usb: dwc3: gadget: move requests to cancelled_list")
Cc: stable <stable@vger.kernel.org>
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
Link: https://lore.kernel.org/r/1627543994-20327-1-git-send-email-wcheng@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 45f2bc0807e8..a1b262669574 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1741,9 +1741,13 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep)
 {
 	struct dwc3_request		*req;
 	struct dwc3_request		*tmp;
+	struct list_head		local;
 	struct dwc3			*dwc = dep->dwc;
 
-	list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) {
+restart:
+	list_replace_init(&dep->cancelled_list, &local);
+
+	list_for_each_entry_safe(req, tmp, &local, list) {
 		dwc3_gadget_ep_skip_trbs(dep, req);
 		switch (req->status) {
 		case DWC3_REQUEST_STATUS_DISCONNECTED:
@@ -1761,6 +1765,9 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep)
 			break;
 		}
 	}
+
+	if (!list_empty(&dep->cancelled_list))
+		goto restart;
 }
 
 static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
@@ -2958,8 +2965,12 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep,
 {
 	struct dwc3_request	*req;
 	struct dwc3_request	*tmp;
+	struct list_head	local;
 
-	list_for_each_entry_safe(req, tmp, &dep->started_list, list) {
+restart:
+	list_replace_init(&dep->started_list, &local);
+
+	list_for_each_entry_safe(req, tmp, &local, list) {
 		int ret;
 
 		ret = dwc3_gadget_ep_cleanup_completed_request(dep, event,
@@ -2967,6 +2978,9 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep,
 		if (ret)
 			break;
 	}
+
+	if (!list_empty(&dep->started_list))
+		goto restart;
 }
 
 static bool dwc3_gadget_ep_should_continue(struct dwc3_ep *dep)

From cb10f68ad8150f243964b19391711aaac5e8ff42 Mon Sep 17 00:00:00 2001
From: Wesley Cheng <wcheng@codeaurora.org>
Date: Tue, 3 Aug 2021 23:24:05 -0700
Subject: [PATCH 460/502] usb: dwc3: gadget: Avoid runtime resume if disabling
 pullup

If the device is already in the runtime suspended state, any call to
the pullup routine will issue a runtime resume on the DWC3 core
device.  If the USB gadget is disabling the pullup, then avoid having
to issue a runtime resume, as DWC3 gadget has already been
halted/stopped.

This fixes an issue where the following condition occurs:

usb_gadget_remove_driver()
-->usb_gadget_disconnect()
 -->dwc3_gadget_pullup(0)
  -->pm_runtime_get_sync() -> ret = 0
  -->pm_runtime_put() [async]
-->usb_gadget_udc_stop()
 -->dwc3_gadget_stop()
  -->dwc->gadget_driver = NULL
...

dwc3_suspend_common()
-->dwc3_gadget_suspend()
 -->DWC3 halt/stop routine skipped, driver_data == NULL

This leads to a situation where the DWC3 gadget is not properly
stopped, as the runtime resume would have re-enabled EP0 and event
interrupts, and since we avoided the DWC3 gadget suspend, these
resources were never disabled.

Fixes: 77adb8bdf422 ("usb: dwc3: gadget: Allow runtime suspend if UDC unbinded")
Cc: stable <stable@vger.kernel.org>
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
Link: https://lore.kernel.org/r/1628058245-30692-1-git-send-email-wcheng@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index a1b262669574..b8d4b2d327b2 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2256,6 +2256,17 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
 		}
 	}
 
+	/*
+	 * Avoid issuing a runtime resume if the device is already in the
+	 * suspended state during gadget disconnect.  DWC3 gadget was already
+	 * halted/stopped during runtime suspend.
+	 */
+	if (!is_on) {
+		pm_runtime_barrier(dwc->dev);
+		if (pm_runtime_suspended(dwc->dev))
+			return 0;
+	}
+
 	/*
 	 * Check the return value for successful resume, or error.  For a
 	 * successful resume, the DWC3 runtime PM resume routine will handle

From 6aa32467299e9e12280a6aec9dbc21bf2db830b0 Mon Sep 17 00:00:00 2001
From: Huang Pei <huangpei@loongson.cn>
Date: Wed, 21 Jul 2021 17:30:45 +0800
Subject: [PATCH 461/502] MIPS: check return value of pgtable_pmd_page_ctor

+. According to Documentation/vm/split_page_table_lock, handle failure
of pgtable_pmd_page_ctor

+. Use GFP_KERNEL_ACCOUNT instead of GFP_KERNEL|__GFP_ACCOUNT

+. Adjust coding style

Fixes: ed914d48b6a1 ("MIPS: add PMD table accounting into MIPS')
Reported-by: Joshua Kinard <kumba@gentoo.org>
Signed-off-by: Huang Pei <huangpei@loongson.cn>
Reviewed-by: Joshua Kinard <kumba@gentoo.org>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/include/asm/pgalloc.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 4b2567d6b2df..c7925d0e9874 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -58,15 +58,20 @@ do {							\
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pmd_t *pmd = NULL;
+	pmd_t *pmd;
 	struct page *pg;
 
-	pg = alloc_pages(GFP_KERNEL | __GFP_ACCOUNT, PMD_ORDER);
-	if (pg) {
-		pgtable_pmd_page_ctor(pg);
-		pmd = (pmd_t *)page_address(pg);
-		pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
+	pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_ORDER);
+	if (!pg)
+		return NULL;
+
+	if (!pgtable_pmd_page_ctor(pg)) {
+		__free_pages(pg, PMD_ORDER);
+		return NULL;
 	}
+
+	pmd = (pmd_t *)page_address(pg);
+	pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
 	return pmd;
 }
 

From 43ad944cd73f2360ec8ff31d29ea44830b3119af Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Tue, 3 Aug 2021 17:13:14 +0800
Subject: [PATCH 462/502] usb: typec: tcpm: Keep other events when receiving
 FRS and Sourcing_vbus events

When receiving FRS and Sourcing_Vbus events from low-level drivers, keep
other events which come a bit earlier so that they will not be ignored
in the event handler.

Fixes: 8dc4bd073663 ("usb: typec: tcpm: Add support for Sink Fast Role SWAP(FRS)")
Cc: stable <stable@vger.kernel.org>
Cc: Badhri Jagan Sridharan <badhri@google.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Badhri Jagan Sridharan <badhri@google.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210803091314.3051302-1-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 5b22a1c931a9..b9bb63d749ec 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5369,7 +5369,7 @@ EXPORT_SYMBOL_GPL(tcpm_pd_hard_reset);
 void tcpm_sink_frs(struct tcpm_port *port)
 {
 	spin_lock(&port->pd_event_lock);
-	port->pd_events = TCPM_FRS_EVENT;
+	port->pd_events |= TCPM_FRS_EVENT;
 	spin_unlock(&port->pd_event_lock);
 	kthread_queue_work(port->wq, &port->event_work);
 }
@@ -5378,7 +5378,7 @@ EXPORT_SYMBOL_GPL(tcpm_sink_frs);
 void tcpm_sourcing_vbus(struct tcpm_port *port)
 {
 	spin_lock(&port->pd_event_lock);
-	port->pd_events = TCPM_SOURCING_VBUS;
+	port->pd_events |= TCPM_SOURCING_VBUS;
 	spin_unlock(&port->pd_event_lock);
 	kthread_queue_work(port->wq, &port->event_work);
 }

From 5a7c1b2a5bb4461967b15f3484a0ff75d3199719 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Date: Wed, 4 Aug 2021 21:39:49 +0530
Subject: [PATCH 463/502] net: wwan: iosm: fix lkp buildbot warning

Correct td buffer type casting & format specifier to fix lkp buildbot
warning.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
index 91109e27efd3..35d590743d3a 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
@@ -412,8 +412,8 @@ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol,
 	}
 
 	if (p_td->buffer.address != IPC_CB(skb)->mapping) {
-		dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p",
-			(void *)p_td->buffer.address, skb->data);
+		dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p",
+			(unsigned long long)p_td->buffer.address, skb->data);
 		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
 		skb = NULL;
 		goto ret;

From b46c5795d641b759eb0f001ab21852fe5df5ef92 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Date: Wed, 4 Aug 2021 21:39:50 +0530
Subject: [PATCH 464/502] net: wwan: iosm: endianness type correction

Endianness type correction for nr_of_bytes. This field is exchanged
as part of host-device protocol communication.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 4 ++--
 drivers/net/wwan/iosm/iosm_ipc_mux_codec.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
index 562de275797a..bdb2d32cdb6d 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
@@ -320,7 +320,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux,
 		return;
 	}
 
-	ul_credits = fct->vfl.nr_of_bytes;
+	ul_credits = le32_to_cpu(fct->vfl.nr_of_bytes);
 
 	dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d",
 		if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits);
@@ -586,7 +586,7 @@ static bool ipc_mux_lite_send_qlt(struct iosm_mux *ipc_mux)
 		qlt->reserved[0] = 0;
 		qlt->reserved[1] = 0;
 
-		qlt->vfl.nr_of_bytes = session->ul_list.qlen;
+		qlt->vfl.nr_of_bytes = cpu_to_le32(session->ul_list.qlen);
 
 		/* Add QLT to the transfer list. */
 		skb_queue_tail(&ipc_mux->channel->ul_list,
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
index 4a74e3c9457f..aae83db5cbb8 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
@@ -106,7 +106,7 @@ struct mux_lite_cmdh {
  * @nr_of_bytes:	Number of bytes available to transmit in the queue.
  */
 struct mux_lite_vfl {
-	u32 nr_of_bytes;
+	__le32 nr_of_bytes;
 };
 
 /**

From c98f5220e9703db2d73b4e89c07879dc61eeab14 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Date: Wed, 4 Aug 2021 21:39:51 +0530
Subject: [PATCH 465/502] net: wwan: iosm: correct data protocol mask bit

Correct ul/dl data protocol mask bit to know which protocol capability
does device implement.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_mmio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.h b/drivers/net/wwan/iosm/iosm_ipc_mmio.h
index 45e6923da78f..f861994a6d90 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mmio.h
+++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.h
@@ -10,10 +10,10 @@
 #define IOSM_CP_VERSION 0x0100UL
 
 /* DL dir Aggregation support mask */
-#define DL_AGGR BIT(23)
+#define DL_AGGR BIT(9)
 
 /* UL dir Aggregation support mask */
-#define UL_AGGR BIT(22)
+#define UL_AGGR BIT(8)
 
 /* UL flow credit support mask */
 #define UL_FLOW_CREDIT BIT(21)

From 679505baaaabed98359c1dfb78f81600e299af21 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Date: Wed, 4 Aug 2021 21:39:52 +0530
Subject: [PATCH 466/502] net: wwan: iosm: fix recursive lock acquire in
 unregister

Calling unregister_netdevice() inside wwan del link is trying to
acquire the held lock in ndo_stop_cb(). Instead, queue net dev to
be unregistered later.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_wwan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
index b2357ad5d517..b571d9cedba4 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
@@ -228,7 +228,7 @@ static void ipc_wwan_dellink(void *ctxt, struct net_device *dev,
 
 	RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL);
 	/* unregistering includes synchronize_net() */
-	unregister_netdevice(dev);
+	unregister_netdevice_queue(dev, head);
 
 unlock:
 	mutex_unlock(&ipc_wwan->if_mutex);

From fa953adfad7cf9c7e30d9ea0e4ccfd38cfb5495d Mon Sep 17 00:00:00 2001
From: "H. Nikolaus Schaller" <hns@goldelico.com>
Date: Thu, 8 Jul 2021 10:57:09 +0200
Subject: [PATCH 467/502] x86/tools/relocs: Fix non-POSIX regexp

Trying to run a cross-compiled x86 relocs tool on a BSD based
HOSTCC leads to errors like

  VOFFSET arch/x86/boot/compressed/../voffset.h - due to: vmlinux
  CC      arch/x86/boot/compressed/misc.o - due to: arch/x86/boot/compressed/../voffset.h
  OBJCOPY arch/x86/boot/compressed/vmlinux.bin - due to: vmlinux
  RELOCS  arch/x86/boot/compressed/vmlinux.relocs - due to: vmlinux
empty (sub)expressionarch/x86/boot/compressed/Makefile:118: recipe for target 'arch/x86/boot/compressed/vmlinux.relocs' failed
make[3]: *** [arch/x86/boot/compressed/vmlinux.relocs] Error 1

It turns out that relocs.c uses patterns like

	"something(|_end)"

This is not valid syntax or gives undefined results according
to POSIX 9.5.3 ERE Grammar

	https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html

It seems to be silently accepted by the Linux regexp() implementation
while a BSD host complains.

Such patterns can be replaced by a transformation like

	"(|p1|p2)" -> "(p1|p2)?"

Fixes: fd952815307f ("x86-32, relocs: Whitelist more symbols for ld bug workaround")
Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 arch/x86/tools/relocs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 04c5a44b9682..9ba700dc47de 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -57,12 +57,12 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
 	[S_REL] =
 	"^(__init_(begin|end)|"
 	"__x86_cpu_dev_(start|end)|"
-	"(__parainstructions|__alt_instructions)(|_end)|"
-	"(__iommu_table|__apicdrivers|__smp_locks)(|_end)|"
+	"(__parainstructions|__alt_instructions)(_end)?|"
+	"(__iommu_table|__apicdrivers|__smp_locks)(_end)?|"
 	"__(start|end)_pci_.*|"
 	"__(start|end)_builtin_fw|"
-	"__(start|stop)___ksymtab(|_gpl)|"
-	"__(start|stop)___kcrctab(|_gpl)|"
+	"__(start|stop)___ksymtab(_gpl)?|"
+	"__(start|stop)___kcrctab(_gpl)?|"
 	"__(start|stop)___param|"
 	"__(start|stop)___modver|"
 	"__(start|stop)___bug_table|"

From 28bbbb9875a35975904e46f9b06fa689d051b290 Mon Sep 17 00:00:00 2001
From: "H. Nikolaus Schaller" <hns@goldelico.com>
Date: Thu, 8 Jul 2021 10:57:10 +0200
Subject: [PATCH 468/502] mips: Fix non-POSIX regexp

When cross compiling a MIPS kernel on a BSD based HOSTCC leads
to errors like

  SYNC    include/config/auto.conf.cmd - due to: .config
egrep: empty (sub)expression
  UPD     include/config/kernel.release
  HOSTCC  scripts/dtc/dtc.o - due to target missing

It turns out that egrep uses this egrep pattern:

		(|MINOR_|PATCHLEVEL_)

This is not valid syntax or gives undefined results according
to POSIX 9.5.3 ERE Grammar

	https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html

It seems to be silently accepted by the Linux egrep implementation
while a BSD host complains.

Such patterns can be replaced by a transformation like

	"(|p1|p2)" -> "(p1|p2)?"

Fixes: 48c35b2d245f ("[MIPS] There is no __GNUC_MAJOR__")
Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 arch/mips/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 4e942b7ef022..653befc1b176 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -321,7 +321,7 @@ KBUILD_LDFLAGS		+= -m $(ld-emul)
 
 ifdef CONFIG_MIPS
 CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
-	egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \
+	egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
 	sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
 endif
 

From 54eacba0e3bbda9777788b44b45a5186918569f2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 26 Jul 2021 19:57:37 -0700
Subject: [PATCH 469/502] scripts: checkversion: modernize linux/version.h
 search strings

Update scripts/checkversion.pl to recognize the current contents
of <linux/version.h> and both of its current locations.

Also update my email address.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/checkversion.pl | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/scripts/checkversion.pl b/scripts/checkversion.pl
index f67b125c5269..94cd49eff605 100755
--- a/scripts/checkversion.pl
+++ b/scripts/checkversion.pl
@@ -1,10 +1,10 @@
 #! /usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0
 #
-# checkversion find uses of LINUX_VERSION_CODE or KERNEL_VERSION
-# without including <linux/version.h>, or cases of
-# including <linux/version.h> that don't need it.
-# Copyright (C) 2003, Randy Dunlap <rdunlap@xenotime.net>
+# checkversion finds uses of all macros in <linux/version.h>
+# where the source files do not #include <linux/version.h>; or cases
+# of including <linux/version.h> where it is not needed.
+# Copyright (C) 2003, Randy Dunlap <rdunlap@infradead.org>
 
 use strict;
 
@@ -13,7 +13,8 @@ $| = 1;
 my $debugging;
 
 foreach my $file (@ARGV) {
-    next if $file =~ "include/linux/version\.h";
+    next if $file =~ "include/generated/uapi/linux/version\.h";
+    next if $file =~ "usr/include/linux/version\.h";
     # Open this file.
     open( my $f, '<', $file )
       or die "Can't open $file: $!\n";
@@ -41,8 +42,11 @@ foreach my $file (@ARGV) {
 	    $iLinuxVersion      = $. if m/^\s*#\s*include\s*<linux\/version\.h>/o;
 	}
 
-	# Look for uses: LINUX_VERSION_CODE, KERNEL_VERSION, UTS_RELEASE
-	if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/)) {
+	# Look for uses: LINUX_VERSION_CODE, KERNEL_VERSION,
+	# LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL
+	if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/) ||
+	    ($_ =~ /LINUX_VERSION_MAJOR/) || ($_ =~ /LINUX_VERSION_PATCHLEVEL/) ||
+	    ($_ =~ /LINUX_VERSION_SUBLEVEL/)) {
 	    $fUseVersion = 1;
             last if $iLinuxVersion;
         }

From 14ccc638b02f9ec500c17d9e39efe979145a4b61 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 29 Jul 2021 09:12:54 +0900
Subject: [PATCH 470/502] kbuild: cancel sub_make_done for the install target
 to fix DKMS

Since commit bcf637f54f6d ("kbuild: parse C= and M= before changing the
working directory"), external module builds invoked by DKMS fail because
M= option is not parsed.

I wanted to add 'unset sub_make_done' in install.sh but similar scripts,
arch/*/boot/install.sh, are duplicated, so I set sub_make_done empty in
the top Makefile.

Fixes: bcf637f54f6d ("kbuild: parse C= and M= before changing the working directory")
Reported-by: John S Gruber <johnsgruber@gmail.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Tested-by: John S Gruber <johnsgruber@gmail.com>
---
 Makefile | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Makefile b/Makefile
index 27a072cffcb9..efb294568f28 100644
--- a/Makefile
+++ b/Makefile
@@ -1317,6 +1317,16 @@ PHONY += scripts_unifdef
 scripts_unifdef: scripts_basic
 	$(Q)$(MAKE) $(build)=scripts scripts/unifdef
 
+# ---------------------------------------------------------------------------
+# Install
+
+# Many distributions have the custom install script, /sbin/installkernel.
+# If DKMS is installed, 'make install' will eventually recuses back
+# to the this Makefile to build and install external modules.
+# Cancel sub_make_done so that options such as M=, V=, etc. are parsed.
+
+install: sub_make_done :=
+
 # ---------------------------------------------------------------------------
 # Tools
 

From fb653827c758725b149b5c924a5eb50ab4812750 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 5 Aug 2021 13:38:26 +0300
Subject: [PATCH 471/502] bnx2x: fix an error code in bnx2x_nic_load()

Set the error code if bnx2x_alloc_fw_stats_mem() fails.  The current
code returns success.

Fixes: ad5afc89365e ("bnx2x: Separate VF and PF logic")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 1a6ec1a12d53..b5d954cb409a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2669,7 +2669,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	}
 
 	/* Allocated memory for FW statistics  */
-	if (bnx2x_alloc_fw_stats_mem(bp))
+	rc = bnx2x_alloc_fw_stats_mem(bp);
+	if (rc)
 		LOAD_ERROR_EXIT(bp, load_error0);
 
 	/* request pf to initialize status blocks */

From ae03d189bae306e1e00aa631feee090ebda6cf63 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 5 Aug 2021 13:14:09 +0300
Subject: [PATCH 472/502] net: ethernet: ti: am65-cpsw: fix crash in
 am65_cpsw_port_offload_fwd_mark_update()

The am65_cpsw_port_offload_fwd_mark_update() causes NULL exception crash
when there is at least one disabled port and any other port added to the
bridge first time.

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000858
pc : am65_cpsw_port_offload_fwd_mark_update+0x54/0x68
lr : am65_cpsw_netdevice_event+0x8c/0xf0
Call trace:
am65_cpsw_port_offload_fwd_mark_update+0x54/0x68
notifier_call_chain+0x54/0x98
raw_notifier_call_chain+0x14/0x20
call_netdevice_notifiers_info+0x34/0x78
__netdev_upper_dev_link+0x1c8/0x290
netdev_master_upper_dev_link+0x1c/0x28
br_add_if+0x3f0/0x6d0 [bridge]

Fix it by adding proper check for port->ndev != NULL.

Fixes: 2934db9bcb30 ("net: ti: am65-cpsw-nuss: Add netdevice notifiers")
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 718539cdd2f2..67a08cbba859 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2060,8 +2060,12 @@ static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *comm
 
 	for (i = 1; i <= common->port_num; i++) {
 		struct am65_cpsw_port *port = am65_common_get_port(common, i);
-		struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev);
+		struct am65_cpsw_ndev_priv *priv;
 
+		if (!port->ndev)
+			continue;
+
+		priv = am65_ndev_to_priv(port->ndev);
 		priv->offload_fwd_mark = set_val;
 	}
 }

From 51397dc6f283bb570e1cf8226017d300d8ea1f5b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 4 Aug 2021 14:18:48 -0400
Subject: [PATCH 473/502] tracing: Quiet smp_processor_id() use in preemptable
 warning in hwlat

The hardware latency detector (hwlat) has a mode that it runs one thread
across CPUs. The logic to move from the currently running CPU to the next
one in the list does a smp_processor_id() to find where it currently is.
Unfortunately, it's done with preemption enabled, and this triggers a
warning for using smp_processor_id() in a preempt enabled section.

As it is only using smp_processor_id() to get information on where it
currently is in order to simply move it to the next CPU, it doesn't really
care if it got moved in the mean time. It will simply balance out later if
such a case arises.

Switch smp_processor_id() to raw_smp_processor_id() to quiet that warning.

Link: https://lkml.kernel.org/r/20210804141848.79edadc0@oasis.local.home

Acked-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Fixes: 8fa826b7344d ("trace/hwlat: Implement the mode config option")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_hwlat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index a6c0cdaf4b87..14f46aae1981 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -327,7 +327,7 @@ static void move_to_next_cpu(void)
 
 	get_online_cpus();
 	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
-	next_cpu = cpumask_next(smp_processor_id(), current_mask);
+	next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
 	put_online_cpus();
 
 	if (next_cpu >= nr_cpu_ids)

From af35fc37354cda3c9c8cc4961b1d24bdc9d27903 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Wed, 4 Aug 2021 17:30:05 +0300
Subject: [PATCH 474/502] net: pegasus: fix uninit-value in
 get_interrupt_interval

Syzbot reported uninit value pegasus_probe(). The problem was in missing
error handling.

get_interrupt_interval() internally calls read_eprom_word() which can
fail in some cases. For example: failed to receive usb control message.
These cases should be handled to prevent uninit value bug, since
read_eprom_word() will not initialize passed stack variable in case of
internal failure.

Fail log:

BUG: KMSAN: uninit-value in get_interrupt_interval drivers/net/usb/pegasus.c:746 [inline]
BUG: KMSAN: uninit-value in pegasus_probe+0x10e7/0x4080 drivers/net/usb/pegasus.c:1152
CPU: 1 PID: 825 Comm: kworker/1:1 Not tainted 5.12.0-rc6-syzkaller #0
...
Workqueue: usb_hub_wq hub_event
Call Trace:
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0x24c/0x2e0 lib/dump_stack.c:120
 kmsan_report+0xfb/0x1e0 mm/kmsan/kmsan_report.c:118
 __msan_warning+0x5c/0xa0 mm/kmsan/kmsan_instr.c:197
 get_interrupt_interval drivers/net/usb/pegasus.c:746 [inline]
 pegasus_probe+0x10e7/0x4080 drivers/net/usb/pegasus.c:1152
....

Local variable ----data.i@pegasus_probe created at:
 get_interrupt_interval drivers/net/usb/pegasus.c:1151 [inline]
 pegasus_probe+0xe57/0x4080 drivers/net/usb/pegasus.c:1152
 get_interrupt_interval drivers/net/usb/pegasus.c:1151 [inline]
 pegasus_probe+0xe57/0x4080 drivers/net/usb/pegasus.c:1152

Reported-and-tested-by: syzbot+02c9f70f3afae308464a@syzkaller.appspotmail.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20210804143005.439-1-paskripkin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/pegasus.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index f18b03be1b87..652e9fcf0b77 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -744,12 +744,16 @@ static inline void disable_net_traffic(pegasus_t *pegasus)
 	set_registers(pegasus, EthCtrl0, sizeof(tmp), &tmp);
 }
 
-static inline void get_interrupt_interval(pegasus_t *pegasus)
+static inline int get_interrupt_interval(pegasus_t *pegasus)
 {
 	u16 data;
 	u8 interval;
+	int ret;
+
+	ret = read_eprom_word(pegasus, 4, &data);
+	if (ret < 0)
+		return ret;
 
-	read_eprom_word(pegasus, 4, &data);
 	interval = data >> 8;
 	if (pegasus->usb->speed != USB_SPEED_HIGH) {
 		if (interval < 0x80) {
@@ -764,6 +768,8 @@ static inline void get_interrupt_interval(pegasus_t *pegasus)
 		}
 	}
 	pegasus->intr_interval = interval;
+
+	return 0;
 }
 
 static void set_carrier(struct net_device *net)
@@ -1165,7 +1171,9 @@ static int pegasus_probe(struct usb_interface *intf,
 				| NETIF_MSG_PROBE | NETIF_MSG_LINK);
 
 	pegasus->features = usb_dev_id[dev_index].private;
-	get_interrupt_interval(pegasus);
+	res = get_interrupt_interval(pegasus);
+	if (res)
+		goto out2;
 	if (reset_mac(pegasus)) {
 		dev_err(&intf->dev, "can't reset MAC\n");
 		res = -EIO;

From 44712965bf12ae1758cec4de53816ed4b914ca1a Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Wed, 4 Aug 2021 18:51:51 +0300
Subject: [PATCH 475/502] net: fec: fix use-after-free in fec_drv_remove

Smatch says:
	drivers/net/ethernet/freescale/fec_main.c:3994 fec_drv_remove() error: Using fep after free_{netdev,candev}(ndev);
	drivers/net/ethernet/freescale/fec_main.c:3995 fec_drv_remove() error: Using fep after free_{netdev,candev}(ndev);

Since fep pointer is netdev private data, accessing it after free_netdev()
call can cause use-after-free bug. Fix it by moving free_netdev() call at
the end of the function

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: a31eda65ba21 ("net: fec: fix clock count mis-match")
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Reviewed-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/fec_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 8aea707a65a7..7e4c4980ced7 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3843,13 +3843,13 @@ fec_drv_remove(struct platform_device *pdev)
 	if (of_phy_is_fixed_link(np))
 		of_phy_deregister_fixed_link(np);
 	of_node_put(fep->phy_node);
-	free_netdev(ndev);
 
 	clk_disable_unprepare(fep->clk_ahb);
 	clk_disable_unprepare(fep->clk_ipg);
 	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
+	free_netdev(ndev);
 	return 0;
 }
 

From 942e560a3d3862dd5dee1411dbdd7097d29b8416 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Wed, 4 Aug 2021 18:52:20 +0300
Subject: [PATCH 476/502] net: vxge: fix use-after-free in
 vxge_device_unregister

Smatch says:
drivers/net/ethernet/neterion/vxge/vxge-main.c:3518 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev);
drivers/net/ethernet/neterion/vxge/vxge-main.c:3518 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev);
drivers/net/ethernet/neterion/vxge/vxge-main.c:3520 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev);
drivers/net/ethernet/neterion/vxge/vxge-main.c:3520 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev);

Since vdev pointer is netdev private data accessing it after free_netdev()
call can cause use-after-free bug. Fix it by moving free_netdev() call at
the end of the function

Fixes: 6cca200362b4 ("vxge: cleanup probe error paths")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/neterion/vxge/vxge-main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 82eef4c72f01..7abd13e69471 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3512,13 +3512,13 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev)
 
 	kfree(vdev->vpaths);
 
-	/* we are safe to free it now */
-	free_netdev(dev);
-
 	vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered",
 			buf);
 	vxge_debug_entryexit(vdev->level_trace,	"%s: %s:%d  Exiting...", buf,
 			     __func__, __LINE__);
+
+	/* we are safe to free it now */
+	free_netdev(dev);
 }
 
 /*

From 8d75d0eff6887bcac7225e12b9c75595e523d92d Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Thu, 5 Aug 2021 20:46:45 +0800
Subject: [PATCH 477/502] blk-iolatency: error out if blk_get_queue() failed in
 iolatency_set_limit()

If queue is dying while iolatency_set_limit() is in progress,
blk_get_queue() won't increment the refcount of the queue. However,
blk_put_queue() will still decrement the refcount later, which will
cause the refcout to be unbalanced.

Thus error out in such case to fix the problem.

Fixes: 8c772a9bfc7c ("blk-iolatency: fix IO hang due to negative inflight counter")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20210805124645.543797-1-yukuai3@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-iolatency.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 81be0096411d..d8b0d8bd132b 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -833,7 +833,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
 
 	enable = iolatency_set_min_lat_nsec(blkg, lat_val);
 	if (enable) {
-		WARN_ON_ONCE(!blk_get_queue(blkg->q));
+		if (!blk_get_queue(blkg->q)) {
+			ret = -ENODEV;
+			goto out;
+		}
+
 		blkg_get(blkg);
 	}
 

From 46c4c9d1beb7f5b4cec4dd90e7728720583ee348 Mon Sep 17 00:00:00 2001
From: "Alex Xu (Hello71)" <alex_y_xu@yahoo.ca>
Date: Thu, 5 Aug 2021 10:40:47 -0400
Subject: [PATCH 478/502] pipe: increase minimum default pipe size to 2 pages

This program always prints 4096 and hangs before the patch, and always
prints 8192 and exits successfully after:

  int main()
  {
      int pipefd[2];
      for (int i = 0; i < 1025; i++)
          if (pipe(pipefd) == -1)
              return 1;
      size_t bufsz = fcntl(pipefd[1], F_GETPIPE_SZ);
      printf("%zd\n", bufsz);
      char *buf = calloc(bufsz, 1);
      write(pipefd[1], buf, bufsz);
      read(pipefd[0], buf, bufsz-1);
      write(pipefd[1], buf, 1);
  }

Note that you may need to increase your RLIMIT_NOFILE before running the
program.

Fixes: 759c01142a ("pipe: limit the per-user amount of pages allocated in pipes")
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/
Link: https://lore.kernel.org/lkml/1628127094.lxxn016tj7.none@localhost/
Signed-off-by: Alex Xu (Hello71) <alex_y_xu@yahoo.ca>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 9ef4231cce61..8e6ef62aeb1c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -31,6 +31,21 @@
 
 #include "internal.h"
 
+/*
+ * New pipe buffers will be restricted to this size while the user is exceeding
+ * their pipe buffer quota. The general pipe use case needs at least two
+ * buffers: one for data yet to be read, and one for new data. If this is less
+ * than two, then a write to a non-empty pipe may block even if the pipe is not
+ * full. This can occur with GNU make jobserver or similar uses of pipes as
+ * semaphores: multiple processes may be waiting to write tokens back to the
+ * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
+ *
+ * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
+ * own risk, namely: pipe writes to non-full pipes may block until the pipe is
+ * emptied.
+ */
+#define PIPE_MIN_DEF_BUFFERS 2
+
 /*
  * The max size that a non-root user is allowed to grow the pipe. Can
  * be set by root in /proc/sys/fs/pipe-max-size
@@ -781,8 +796,8 @@ struct pipe_inode_info *alloc_pipe_info(void)
 	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
 
 	if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
-		user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
-		pipe_bufs = 1;
+		user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
+		pipe_bufs = PIPE_MIN_DEF_BUFFERS;
 	}
 
 	if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())

From 2e9fb2c11e0ec3113fcf0e8e052c99ecd82fcd4e Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 5 Aug 2021 10:34:47 -0700
Subject: [PATCH 479/502] block/partitions/ldm.c: Fix a kernel-doc warning

Fix the following kernel-doc warning that appears when building with W=1:

block/partitions/ldm.c:31: warning: expecting prototype for ldm().
Prototype was for ldm_debug() instead

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210805173447.3249906-1-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partitions/ldm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index d333786b5c7e..14b124cdacfc 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
  * ldm - Support for Windows Logical Disk Manager (Dynamic Disks)
  *
  * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>

From e04480920d1eec9c061841399aa6f35b6f987d8b Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Wed, 4 Aug 2021 19:26:56 +0900
Subject: [PATCH 480/502] Bluetooth: defer cleanup of resources in
 hci_unregister_dev()

syzbot is hitting might_sleep() warning at hci_sock_dev_event() due to
calling lock_sock() with rw spinlock held [1].

It seems that history of this locking problem is a trial and error.

Commit b40df5743ee8 ("[PATCH] bluetooth: fix socket locking in
hci_sock_dev_event()") in 2.6.21-rc4 changed bh_lock_sock() to
lock_sock() as an attempt to fix lockdep warning.

Then, commit 4ce61d1c7a8e ("[BLUETOOTH]: Fix locking in
hci_sock_dev_event().") in 2.6.22-rc2 changed lock_sock() to
local_bh_disable() + bh_lock_sock_nested() as an attempt to fix the
sleep in atomic context warning.

Then, commit 4b5dd696f81b ("Bluetooth: Remove local_bh_disable() from
hci_sock.c") in 3.3-rc1 removed local_bh_disable().

Then, commit e305509e678b ("Bluetooth: use correct lock to prevent UAF
of hdev object") in 5.13-rc5 again changed bh_lock_sock_nested() to
lock_sock() as an attempt to fix CVE-2021-3573.

This difficulty comes from current implementation that
hci_sock_dev_event(HCI_DEV_UNREG) is responsible for dropping all
references from sockets because hci_unregister_dev() immediately
reclaims resources as soon as returning from
hci_sock_dev_event(HCI_DEV_UNREG).

But the history suggests that hci_sock_dev_event(HCI_DEV_UNREG) was not
doing what it should do.

Therefore, instead of trying to detach sockets from device, let's accept
not detaching sockets from device at hci_sock_dev_event(HCI_DEV_UNREG),
by moving actual cleanup of resources from hci_unregister_dev() to
hci_cleanup_dev() which is called by bt_host_release() when all
references to this unregistered device (which is a kobject) are gone.

Since hci_sock_dev_event(HCI_DEV_UNREG) no longer resets
hci_pi(sk)->hdev, we need to check whether this device was unregistered
and return an error based on HCI_UNREGISTER flag.  There might be subtle
behavioral difference in "monitor the hdev" functionality; please report
if you found something went wrong due to this patch.

Link: https://syzkaller.appspot.com/bug?extid=a5df189917e79d5e59c9 [1]
Reported-by: syzbot <syzbot+a5df189917e79d5e59c9@syzkaller.appspotmail.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Fixes: e305509e678b ("Bluetooth: use correct lock to prevent UAF of hdev object")
Acked-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         | 16 +++++------
 net/bluetooth/hci_sock.c         | 49 +++++++++++++++++++++-----------
 net/bluetooth/hci_sysfs.c        |  3 ++
 4 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a53e94459ecd..db4312e44d47 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1230,6 +1230,7 @@ struct hci_dev *hci_alloc_dev(void);
 void hci_free_dev(struct hci_dev *hdev);
 int hci_register_dev(struct hci_dev *hdev);
 void hci_unregister_dev(struct hci_dev *hdev);
+void hci_cleanup_dev(struct hci_dev *hdev);
 int hci_suspend_dev(struct hci_dev *hdev);
 int hci_resume_dev(struct hci_dev *hdev);
 int hci_reset_dev(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2560ed2f144d..e1a545c8a69f 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3996,14 +3996,10 @@ EXPORT_SYMBOL(hci_register_dev);
 /* Unregister HCI device */
 void hci_unregister_dev(struct hci_dev *hdev)
 {
-	int id;
-
 	BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
 
 	hci_dev_set_flag(hdev, HCI_UNREGISTER);
 
-	id = hdev->id;
-
 	write_lock(&hci_dev_list_lock);
 	list_del(&hdev->list);
 	write_unlock(&hci_dev_list_lock);
@@ -4038,7 +4034,14 @@ void hci_unregister_dev(struct hci_dev *hdev)
 	}
 
 	device_del(&hdev->dev);
+	/* Actual cleanup is deferred until hci_cleanup_dev(). */
+	hci_dev_put(hdev);
+}
+EXPORT_SYMBOL(hci_unregister_dev);
 
+/* Cleanup HCI device */
+void hci_cleanup_dev(struct hci_dev *hdev)
+{
 	debugfs_remove_recursive(hdev->debugfs);
 	kfree_const(hdev->hw_info);
 	kfree_const(hdev->fw_info);
@@ -4063,11 +4066,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
 	hci_blocked_keys_clear(hdev);
 	hci_dev_unlock(hdev);
 
-	hci_dev_put(hdev);
-
-	ida_simple_remove(&hci_index_ida, id);
+	ida_simple_remove(&hci_index_ida, hdev->id);
 }
-EXPORT_SYMBOL(hci_unregister_dev);
 
 /* Suspend HCI device */
 int hci_suspend_dev(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index b04a5a02ecf3..f1128c2134f0 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -59,6 +59,17 @@ struct hci_pinfo {
 	char              comm[TASK_COMM_LEN];
 };
 
+static struct hci_dev *hci_hdev_from_sock(struct sock *sk)
+{
+	struct hci_dev *hdev = hci_pi(sk)->hdev;
+
+	if (!hdev)
+		return ERR_PTR(-EBADFD);
+	if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+		return ERR_PTR(-EPIPE);
+	return hdev;
+}
+
 void hci_sock_set_flag(struct sock *sk, int nr)
 {
 	set_bit(nr, &hci_pi(sk)->flags);
@@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
 	if (event == HCI_DEV_UNREG) {
 		struct sock *sk;
 
-		/* Detach sockets from device */
+		/* Wake up sockets using this dead device */
 		read_lock(&hci_sk_list.lock);
 		sk_for_each(sk, &hci_sk_list.head) {
-			lock_sock(sk);
 			if (hci_pi(sk)->hdev == hdev) {
-				hci_pi(sk)->hdev = NULL;
 				sk->sk_err = EPIPE;
-				sk->sk_state = BT_OPEN;
 				sk->sk_state_change(sk);
-
-				hci_dev_put(hdev);
 			}
-			release_sock(sk);
 		}
 		read_unlock(&hci_sk_list.lock);
 	}
@@ -930,10 +935,10 @@ static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg)
 static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
 				unsigned long arg)
 {
-	struct hci_dev *hdev = hci_pi(sk)->hdev;
+	struct hci_dev *hdev = hci_hdev_from_sock(sk);
 
-	if (!hdev)
-		return -EBADFD;
+	if (IS_ERR(hdev))
+		return PTR_ERR(hdev);
 
 	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
 		return -EBUSY;
@@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 
 	lock_sock(sk);
 
+	/* Allow detaching from dead device and attaching to alive device, if
+	 * the caller wants to re-bind (instead of close) this socket in
+	 * response to hci_sock_dev_event(HCI_DEV_UNREG) notification.
+	 */
+	hdev = hci_pi(sk)->hdev;
+	if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+		hci_pi(sk)->hdev = NULL;
+		sk->sk_state = BT_OPEN;
+		hci_dev_put(hdev);
+	}
+	hdev = NULL;
+
 	if (sk->sk_state == BT_BOUND) {
 		err = -EALREADY;
 		goto done;
@@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
 
 	lock_sock(sk);
 
-	hdev = hci_pi(sk)->hdev;
-	if (!hdev) {
-		err = -EBADFD;
+	hdev = hci_hdev_from_sock(sk);
+	if (IS_ERR(hdev)) {
+		err = PTR_ERR(hdev);
 		goto done;
 	}
 
@@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 		goto done;
 	}
 
-	hdev = hci_pi(sk)->hdev;
-	if (!hdev) {
-		err = -EBADFD;
+	hdev = hci_hdev_from_sock(sk);
+	if (IS_ERR(hdev)) {
+		err = PTR_ERR(hdev);
 		goto done;
 	}
 
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 9874844a95a9..b69d88b88d2e 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -83,6 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
 static void bt_host_release(struct device *dev)
 {
 	struct hci_dev *hdev = to_hci_dev(dev);
+
+	if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+		hci_cleanup_dev(hdev);
 	kfree(hdev);
 	module_put(THIS_MODULE);
 }

From 0395be967b067d99494113d78470574e86a02ed4 Mon Sep 17 00:00:00 2001
From: Apurva Nandan <a-nandan@ti.com>
Date: Fri, 16 Jul 2021 23:25:03 +0000
Subject: [PATCH 481/502] spi: cadence-quadspi: Fix check condition for DTR ops

buswidth and dtr fields in spi_mem_op are only valid when the
corresponding spi_mem_op phase has a non-zero length. For example,
SPI NAND core doesn't set buswidth when using SPI_MEM_OP_NO_ADDR
phase.

Fix the dtr checks in set_protocol() and suppports_mem_op() to
ignore empty spi_mem_op phases, as checking for dtr field in
empty phase will result in false negatives.

Signed-off-by: Apurva Nandan <a-nandan@ti.com>
Link: https://lore.kernel.org/r/20210716232504.182-3-a-nandan@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence-quadspi.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index a2de23516553..101cc71bffa7 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -325,7 +325,15 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
 	f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE;
 	f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE;
 	f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
-	f_pdata->dtr = op->data.dtr && op->cmd.dtr && op->addr.dtr;
+
+	/*
+	 * For an op to be DTR, cmd phase along with every other non-empty
+	 * phase should have dtr field set to 1. If an op phase has zero
+	 * nbytes, ignore its dtr field; otherwise, check its dtr field.
+	 */
+	f_pdata->dtr = op->cmd.dtr &&
+		       (!op->addr.nbytes || op->addr.dtr) &&
+		       (!op->data.nbytes || op->data.dtr);
 
 	switch (op->data.buswidth) {
 	case 0:
@@ -1228,8 +1236,15 @@ static bool cqspi_supports_mem_op(struct spi_mem *mem,
 {
 	bool all_true, all_false;
 
-	all_true = op->cmd.dtr && op->addr.dtr && op->dummy.dtr &&
-		   op->data.dtr;
+	/*
+	 * op->dummy.dtr is required for converting nbytes into ncycles.
+	 * Also, don't check the dtr field of the op phase having zero nbytes.
+	 */
+	all_true = op->cmd.dtr &&
+		   (!op->addr.nbytes || op->addr.dtr) &&
+		   (!op->dummy.nbytes || op->dummy.dtr) &&
+		   (!op->data.nbytes || op->data.dtr);
+
 	all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr &&
 		    !op->data.dtr;
 

From f7ec4121256393e1d03274acdca73eb18958f27e Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 5 Aug 2021 09:27:15 -0400
Subject: [PATCH 482/502] tracepoint: static call: Compare data on transition
 from 2->1 callees

On transition from 2->1 callees, we should be comparing .data rather
than .func, because the same callback can be registered twice with
different data, and what we care about here is that the data of array
element 0 is unchanged to skip rcu sync.

Link: https://lkml.kernel.org/r/20210805132717.23813-2-mathieu.desnoyers@efficios.com
Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/

Cc: stable@vger.kernel.org
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Stefan Metzmacher <metze@samba.org>
Fixes: 547305a64632 ("tracepoint: Fix out of sync data passing by static caller")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/tracepoint.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index fc32821f8240..133b6454b287 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -338,7 +338,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
 	} else {
 		rcu_assign_pointer(tp->funcs, tp_funcs);
 		tracepoint_update_call(tp, tp_funcs,
-				       tp_funcs[0].func != old[0].func);
+				       tp_funcs[0].data != old[0].data);
 	}
 	release_probes(old);
 	return 0;

From 231264d6927f6740af36855a622d0e240be9d94c Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 5 Aug 2021 09:27:16 -0400
Subject: [PATCH 483/502] tracepoint: Fix static call function vs data state
 mismatch

On a 1->0->1 callbacks transition, there is an issue with the new
callback using the old callback's data.

Considering __DO_TRACE_CALL:

        do {                                                            \
                struct tracepoint_func *it_func_ptr;                    \
                void *__data;                                           \
                it_func_ptr =                                           \
                        rcu_dereference_raw((&__tracepoint_##name)->funcs); \
                if (it_func_ptr) {                                      \
                        __data = (it_func_ptr)->data;                   \

----> [ delayed here on one CPU (e.g. vcpu preempted by the host) ]

                        static_call(tp_func_##name)(__data, args);      \
                }                                                       \
        } while (0)

It has loaded the tp->funcs of the old callback, so it will try to use the old
data. This can be fixed by adding a RCU sync anywhere in the 1->0->1
transition chain.

On a N->2->1 transition, we need an rcu-sync because you may have a
sequence of 3->2->1 (or 1->2->1) where the element 0 data is unchanged
between 2->1, but was changed from 3->2 (or from 1->2), which may be
observed by the static call. This can be fixed by adding an
unconditional RCU sync in transition 2->1.

Note, this fixes a correctness issue at the cost of adding a tremendous
performance regression to the disabling of tracepoints.

Before this commit:

  # trace-cmd start -e all
  # time trace-cmd start -p nop

  real	0m0.778s
  user	0m0.000s
  sys	0m0.061s

After this commit:

  # trace-cmd start -e all
  # time trace-cmd start -p nop

  real	0m10.593s
  user	0m0.017s
  sys	0m0.259s

A follow up fix will introduce a more lightweight scheme based on RCU
get_state and cond_sync, that will return the performance back to what it
was. As both this change and the lightweight versions are complex on their
own, for bisecting any issues that this may cause, they are kept as two
separate changes.

Link: https://lkml.kernel.org/r/20210805132717.23813-3-mathieu.desnoyers@efficios.com
Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/

Cc: stable@vger.kernel.org
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Stefan Metzmacher <metze@samba.org>
Fixes: d25e37d89dd2 ("tracepoint: Optimize using static_call()")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/tracepoint.c | 102 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 82 insertions(+), 20 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 133b6454b287..8d772bd6894d 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -15,6 +15,13 @@
 #include <linux/sched/task.h>
 #include <linux/static_key.h>
 
+enum tp_func_state {
+	TP_FUNC_0,
+	TP_FUNC_1,
+	TP_FUNC_2,
+	TP_FUNC_N,
+};
+
 extern tracepoint_ptr_t __start___tracepoints_ptrs[];
 extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
 
@@ -246,26 +253,29 @@ static void *func_remove(struct tracepoint_func **funcs,
 	return old;
 }
 
-static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync)
+/*
+ * Count the number of functions (enum tp_func_state) in a tp_funcs array.
+ */
+static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs)
+{
+	if (!tp_funcs)
+		return TP_FUNC_0;
+	if (!tp_funcs[1].func)
+		return TP_FUNC_1;
+	if (!tp_funcs[2].func)
+		return TP_FUNC_2;
+	return TP_FUNC_N;	/* 3 or more */
+}
+
+static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs)
 {
 	void *func = tp->iterator;
 
 	/* Synthetic events do not have static call sites */
 	if (!tp->static_call_key)
 		return;
-
-	if (!tp_funcs[1].func) {
+	if (nr_func_state(tp_funcs) == TP_FUNC_1)
 		func = tp_funcs[0].func;
-		/*
-		 * If going from the iterator back to a single caller,
-		 * we need to synchronize with __DO_TRACE to make sure
-		 * that the data passed to the callback is the one that
-		 * belongs to that callback.
-		 */
-		if (sync)
-			tracepoint_synchronize_unregister();
-	}
-
 	__static_call_update(tp->static_call_key, tp->static_call_tramp, func);
 }
 
@@ -299,9 +309,31 @@ static int tracepoint_add_func(struct tracepoint *tp,
 	 * a pointer to it.  This array is referenced by __DO_TRACE from
 	 * include/linux/tracepoint.h using rcu_dereference_sched().
 	 */
-	tracepoint_update_call(tp, tp_funcs, false);
-	rcu_assign_pointer(tp->funcs, tp_funcs);
-	static_key_enable(&tp->key);
+	switch (nr_func_state(tp_funcs)) {
+	case TP_FUNC_1:		/* 0->1 */
+		/* Set static call to first function */
+		tracepoint_update_call(tp, tp_funcs);
+		/* Both iterator and static call handle NULL tp->funcs */
+		rcu_assign_pointer(tp->funcs, tp_funcs);
+		static_key_enable(&tp->key);
+		break;
+	case TP_FUNC_2:		/* 1->2 */
+		/* Set iterator static call */
+		tracepoint_update_call(tp, tp_funcs);
+		/*
+		 * Iterator callback installed before updating tp->funcs.
+		 * Requires ordering between RCU assign/dereference and
+		 * static call update/call.
+		 */
+		rcu_assign_pointer(tp->funcs, tp_funcs);
+		break;
+	case TP_FUNC_N:		/* N->N+1 (N>1) */
+		rcu_assign_pointer(tp->funcs, tp_funcs);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
 
 	release_probes(old);
 	return 0;
@@ -328,17 +360,47 @@ static int tracepoint_remove_func(struct tracepoint *tp,
 		/* Failed allocating new tp_funcs, replaced func with stub */
 		return 0;
 
-	if (!tp_funcs) {
+	switch (nr_func_state(tp_funcs)) {
+	case TP_FUNC_0:		/* 1->0 */
 		/* Removed last function */
 		if (tp->unregfunc && static_key_enabled(&tp->key))
 			tp->unregfunc();
 
 		static_key_disable(&tp->key);
+		/* Set iterator static call */
+		tracepoint_update_call(tp, tp_funcs);
+		/* Both iterator and static call handle NULL tp->funcs */
+		rcu_assign_pointer(tp->funcs, NULL);
+		/*
+		 * Make sure new func never uses old data after a 1->0->1
+		 * transition sequence.
+		 * Considering that transition 0->1 is the common case
+		 * and don't have rcu-sync, issue rcu-sync after
+		 * transition 1->0 to break that sequence by waiting for
+		 * readers to be quiescent.
+		 */
+		tracepoint_synchronize_unregister();
+		break;
+	case TP_FUNC_1:		/* 2->1 */
 		rcu_assign_pointer(tp->funcs, tp_funcs);
-	} else {
+		/*
+		 * On 2->1 transition, RCU sync is needed before setting
+		 * static call to first callback, because the observer
+		 * may have loaded any prior tp->funcs after the last one
+		 * associated with an rcu-sync.
+		 */
+		tracepoint_synchronize_unregister();
+		/* Set static call to first function */
+		tracepoint_update_call(tp, tp_funcs);
+		break;
+	case TP_FUNC_2:		/* N->N-1 (N>2) */
+		fallthrough;
+	case TP_FUNC_N:
 		rcu_assign_pointer(tp->funcs, tp_funcs);
-		tracepoint_update_call(tp, tp_funcs,
-				       tp_funcs[0].data != old[0].data);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
 	}
 	release_probes(old);
 	return 0;

From 23c0ebac20de19e3f54e5e81f4c3fa0caf2f8395 Mon Sep 17 00:00:00 2001
From: Xiaomeng Hou <Xiaomeng.Hou@amd.com>
Date: Thu, 29 Jul 2021 15:44:39 +0800
Subject: [PATCH 484/502] drm/amd/pm: update yellow carp pmfw interface version

Correct yellow carp driver-PMFW interface version to v4.

Signed-off-by: Xiaomeng Hou <Xiaomeng.Hou@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
index 3fea2430dec0..dc91eb608791 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
@@ -26,7 +26,7 @@
 #include "amdgpu_smu.h"
 
 #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
-#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x03
+#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x07
 
 /* MP Apertures */

From 5706cb3c910cc8283f344bc37a889a8d523a2c6d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 29 Jul 2021 20:03:47 -0700
Subject: [PATCH 485/502] drm/amdgpu: fix checking pmops when PM_SLEEP is not
 enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'pm_suspend_target_state' is only available when CONFIG_PM_SLEEP
is set/enabled. OTOH, when both SUSPEND and HIBERNATION are not set,
PM_SLEEP is not set, so this variable cannot be used.

../drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c: In function ‘amdgpu_acpi_is_s0ix_active’:
../drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c:1046:11: error: ‘pm_suspend_target_state’ undeclared (first use in this function); did you mean ‘__KSYM_pm_suspend_target_state’?
    return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
           ^~~~~~~~~~~~~~~~~~~~~~~
           __KSYM_pm_suspend_target_state

Also use shorter IS_ENABLED(CONFIG_foo) notation for checking the
2 config symbols.

Fixes: 91e273712ab8dd ("drm/amdgpu: Check pmops for desired suspend state")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Cc: linux-next@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 6cc0d4fa4d0a..4137e848f6a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1040,7 +1040,7 @@ void amdgpu_acpi_detect(void)
  */
 bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
 {
-#if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE)
+#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_PM_SLEEP)
 	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
 		if (adev->flags & AMD_IS_APU)
 			return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;

From ffb9ee8eb272ba2b5a7325e69bb98118869637db Mon Sep 17 00:00:00 2001
From: Wesley Chalmers <Wesley.Chalmers@amd.com>
Date: Mon, 19 Jul 2021 13:13:33 -0400
Subject: [PATCH 486/502] drm/amd/display: Assume LTTPR interop for DCN31+

[WHY]
For DCN31 onward, LTTPR is to be enabled and set to Transparent by
VBIOS.  Driver is to assume that VBIOS has done this without needing to
check the VBIOS interop bit.

[HOW]
Add LTTPR enable and interop VBIOS bits into dc->caps, and force-set the
interop bit to true for DCN31+.

Reviewed-by: Jun Lei <Jun.Lei@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Wesley Chalmers <Wesley.Chalmers@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  | 21 ++-----------------
 drivers/gpu/drm/amd/display/dc/dc.h           |  2 ++
 .../drm/amd/display/dc/dcn30/dcn30_resource.c | 20 ++++++++++++++++++
 .../drm/amd/display/dc/dcn31/dcn31_resource.c | 16 ++++++++++++++
 4 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 9fb8c46dc606..a6d0fd24fd02 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -3602,29 +3602,12 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link)
 bool dp_retrieve_lttpr_cap(struct dc_link *link)
 {
 	uint8_t lttpr_dpcd_data[6];
-	bool vbios_lttpr_enable = false;
-	bool vbios_lttpr_interop = false;
-	struct dc_bios *bios = link->dc->ctx->dc_bios;
+	bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable;
+	bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware;
 	enum dc_status status = DC_ERROR_UNEXPECTED;
 	bool is_lttpr_present = false;
 
 	memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data));
-	/* Query BIOS to determine if LTTPR functionality is forced on by system */
-	if (bios->funcs->get_lttpr_caps) {
-		enum bp_result bp_query_result;
-		uint8_t is_vbios_lttpr_enable = 0;
-
-		bp_query_result = bios->funcs->get_lttpr_caps(bios, &is_vbios_lttpr_enable);
-		vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable;
-	}
-
-	if (bios->funcs->get_lttpr_interop) {
-		enum bp_result bp_query_result;
-		uint8_t is_vbios_interop_enabled = 0;
-
-		bp_query_result = bios->funcs->get_lttpr_interop(bios, &is_vbios_interop_enabled);
-		vbios_lttpr_interop = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled;
-	}
 
 	/*
 	 * Logic to determine LTTPR mode
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 8dcea8ff5c5a..af7b60108e9d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -183,6 +183,8 @@ struct dc_caps {
 	unsigned int cursor_cache_size;
 	struct dc_plane_cap planes[MAX_PLANES];
 	struct dc_color_caps color;
+	bool vbios_lttpr_aware;
+	bool vbios_lttpr_enable;
 };
 
 struct dc_bug_wa {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 596c97dce67e..253654d605c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -2617,6 +2617,26 @@ static bool dcn30_resource_construct(
 	dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
 	dc->caps.color.mpc.ocsc = 1;
 
+	/* read VBIOS LTTPR caps */
+	{
+		if (ctx->dc_bios->funcs->get_lttpr_caps) {
+			enum bp_result bp_query_result;
+			uint8_t is_vbios_lttpr_enable = 0;
+
+			bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable);
+			dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable;
+		}
+
+		if (ctx->dc_bios->funcs->get_lttpr_interop) {
+			enum bp_result bp_query_result;
+			uint8_t is_vbios_interop_enabled = 0;
+
+			bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios,
+					&is_vbios_interop_enabled);
+			dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled;
+		}
+	}
+
 	if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
 		dc->debug = debug_defaults_drv;
 	else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 38c010afade1..cd3248dc31d8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -1968,6 +1968,22 @@ static bool dcn31_resource_construct(
 	dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
 	dc->caps.color.mpc.ocsc = 1;
 
+	/* read VBIOS LTTPR caps */
+	{
+		if (ctx->dc_bios->funcs->get_lttpr_caps) {
+			enum bp_result bp_query_result;
+			uint8_t is_vbios_lttpr_enable = 0;
+
+			bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable);
+			dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable;
+		}
+
+		/* interop bit is implicit */
+		{
+			dc->caps.vbios_lttpr_aware = true;
+		}
+	}
+
 	if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
 		dc->debug = debug_defaults_drv;
 	else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) {

From 06050a0f01dbac2ca33145ef19a72041206ea983 Mon Sep 17 00:00:00 2001
From: Bing Guo <bing.guo@amd.com>
Date: Mon, 19 Jul 2021 18:24:06 -0400
Subject: [PATCH 487/502] drm/amd/display: Fix Dynamic bpp issue with 8K30 with
 Navi 1X

Why:
In DCN2x, HW doesn't automatically divide MASTER_UPDATE_LOCK_DB_X
by the number of pipes ODM Combined.

How:
Set MASTER_UPDATE_LOCK_DB_X to the value that is adjusted by the
number of pipes ODM Combined.

Reviewed-by: Martin Leung <martin.leung@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Bing Guo <bing.guo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
index 7fa9fc656b0c..f6e747f25ebe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
@@ -464,7 +464,7 @@ void optc2_lock_doublebuffer_enable(struct timing_generator *optc)
 
 	REG_UPDATE_2(OTG_GLOBAL_CONTROL1,
 			MASTER_UPDATE_LOCK_DB_X,
-			h_blank_start - 200 - 1,
+			(h_blank_start - 200 - 1) / optc1->opp_count,
 			MASTER_UPDATE_LOCK_DB_Y,
 			v_blank_start - 1);
 }

From cd7b0531a61811429e7907c875e864ab918f3e62 Mon Sep 17 00:00:00 2001
From: Bing Guo <bing.guo@amd.com>
Date: Tue, 20 Jul 2021 15:13:38 -0400
Subject: [PATCH 488/502] drm/amd/display: Increase stutter watermark for
 dcn303

[Why&How]
Hardware team suggested to use SRExitTime= 35.5us as w/a to prevent
underflow in certain modes.

Reviewed-by: Martin Leung <martin.leung@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Bing Guo <bing.guo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 833ab13fa834..dc7823d23ba8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -146,8 +146,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc = {
 
 		.min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */
 		.num_states = 1,
-		.sr_exit_time_us = 26.5,
-		.sr_enter_plus_exit_time_us = 31,
+		.sr_exit_time_us = 35.5,
+		.sr_enter_plus_exit_time_us = 40,
 		.urgent_latency_us = 4.0,
 		.urgent_latency_pixel_data_only_us = 4.0,
 		.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,

From d5c5ac3a7bca35261eb599204cbf1efee0af22cc Mon Sep 17 00:00:00 2001
From: Jude Shih <jude.shih@amd.com>
Date: Tue, 6 Jul 2021 18:04:11 +0800
Subject: [PATCH 489/502] drm/amd/display: Fix resetting DCN3.1 HW when
 resuming from S4

[Why] On S4 resume we also need to fix detection of when to reload DMCUB
firmware because we're currently using the VBIOS version which isn't
compatible with the driver version.

[How] Update the hardware init check for DCN31 since it's the ASIC that
has this issue.

Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Jude Shih <jude.shih@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
index 973de346410d..27c7fa3110c8 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
@@ -267,11 +267,13 @@ void dmub_dcn31_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset)
 
 bool dmub_dcn31_is_hw_init(struct dmub_srv *dmub)
 {
-	uint32_t is_hw_init;
+	union dmub_fw_boot_status status;
+	uint32_t is_enable;
 
-	REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_hw_init);
+	status.all = REG_READ(DMCUB_SCRATCH0);
+	REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enable);
 
-	return is_hw_init != 0;
+	return is_enable != 0 && status.bits.dal_fw;
 }
 
 bool dmub_dcn31_is_supported(struct dmub_srv *dmub)

From c4152b297d56d3696ad0a9003169bc5b98ad7b72 Mon Sep 17 00:00:00 2001
From: Qingqing Zhuo <qingqing.zhuo@amd.com>
Date: Thu, 22 Jul 2021 14:48:54 -0400
Subject: [PATCH 490/502] drm/amd/display: workaround for hard hang on HPD on
 native DP

[Why]
HPD disable and enable sequences are not mutually exclusive
on Linux. For HPDs that spans over 1s (i.e. HPD low = 1s),
part of the disable sequence (specifically, a request to SMU
to lower refclk) could come right before the call to PHY
enable, causing DMUB to access an unresponsive PHY
and thus a hard hang on the system.

[How]
Disable 48mhz refclk off on native DP.

Reviewed-by: Hersen Wu <hersenxs.wu@amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index c6f494f0dcea..6185f9475fa2 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -66,9 +66,11 @@ int rn_get_active_display_cnt_wa(
 	for (i = 0; i < context->stream_count; i++) {
 		const struct dc_stream_state *stream = context->streams[i];
 
+		/* Extend the WA to DP for Linux*/
 		if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A ||
 				stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
-				stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+				stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK ||
+				stream->signal == SIGNAL_TYPE_DISPLAY_PORT)
 			tmds_present = true;
 	}
 

From 0e99e960ce6d5ff586fc0733bc393c087f52c27b Mon Sep 17 00:00:00 2001
From: Shirish S <shirish.s@amd.com>
Date: Tue, 3 Aug 2021 14:03:44 +0530
Subject: [PATCH 491/502] drm/amdgpu/display: fix DMUB firmware version info

DMUB firmware info is printed before it gets initialized.
Correct this order to ensure true value is conveyed.

Signed-off-by: Shirish S <shirish.s@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b53f49a23ddc..c0ae73b0691c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1548,6 +1548,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
 	}
 
 	hdr = (const struct dmcub_firmware_header_v1_0 *)adev->dm.dmub_fw->data;
+	adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version);
 
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 		adev->firmware.ucode[AMDGPU_UCODE_ID_DMCUB].ucode_id =
@@ -1561,7 +1562,6 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
 			 adev->dm.dmcub_fw_version);
 	}
 
-	adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version);
 
 	adev->dm.dmub_srv = kzalloc(sizeof(*adev->dm.dmub_srv), GFP_KERNEL);
 	dmub_srv = adev->dm.dmub_srv;

From e00f543d3596c71201438d967877138ab33bb3de Mon Sep 17 00:00:00 2001
From: Chengming Gui <Jack.Gui@amd.com>
Date: Wed, 24 Feb 2021 11:48:23 +0800
Subject: [PATCH 492/502] drm/amdgpu: add DID for beige goby

Add device ids.

Signed-off-by: Chengming Gui <Jack.Gui@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 361b86b71b56..5ed8381ae0f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1213,6 +1213,13 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
 
+	/* BEIGE_GOBY */
+	{0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+	{0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+	{0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+	{0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+	{0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+
 	{0, 0, 0}
 };
 

From 2638a32348bbb1c384dbbd515fd2b12c155f0188 Mon Sep 17 00:00:00 2001
From: Dakshaja Uppalapati <dakshaja@chelsio.com>
Date: Thu, 5 Aug 2021 18:13:32 +0530
Subject: [PATCH 493/502] RDMA/iw_cxgb4: Fix refcount underflow while
 destroying cqs.

Previous atomic increment/decrement logic expects the atomic count to be
'0' after the final decrement.

Replacing atomic count with refcount does not allow that, as
refcount_dec() considers count of 1 as underflow and triggers a kernel
splat.

Fix the current refcount logic by using the usual pattern of decrementing
the refcount and test if it is '0' on the final deref in
c4iw_destroy_cq(). Use wait_for_completion() instead of wait_event().

Fixes: 7183451f846d ("RDMA/cxgb4: Use refcount_t instead of atomic_t for reference counting")
Link: https://lore.kernel.org/r/1628167412-12114-1-git-send-email-dakshaja@chelsio.com
Signed-off-by: Dakshaja Uppalapati <dakshaja@chelsio.com>
Reviewed-by: Potnuri Bharat Teja <bharat@chelsio.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/cxgb4/cq.c       | 12 +++++++++---
 drivers/infiniband/hw/cxgb4/ev.c       |  6 ++----
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  3 ++-
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 6c8c910f4e86..c7e8d7b3baa1 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -967,6 +967,12 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 	return !err || err == -ENODATA ? npolled : err;
 }
 
+void c4iw_cq_rem_ref(struct c4iw_cq *chp)
+{
+	if (refcount_dec_and_test(&chp->refcnt))
+		complete(&chp->cq_rel_comp);
+}
+
 int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 {
 	struct c4iw_cq *chp;
@@ -976,8 +982,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 	chp = to_c4iw_cq(ib_cq);
 
 	xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid);
-	refcount_dec(&chp->refcnt);
-	wait_event(chp->wait, !refcount_read(&chp->refcnt));
+	c4iw_cq_rem_ref(chp);
+	wait_for_completion(&chp->cq_rel_comp);
 
 	ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
 					     ibucontext);
@@ -1081,7 +1087,7 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	spin_lock_init(&chp->lock);
 	spin_lock_init(&chp->comp_handler_lock);
 	refcount_set(&chp->refcnt, 1);
-	init_waitqueue_head(&chp->wait);
+	init_completion(&chp->cq_rel_comp);
 	ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL);
 	if (ret)
 		goto err_destroy_cq;
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index 7798d090888b..34211a533d5c 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -213,8 +213,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
 		break;
 	}
 done:
-	if (refcount_dec_and_test(&chp->refcnt))
-		wake_up(&chp->wait);
+	c4iw_cq_rem_ref(chp);
 	c4iw_qp_rem_ref(&qhp->ibqp);
 out:
 	return;
@@ -234,8 +233,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
 		spin_lock_irqsave(&chp->comp_handler_lock, flag);
 		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
 		spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
-		if (refcount_dec_and_test(&chp->refcnt))
-			wake_up(&chp->wait);
+		c4iw_cq_rem_ref(chp);
 	} else {
 		pr_debug("unknown cqid 0x%x\n", qid);
 		xa_unlock_irqrestore(&dev->cqs, flag);
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 3883af3d2312..ac5f581aff4c 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -428,7 +428,7 @@ struct c4iw_cq {
 	spinlock_t lock;
 	spinlock_t comp_handler_lock;
 	refcount_t refcnt;
-	wait_queue_head_t wait;
+	struct completion cq_rel_comp;
 	struct c4iw_wr_wait *wr_waitp;
 };
 
@@ -979,6 +979,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
 int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
 int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+void c4iw_cq_rem_ref(struct c4iw_cq *chp);
 int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		   struct ib_udata *udata);
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);

From acade6379930dfa7987f4bd9b26d1a701cc1b542 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Tue, 3 Aug 2021 06:25:28 -0700
Subject: [PATCH 494/502] perf/x86/intel: Apply mid ACK for small core

A warning as below may be occasionally triggered in an ADL machine when
these conditions occur:

 - Two perf record commands run one by one. Both record a PEBS event.
 - Both runs on small cores.
 - They have different adaptive PEBS configuration (PEBS_DATA_CFG).

  [ ] WARNING: CPU: 4 PID: 9874 at arch/x86/events/intel/ds.c:1743 setup_pebs_adaptive_sample_data+0x55e/0x5b0
  [ ] RIP: 0010:setup_pebs_adaptive_sample_data+0x55e/0x5b0
  [ ] Call Trace:
  [ ]  <NMI>
  [ ]  intel_pmu_drain_pebs_icl+0x48b/0x810
  [ ]  perf_event_nmi_handler+0x41/0x80
  [ ]  </NMI>
  [ ]  __perf_event_task_sched_in+0x2c2/0x3a0

Different from the big core, the small core requires the ACK right
before re-enabling counters in the NMI handler, otherwise a stale PEBS
record may be dumped into the later NMI handler, which trigger the
warning.

Add a new mid_ack flag to track the case. Add all PMI handler bits in
the struct x86_hybrid_pmu to track the bits for different types of
PMUs.  Apply mid ACK for the small cores on an Alder Lake machine.

The existing hybrid() macro has a compile error when taking address of
a bit-field variable. Add a new macro hybrid_bit() to get the
bit-field value of a given PMU.

Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support")
Reported-by: Ammy Yi <ammy.yi@intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Ammy Yi <ammy.yi@intel.com>
Link: https://lkml.kernel.org/r/1627997128-57891-1-git-send-email-kan.liang@linux.intel.com
---
 arch/x86/events/intel/core.c | 23 +++++++++++++++--------
 arch/x86/events/perf_event.h | 15 +++++++++++++++
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fca7a6e2242f..ac6fd2dabf6a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
  */
 static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
-	struct cpu_hw_events *cpuc;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
+	bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
 	int loops;
 	u64 status;
 	int handled;
 	int pmu_enabled;
 
-	cpuc = this_cpu_ptr(&cpu_hw_events);
-
 	/*
 	 * Save the PMU state.
 	 * It needs to be restored when leaving the handler.
 	 */
 	pmu_enabled = cpuc->enabled;
 	/*
-	 * No known reason to not always do late ACK,
-	 * but just in case do it opt-in.
+	 * In general, the early ACK is only applied for old platforms.
+	 * For the big core starts from Haswell, the late ACK should be
+	 * applied.
+	 * For the small core after Tremont, we have to do the ACK right
+	 * before re-enabling counters, which is in the middle of the
+	 * NMI handler.
 	 */
-	if (!x86_pmu.late_ack)
+	if (!late_ack && !mid_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	intel_bts_disable_local();
 	cpuc->enabled = 0;
@@ -2958,6 +2962,8 @@ again:
 		goto again;
 
 done:
+	if (mid_ack)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
 	cpuc->enabled = pmu_enabled;
 	if (pmu_enabled)
@@ -2969,7 +2975,7 @@ done:
 	 * have been reset. This avoids spurious NMIs on
 	 * Haswell CPUs.
 	 */
-	if (x86_pmu.late_ack)
+	if (late_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	return handled;
 }
@@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void)
 		static_branch_enable(&perf_is_hybrid);
 		x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS;
 
-		x86_pmu.late_ack = true;
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.pebs_block = true;
@@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
 		pmu->name = "cpu_core";
 		pmu->cpu_type = hybrid_big;
+		pmu->late_ack = true;
 		if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
 			pmu->num_counters = x86_pmu.num_counters + 2;
 			pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
@@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
 		pmu->name = "cpu_atom";
 		pmu->cpu_type = hybrid_small;
+		pmu->mid_ack = true;
 		pmu->num_counters = x86_pmu.num_counters;
 		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
 		pmu->max_pebs_events = x86_pmu.max_pebs_events;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 2938c902ffbe..e3ac05c97b5e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -656,6 +656,10 @@ struct x86_hybrid_pmu {
 	struct event_constraint		*event_constraints;
 	struct event_constraint		*pebs_constraints;
 	struct extra_reg		*extra_regs;
+
+	unsigned int			late_ack	:1,
+					mid_ack		:1,
+					enabled_ack	:1;
 };
 
 static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
@@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid;
 	__Fp;						\
 }))
 
+#define hybrid_bit(_pmu, _field)			\
+({							\
+	bool __Fp = x86_pmu._field;			\
+							\
+	if (is_hybrid() && (_pmu))			\
+		__Fp = hybrid_pmu(_pmu)->_field;	\
+							\
+	__Fp;						\
+})
+
 enum hybrid_pmu_type {
 	hybrid_big		= 0x40,
 	hybrid_small		= 0x20,
@@ -755,6 +769,7 @@ struct x86_pmu {
 
 	/* PMI handler bits */
 	unsigned int	late_ack		:1,
+			mid_ack			:1,
 			enabled_ack		:1;
 	/*
 	 * sysfs attrs

From 3d4e4face9c1548752a2891e98b38b100feee336 Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Thu, 5 Aug 2021 18:05:37 +0800
Subject: [PATCH 495/502] io-wq: fix no lock protection of acct->nr_worker

There is an acct->nr_worker visit without lock protection. Think about
the case: two callers call io_wqe_wake_worker(), one is the original
context and the other one is an io-worker(by calling
io_wqe_enqueue(wqe, linked)), on two cpus paralelly, this may cause
nr_worker to be larger than max_worker.
Let's fix it by adding lock for it, and let's do nr_workers++ before
create_io_worker. There may be a edge cause that the first caller fails
to create an io-worker, but the second caller doesn't know it and then
quit creating io-worker as well:

say nr_worker = max_worker - 1
        cpu 0                        cpu 1
   io_wqe_wake_worker()          io_wqe_wake_worker()
      nr_worker < max_worker
      nr_worker++
      create_io_worker()         nr_worker == max_worker
         failed                  return
      return

But the chance of this case is very slim.

Fixes: 685fe7feedb9 ("io-wq: eliminate the need for a manager thread")
Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
[axboe: fix unconditional create_io_worker() call]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 50dc93ffc153..64d904ce4f2d 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -247,10 +247,19 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 	ret = io_wqe_activate_free_worker(wqe);
 	rcu_read_unlock();
 
-	if (!ret && acct->nr_workers < acct->max_workers) {
-		atomic_inc(&acct->nr_running);
-		atomic_inc(&wqe->wq->worker_refs);
-		create_io_worker(wqe->wq, wqe, acct->index);
+	if (!ret) {
+		bool do_create = false;
+
+		raw_spin_lock_irq(&wqe->lock);
+		if (acct->nr_workers < acct->max_workers) {
+			atomic_inc(&acct->nr_running);
+			atomic_inc(&wqe->wq->worker_refs);
+			acct->nr_workers++;
+			do_create = true;
+		}
+		raw_spin_unlock_irq(&wqe->lock);
+		if (do_create)
+			create_io_worker(wqe->wq, wqe, acct->index);
 	}
 }
 
@@ -635,6 +644,9 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 		kfree(worker);
 fail:
 		atomic_dec(&acct->nr_running);
+		raw_spin_lock_irq(&wqe->lock);
+		acct->nr_workers--;
+		raw_spin_unlock_irq(&wqe->lock);
 		io_worker_ref_put(wq);
 		return;
 	}
@@ -650,9 +662,8 @@ fail:
 	worker->flags |= IO_WORKER_F_FREE;
 	if (index == IO_WQ_ACCT_BOUND)
 		worker->flags |= IO_WORKER_F_BOUND;
-	if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
+	if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND))
 		worker->flags |= IO_WORKER_F_FIXED;
-	acct->nr_workers++;
 	raw_spin_unlock_irq(&wqe->lock);
 	wake_up_new_task(tsk);
 }

From 21698274da5b6fc724b005bc7ec3e6b9fbcfaa06 Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Thu, 5 Aug 2021 18:05:38 +0800
Subject: [PATCH 496/502] io-wq: fix lack of acct->nr_workers <
 acct->max_workers judgement

There should be this judgement before we create an io-worker

Fixes: 685fe7feedb9 ("io-wq: eliminate the need for a manager thread")
Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 64d904ce4f2d..12fc19353bb0 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -280,9 +280,17 @@ static void create_worker_cb(struct callback_head *cb)
 {
 	struct create_worker_data *cwd;
 	struct io_wq *wq;
+	struct io_wqe *wqe;
+	struct io_wqe_acct *acct;
 
 	cwd = container_of(cb, struct create_worker_data, work);
-	wq = cwd->wqe->wq;
+	wqe = cwd->wqe;
+	wq = wqe->wq;
+	acct = &wqe->acct[cwd->index];
+	raw_spin_lock_irq(&wqe->lock);
+	if (acct->nr_workers < acct->max_workers)
+		acct->nr_workers++;
+	raw_spin_unlock_irq(&wqe->lock);
 	create_io_worker(wq, cwd->wqe, cwd->index);
 	kfree(cwd);
 }

From 7b40066c97ec66a44e388f82fcf694987451768f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 5 Aug 2021 15:29:54 -0400
Subject: [PATCH 497/502] tracepoint: Use rcu get state and cond sync for
 static call updates

State transitions from 1->0->1 and N->2->1 callbacks require RCU
synchronization. Rather than performing the RCU synchronization every
time the state change occurs, which is quite slow when many tracepoints
are registered in batch, instead keep a snapshot of the RCU state on the
most recent transitions which belong to a chain, and conditionally wait
for a grace period on the last transition of the chain if one g.p. has
not elapsed since the last snapshot.

This applies to both RCU and SRCU.

This brings the performance regression caused by commit 231264d6927f
("Fix: tracepoint: static call function vs data state mismatch") back to
what it was originally.

Before this commit:

  # trace-cmd start -e all
  # time trace-cmd start -p nop

  real	0m10.593s
  user	0m0.017s
  sys	0m0.259s

After this commit:

  # trace-cmd start -e all
  # time trace-cmd start -p nop

  real	0m0.878s
  user	0m0.000s
  sys	0m0.103s

Link: https://lkml.kernel.org/r/20210805192954.30688-1-mathieu.desnoyers@efficios.com
Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/

Cc: stable@vger.kernel.org
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Stefan Metzmacher <metze@samba.org>
Fixes: 231264d6927f ("Fix: tracepoint: static call function vs data state mismatch")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/tracepoint.c | 81 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 67 insertions(+), 14 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 8d772bd6894d..efd14c79fab4 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -28,6 +28,44 @@ extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
 DEFINE_SRCU(tracepoint_srcu);
 EXPORT_SYMBOL_GPL(tracepoint_srcu);
 
+enum tp_transition_sync {
+	TP_TRANSITION_SYNC_1_0_1,
+	TP_TRANSITION_SYNC_N_2_1,
+
+	_NR_TP_TRANSITION_SYNC,
+};
+
+struct tp_transition_snapshot {
+	unsigned long rcu;
+	unsigned long srcu;
+	bool ongoing;
+};
+
+/* Protected by tracepoints_mutex */
+static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC];
+
+static void tp_rcu_get_state(enum tp_transition_sync sync)
+{
+	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
+
+	/* Keep the latest get_state snapshot. */
+	snapshot->rcu = get_state_synchronize_rcu();
+	snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu);
+	snapshot->ongoing = true;
+}
+
+static void tp_rcu_cond_sync(enum tp_transition_sync sync)
+{
+	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
+
+	if (!snapshot->ongoing)
+		return;
+	cond_synchronize_rcu(snapshot->rcu);
+	if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu))
+		synchronize_srcu(&tracepoint_srcu);
+	snapshot->ongoing = false;
+}
+
 /* Set to 1 to enable tracepoint debug output */
 static const int tracepoint_debug;
 
@@ -311,6 +349,11 @@ static int tracepoint_add_func(struct tracepoint *tp,
 	 */
 	switch (nr_func_state(tp_funcs)) {
 	case TP_FUNC_1:		/* 0->1 */
+		/*
+		 * Make sure new static func never uses old data after a
+		 * 1->0->1 transition sequence.
+		 */
+		tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1);
 		/* Set static call to first function */
 		tracepoint_update_call(tp, tp_funcs);
 		/* Both iterator and static call handle NULL tp->funcs */
@@ -325,10 +368,15 @@ static int tracepoint_add_func(struct tracepoint *tp,
 		 * Requires ordering between RCU assign/dereference and
 		 * static call update/call.
 		 */
-		rcu_assign_pointer(tp->funcs, tp_funcs);
-		break;
+		fallthrough;
 	case TP_FUNC_N:		/* N->N+1 (N>1) */
 		rcu_assign_pointer(tp->funcs, tp_funcs);
+		/*
+		 * Make sure static func never uses incorrect data after a
+		 * N->...->2->1 (N>1) transition sequence.
+		 */
+		if (tp_funcs[0].data != old[0].data)
+			tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -372,24 +420,23 @@ static int tracepoint_remove_func(struct tracepoint *tp,
 		/* Both iterator and static call handle NULL tp->funcs */
 		rcu_assign_pointer(tp->funcs, NULL);
 		/*
-		 * Make sure new func never uses old data after a 1->0->1
-		 * transition sequence.
-		 * Considering that transition 0->1 is the common case
-		 * and don't have rcu-sync, issue rcu-sync after
-		 * transition 1->0 to break that sequence by waiting for
-		 * readers to be quiescent.
+		 * Make sure new static func never uses old data after a
+		 * 1->0->1 transition sequence.
 		 */
-		tracepoint_synchronize_unregister();
+		tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1);
 		break;
 	case TP_FUNC_1:		/* 2->1 */
 		rcu_assign_pointer(tp->funcs, tp_funcs);
 		/*
-		 * On 2->1 transition, RCU sync is needed before setting
-		 * static call to first callback, because the observer
-		 * may have loaded any prior tp->funcs after the last one
-		 * associated with an rcu-sync.
+		 * Make sure static func never uses incorrect data after a
+		 * N->...->2->1 (N>2) transition sequence. If the first
+		 * element's data has changed, then force the synchronization
+		 * to prevent current readers that have loaded the old data
+		 * from calling the new function.
 		 */
-		tracepoint_synchronize_unregister();
+		if (tp_funcs[0].data != old[0].data)
+			tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
+		tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1);
 		/* Set static call to first function */
 		tracepoint_update_call(tp, tp_funcs);
 		break;
@@ -397,6 +444,12 @@ static int tracepoint_remove_func(struct tracepoint *tp,
 		fallthrough;
 	case TP_FUNC_N:
 		rcu_assign_pointer(tp->funcs, tp_funcs);
+		/*
+		 * Make sure static func never uses incorrect data after a
+		 * N->...->2->1 (N>2) transition sequence.
+		 */
+		if (tp_funcs[0].data != old[0].data)
+			tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
 		break;
 	default:
 		WARN_ON_ONCE(1);

From 877ba3f729fd3d8ef0e29bc2a55e57cfa54b2e43 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 4 Aug 2021 14:23:55 -0400
Subject: [PATCH 498/502] ext4: fix potential htree corruption when growing
 large_dir directories
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit b5776e7524af ("ext4: fix potential htree index checksum
corruption) removed a required restart when multiple levels of index
nodes need to be split.  Fix this to avoid directory htree corruptions
when using the large_dir feature.

Cc: stable@kernel.org # v5.11
Cc: Благодаренко Артём <artem.blagodarenko@gmail.com>
Fixes: b5776e7524af ("ext4: fix potential htree index checksum corruption)
Reported-by: Denis <denis@voxelsoft.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5fd56f616cf0..f3bbcd4efb56 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2517,7 +2517,7 @@ again:
 				goto journal_error;
 			err = ext4_handle_dirty_dx_node(handle, dir,
 							frame->bh);
-			if (err)
+			if (restart || err)
 				goto journal_error;
 		} else {
 			struct dx_root *dxroot;

From fb7b9b0231ba8f77587c23f5257a4fdb6df1219e Mon Sep 17 00:00:00 2001
From: Vincent Fu <vincent.fu@samsung.com>
Date: Wed, 4 Aug 2021 19:49:23 +0000
Subject: [PATCH 499/502] kyber: make trace_block_rq call consistent with
 documentation

The kyber ioscheduler calls trace_block_rq_insert() *after* the request
is added to the queue but the documentation for trace_block_rq_insert()
says that the call should be made *before* the request is added to the
queue.  Move the tracepoint for the kyber ioscheduler so that it is
consistent with the documentation.

Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
Link: https://lore.kernel.org/r/20210804194913.10497-1-vincent.fu@samsung.com
Reviewed by: Adam Manzanares <a.manzanares@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/kyber-iosched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 81e3279ecd57..15a8be57203d 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -596,13 +596,13 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
 		struct list_head *head = &kcq->rq_list[sched_domain];
 
 		spin_lock(&kcq->lock);
+		trace_block_rq_insert(rq);
 		if (at_head)
 			list_move(&rq->queuelist, head);
 		else
 			list_move_tail(&rq->queuelist, head);
 		sbitmap_set_bit(&khd->kcq_map[sched_domain],
 				rq->mq_ctx->index_hw[hctx->type]);
-		trace_block_rq_insert(rq);
 		spin_unlock(&kcq->lock);
 	}
 }

From 6d7f91d914bc90a15ebc426440c26081337ceaa1 Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alex@ghiti.fr>
Date: Wed, 21 Jul 2021 09:59:35 +0200
Subject: [PATCH 500/502] riscv: Get rid of CONFIG_PHYS_RAM_BASE in kernel
 physical address conversion

The usage of CONFIG_PHYS_RAM_BASE for all kernel types was a mistake:
this value is implementation-specific and this breaks the genericity of
the RISC-V kernel.

Fix this by introducing a new variable phys_ram_base that holds this
value at runtime and use it in the kernel physical address conversion
macro. Since this value is used only for XIP kernels, evaluate it only if
CONFIG_XIP_KERNEL is set which in addition optimizes this macro for
standard kernels at compile-time.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Tested-by: Emil Renner Berthing <kernel@esmil.dk>
Reviewed-by: Jisheng Zhang <jszhang@kernel.org>
Fixes: 44c922572952 ("RISC-V: enable XIP")
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/include/asm/page.h |  7 ++++---
 arch/riscv/mm/init.c          | 17 ++++++++++++-----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index cca8764aed83..b0ca5058e7ae 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -103,6 +103,7 @@ struct kernel_mapping {
 };
 
 extern struct kernel_mapping kernel_map;
+extern phys_addr_t phys_ram_base;
 
 #ifdef CONFIG_64BIT
 #define is_kernel_mapping(x)	\
@@ -113,9 +114,9 @@ extern struct kernel_mapping kernel_map;
 #define linear_mapping_pa_to_va(x)	((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
 #define kernel_mapping_pa_to_va(y)	({						\
 	unsigned long _y = y;								\
-	(_y >= CONFIG_PHYS_RAM_BASE) ?							\
-		(void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET) :	\
-		(void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset);		\
+	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ?					\
+		(void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) :		\
+		(void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET);	\
 	})
 #define __pa_to_va_nodebug(x)		linear_mapping_pa_to_va(x)
 
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index a14bf3910eec..88134cc288d9 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -36,6 +36,9 @@ EXPORT_SYMBOL(kernel_map);
 #define kernel_map	(*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
 #endif
 
+phys_addr_t phys_ram_base __ro_after_init;
+EXPORT_SYMBOL(phys_ram_base);
+
 #ifdef CONFIG_XIP_KERNEL
 extern char _xiprom[], _exiprom[];
 #endif
@@ -160,7 +163,7 @@ static void __init setup_bootmem(void)
 	phys_addr_t vmlinux_end = __pa_symbol(&_end);
 	phys_addr_t vmlinux_start = __pa_symbol(&_start);
 	phys_addr_t __maybe_unused max_mapped_addr;
-	phys_addr_t dram_end;
+	phys_addr_t phys_ram_end;
 
 #ifdef CONFIG_XIP_KERNEL
 	vmlinux_start = __pa_symbol(&_sdata);
@@ -181,9 +184,12 @@ static void __init setup_bootmem(void)
 #endif
 	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
-	dram_end = memblock_end_of_DRAM();
 
+	phys_ram_end = memblock_end_of_DRAM();
 #ifndef CONFIG_64BIT
+#ifndef CONFIG_XIP_KERNEL
+	phys_ram_base = memblock_start_of_DRAM();
+#endif
 	/*
 	 * memblock allocator is not aware of the fact that last 4K bytes of
 	 * the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -194,12 +200,12 @@ static void __init setup_bootmem(void)
 	 * be done in create_kernel_page_table.
 	 */
 	max_mapped_addr = __pa(~(ulong)0);
-	if (max_mapped_addr == (dram_end - 1))
+	if (max_mapped_addr == (phys_ram_end - 1))
 		memblock_set_current_limit(max_mapped_addr - 4096);
 #endif
 
-	min_low_pfn = PFN_UP(memblock_start_of_DRAM());
-	max_low_pfn = max_pfn = PFN_DOWN(dram_end);
+	min_low_pfn = PFN_UP(phys_ram_base);
+	max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
 
 	dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
 	set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
@@ -558,6 +564,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
 	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
 
+	phys_ram_base = CONFIG_PHYS_RAM_BASE;
 	kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
 	kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata);
 

From 867432bec1c6e7df21a361d7f12022a8c5f54022 Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alex@ghiti.fr>
Date: Wed, 21 Jul 2021 09:59:36 +0200
Subject: [PATCH 501/502] Revert "riscv: Remove CONFIG_PHYS_RAM_BASE_FIXED"

This reverts commit 9b79878ced8f7ab85c57623f8b1f6882e484a316.

The removal of this config exposes CONFIG_PHYS_RAM_BASE for all kernel
types: this value being implementation-specific, this breaks the
genericity of the RISC-V kernel so revert it.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Tested-by: Emil Renner Berthing <kernel@esmil.dk>
Reviewed-by: Jisheng Zhang <jszhang@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/Kconfig | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 31f9e92f1402..4f7b70ae7c31 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -495,8 +495,13 @@ config STACKPROTECTOR_PER_TASK
 	depends on !GCC_PLUGIN_RANDSTRUCT
 	depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS
 
+config PHYS_RAM_BASE_FIXED
+	bool "Explicitly specified physical RAM address"
+	default n
+
 config PHYS_RAM_BASE
 	hex "Platform Physical RAM address"
+	depends on PHYS_RAM_BASE_FIXED
 	default "0x80000000"
 	help
 	  This is the physical address of RAM in the system. It has to be
@@ -509,6 +514,7 @@ config XIP_KERNEL
 	# This prevents XIP from being enabled by all{yes,mod}config, which
 	# fail to build since XIP doesn't support large kernels.
 	depends on !COMPILE_TEST
+	select PHYS_RAM_BASE_FIXED
 	help
 	  Execute-In-Place allows the kernel to run from non-volatile storage
 	  directly addressable by the CPU, such as NOR flash. This saves RAM

From 36a21d51725af2ce0700c6ebcb6b9594aac658a6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 8 Aug 2021 13:49:31 -0700
Subject: [PATCH 502/502] Linux 5.14-rc5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 9ca176ff1e40..eae1314a5b86 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Opossums on Parade
 
 # *DOCUMENTATION*