From 3ff340e24c9dd5cff9fc07d67914c5adf67f80d6 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 11 Jun 2021 08:42:50 +0300 Subject: [PATCH 001/502] bus: ti-sysc: Fix gpt12 system timer issue with reserved status Jarkko Nikula reported that Beagleboard revision c2 stopped booting. Jarkko bisected the issue down to commit 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend and resume for am3 and am4"). Let's fix the issue by tagging system timers as reserved rather than ignoring them. And let's not probe any interconnect target module child devices for reserved modules. This allows PM runtime to keep track of clocks and clockdomains for the interconnect target module, and prevent the system timer from idling as we already have SYSC_QUIRK_NO_IDLE and SYSC_QUIRK_NO_IDLE_ON_INIT flags set for system timers. Fixes: 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend and resume for am3 and am4") Reported-by: Jarkko Nikula Tested-by: Jarkko Nikula Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 38cb116ed433..188cdb0a394e 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -100,6 +100,7 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = { * @cookie: data used by legacy platform callbacks * @name: name if available * @revision: interconnect target module revision + * @reserved: target module is reserved and already in use * @enabled: sysc runtime enabled status * @needs_resume: runtime resume needed on resume from suspend * @child_needs_resume: runtime resume needed for child on resume from suspend @@ -130,6 +131,7 @@ struct sysc { struct ti_sysc_cookie cookie; const char *name; u32 revision; + unsigned int reserved:1; unsigned int enabled:1; unsigned int needs_resume:1; unsigned int child_needs_resume:1; @@ -3093,8 +3095,8 @@ static int sysc_probe(struct platform_device *pdev) return error; error = sysc_check_active_timer(ddata); - if (error) - return error; + if (error == -EBUSY) + ddata->reserved = true; error = sysc_get_clocks(ddata); if (error) @@ -3130,11 +3132,15 @@ static int sysc_probe(struct platform_device *pdev) sysc_show_registers(ddata); ddata->dev->type = &sysc_device_type; - error = of_platform_populate(ddata->dev->of_node, sysc_match_table, - pdata ? pdata->auxdata : NULL, - ddata->dev); - if (error) - goto err; + + if (!ddata->reserved) { + error = of_platform_populate(ddata->dev->of_node, + sysc_match_table, + pdata ? pdata->auxdata : NULL, + ddata->dev); + if (error) + goto err; + } INIT_DELAYED_WORK(&ddata->idle_work, ti_sysc_idle); From 7c1a80e80cde008f271bae630d28cf684351e807 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 25 Jun 2021 13:23:54 +0300 Subject: [PATCH 002/502] net: xfrm: fix memory leak in xfrm_user_rcv_msg Syzbot reported memory leak in xfrm_user_rcv_msg(). The problem was is non-freed skb's frag_list. In skb_release_all() skb_release_data() will be called only in case of skb->head != NULL, but netlink_skb_destructor() sets head to NULL. So, allocated frag_list skb should be freed manualy, since consume_skb() won't take care of it Fixes: 5106f4a8acff ("xfrm/compat: Add 32=>64-bit messages translator") Reported-and-tested-by: syzbot+fb347cf82c73a90efcca@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b47d613409b7..7aff641c717d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2811,6 +2811,16 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, err = link->doit(skb, nlh, attrs); + /* We need to free skb allocated in xfrm_alloc_compat() before + * returning from this function, because consume_skb() won't take + * care of frag_list since netlink destructor sets + * sbk->head to NULL. (see netlink_skb_destructor()) + */ + if (skb_has_frag_list(skb)) { + kfree_skb(skb_shinfo(skb)->frag_list); + skb_shinfo(skb)->frag_list = NULL; + } + err: kvfree(nlh64); return err; From eaf228263921cd15962654b539d916380a0f076e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 2 Jul 2021 09:20:22 +0200 Subject: [PATCH 003/502] Revert "xfrm: policy: Read seqcount outside of rcu-read side in xfrm_policy_lookup_bytype" This reverts commit d7b0408934c749f546b01f2b33d07421a49b6f3e. This commit tried to fix a locking bug introduced by commit 77cc278f7b20 ("xfrm: policy: Use sequence counters with associated lock"). As it turned out, this patch did not really fix the bug. A proper fix for this bug is applied on top of this revert. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e9d0df2a2ab1..ce500f847b99 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2092,15 +2092,12 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - retry: - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); rcu_read_lock(); - - chain = policy_hash_direct(net, daddr, saddr, family, dir); - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); - goto retry; - } + retry: + do { + sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + chain = policy_hash_direct(net, daddr, saddr, family, dir); + } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2131,15 +2128,11 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) goto retry; - } - if (ret && !xfrm_pol_hold_rcu(ret)) { - rcu_read_unlock(); + if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; - } fail: rcu_read_unlock(); From 2580d3f40022642452dd8422bfb8c22e54cf84bb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 28 Jun 2021 15:34:28 +0200 Subject: [PATCH 004/502] xfrm: Fix RCU vs hash_resize_mutex lock inversion xfrm_bydst_resize() calls synchronize_rcu() while holding hash_resize_mutex. But then on PREEMPT_RT configurations, xfrm_policy_lookup_bytype() may acquire that mutex while running in an RCU read side critical section. This results in a deadlock. In fact the scope of hash_resize_mutex is way beyond the purpose of xfrm_policy_lookup_bytype() to just fetch a coherent and stable policy for a given destination/direction, along with other details. The lower level net->xfrm.xfrm_policy_lock, which among other things protects per destination/direction references to policy entries, is enough to serialize and benefit from priority inheritance against the write side. As a bonus, it makes it officially a per network namespace synchronization business where a policy table resize on namespace A shouldn't block a policy lookup on namespace B. Fixes: 77cc278f7b20 (xfrm: policy: Use sequence counters with associated lock) Cc: stable@vger.kernel.org Cc: Ahmed S. Darwish Cc: Peter Zijlstra (Intel) Cc: Varad Gautam Cc: Steffen Klassert Cc: Herbert Xu Cc: David S. Miller Signed-off-by: Frederic Weisbecker Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 17 ++++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index e816b6a3ef2b..9b376b87bd54 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -74,6 +74,7 @@ struct netns_xfrm { #endif spinlock_t xfrm_state_lock; seqcount_spinlock_t xfrm_state_hash_generation; + seqcount_spinlock_t xfrm_policy_hash_generation; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ce500f847b99..46a6d15b66d6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __ro_after_init; -static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation; static struct rhashtable xfrm_policy_inexact_table; static const struct rhashtable_params xfrm_pol_inexact_params; @@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); @@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); /* make sure that we can insert the indirect policies again before * we start with destructive action. @@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) out_unlock: __xfrm_policy_inexact_flush(net); - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); @@ -2095,9 +2094,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, rcu_read_lock(); retry: do { - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); chain = policy_hash_direct(net, daddr, saddr, family, dir); - } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); + } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2128,7 +2127,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) + if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)) goto retry; if (ret && !xfrm_pol_hold_rcu(ret)) @@ -4084,6 +4083,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_policy_lock); + seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); rv = xfrm_statistics_init(net); @@ -4128,7 +4128,6 @@ void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); - seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex); xfrm_input_init(); #ifdef CONFIG_XFRM_ESPINTCP From ecef6a9effe49e8e2635c839020b9833b71e934c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Jul 2021 15:02:37 +0200 Subject: [PATCH 005/502] libata: fix ata_pio_sector for CONFIG_HIGHMEM Data transfers are not required to be block aligned in memory, so they span two pages. Fix this by splitting the call to >sff_data_xfer into two for that case. This has been broken since the initial libata import before the damn of git, but was uncovered by the legacy ide driver removal. Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210709130237.3730959-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/ata/libata-sff.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index ae7189d1a568..b71ea4a680b0 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -637,6 +637,20 @@ unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf, } EXPORT_SYMBOL_GPL(ata_sff_data_xfer32); +static void ata_pio_xfer(struct ata_queued_cmd *qc, struct page *page, + unsigned int offset, size_t xfer_size) +{ + bool do_write = (qc->tf.flags & ATA_TFLAG_WRITE); + unsigned char *buf; + + buf = kmap_atomic(page); + qc->ap->ops->sff_data_xfer(qc, buf + offset, xfer_size, do_write); + kunmap_atomic(buf); + + if (!do_write && !PageSlab(page)) + flush_dcache_page(page); +} + /** * ata_pio_sector - Transfer a sector of data. * @qc: Command on going @@ -648,11 +662,9 @@ EXPORT_SYMBOL_GPL(ata_sff_data_xfer32); */ static void ata_pio_sector(struct ata_queued_cmd *qc) { - int do_write = (qc->tf.flags & ATA_TFLAG_WRITE); struct ata_port *ap = qc->ap; struct page *page; unsigned int offset; - unsigned char *buf; if (!qc->cursg) { qc->curbytes = qc->nbytes; @@ -670,13 +682,20 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read"); - /* do the actual data transfer */ - buf = kmap_atomic(page); - ap->ops->sff_data_xfer(qc, buf + offset, qc->sect_size, do_write); - kunmap_atomic(buf); + /* + * Split the transfer when it splits a page boundary. Note that the + * split still has to be dword aligned like all ATA data transfers. + */ + WARN_ON_ONCE(offset % 4); + if (offset + qc->sect_size > PAGE_SIZE) { + unsigned int split_len = PAGE_SIZE - offset; - if (!do_write && !PageSlab(page)) - flush_dcache_page(page); + ata_pio_xfer(qc, page, offset, split_len); + ata_pio_xfer(qc, nth_page(page, 1), 0, + qc->sect_size - split_len); + } else { + ata_pio_xfer(qc, page, offset, qc->sect_size); + } qc->curbytes += qc->sect_size; qc->cursg_ofs += qc->sect_size; From 0c23af52ccd1605926480b5dfd1dd857ef604611 Mon Sep 17 00:00:00 2001 From: Naresh Kumar PBS Date: Sun, 11 Jul 2021 06:31:36 -0700 Subject: [PATCH 006/502] RDMA/bnxt_re: Fix stats counters Statistical counters are not incrementing in some adapter versions with newer FW. This is due to the stats context length mismatch between FW and driver. Since the L2 driver updates the length correctly, use the stats length from L2 driver while allocating the DMA'able memory and creating the stats context. Fixes: 9d6b648c3112 ("bnxt_en: Update firmware interface spec to 1.10.1.65.") Link: https://lore.kernel.org/r/1626010296-6076-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 4 +++- drivers/infiniband/hw/bnxt_re/qplib_res.c | 10 ++++------ drivers/infiniband/hw/bnxt_re/qplib_res.h | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index d5674026512a..a8688a92c760 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -120,6 +120,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) if (!chip_ctx) return -ENOMEM; chip_ctx->chip_num = bp->chip_num; + chip_ctx->hw_stats_size = bp->hw_ring_stats_size; rdev->chip_ctx = chip_ctx; /* rest members to follow eventually */ @@ -550,6 +551,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, dma_addr_t dma_map, u32 *fw_stats_ctx_id) { + struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx; struct hwrm_stat_ctx_alloc_output resp = {0}; struct hwrm_stat_ctx_alloc_input req = {0}; struct bnxt_en_dev *en_dev = rdev->en_dev; @@ -566,7 +568,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); - req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext)); + req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 17f0701b3cee..44282a8cdd4f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -56,6 +56,7 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, struct bnxt_qplib_stats *stats); static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, struct bnxt_qplib_stats *stats); /* PBL */ @@ -559,7 +560,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, goto fail; stats_alloc: /* Stats */ - rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats); + rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats); if (rc) goto fail; @@ -889,15 +890,12 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, } static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, struct bnxt_qplib_stats *stats) { memset(stats, 0, sizeof(*stats)); stats->fw_id = -1; - /* 128 byte aligned context memory is required only for 57500. - * However making this unconditional, it does not harm previous - * generation. - */ - stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128); + stats->size = cctx->hw_stats_size; stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, &stats->dma_map, GFP_KERNEL); if (!stats->dma) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c291f495ae91..91031502e8f5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -54,6 +54,7 @@ struct bnxt_qplib_chip_ctx { u16 chip_num; u8 chip_rev; u8 chip_metal; + u16 hw_stats_size; struct bnxt_qplib_drv_modes modes; }; From 6407c69dc51fbd7cf7b6760cd8aefb105d96ff5b Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 7 Jul 2021 14:14:55 -0700 Subject: [PATCH 007/502] RDMA/irdma: Fix unused variable total_size warning Fix the following unused variable warning: drivers/infiniband/hw/irdma/uk.c:934:6: warning: variable 'total_size' set but not used [-Wunused-but-set-variable] Link: https://lore.kernel.org/r/20210707211455.2076-1-tatyana.e.nikolova@intel.com Link: https://lkml.org/lkml/2021/7/1/726 Reported-by: kernel test robot Fixes: 551c46edc769 ("RDMA/irdma: Add user/kernel shared libraries") Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/uk.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index a6d52c20091c..5fb92de1f015 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -931,7 +931,7 @@ enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, struct irdma_post_rq_info *info) { - u32 total_size = 0, wqe_idx, i, byte_off; + u32 wqe_idx, i, byte_off; u32 addl_frag_cnt; __le64 *wqe; u64 hdr; @@ -939,9 +939,6 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, if (qp->max_rq_frag_cnt < info->num_sges) return IRDMA_ERR_INVALID_FRAG_COUNT; - for (i = 0; i < info->num_sges; i++) - total_size += info->sg_list[i].len; - wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); if (!wqe) return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; From 514305ee0a1dade95c6ff1eb5735de5a329d1f89 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 1 Jul 2021 12:41:27 +0200 Subject: [PATCH 008/502] RDMA/irdma: Make spdxcheck.py happy Commit 48d6b3336a9f ("RDMA/irdma: Add ABI definitions") adds ./include/uapi/rdma/irdma-abi.h with an additional unneeded closing bracket at the end of the SPDX-License-Identifier line. Hence, ./scripts/spdxcheck.py complains: include/uapi/rdma/irdma-abi.h: 1:77 Syntax error: ) Remove that closing bracket to make spdxcheck.py happy. Fixes: 48d6b3336a9f ("RDMA/irdma: Add ABI definitions") Link: https://lore.kernel.org/r/20210701104127.1877-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Acked-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/irdma-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index 26b638a7ad97..a7085e092d34 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB */ /* * Copyright (c) 2006 - 2021 Intel Corporation. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved. From c9538831b353b96cb37092c3d3e929d67fd43c5f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 8 Jul 2021 02:47:52 -0400 Subject: [PATCH 009/502] RDMA/irdma: Change the returned type to void Since the function irdma_sc_parse_fpm_commit_buf always returns 0, remove the returned value check and change the returned type to void. Fixes: 3f49d6842569 ("RDMA/irdma: Implement HW Admin Queue OPs") Link: https://lore.kernel.org/r/20210708064752.797520-1-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Reviewed-by: Majd Dibbiny Acked-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ctrl.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index b1023a7d0bd1..c3880a85e255 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2845,7 +2845,7 @@ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf, * parses fpm commit info and copy base value * of hmc objects in hmc_info */ -static enum irdma_status_code +static void irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, struct irdma_hmc_obj_info *info, u32 *sd) { @@ -2915,7 +2915,6 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, else *sd = (u32)(size >> 21); - return 0; } /** @@ -4434,9 +4433,9 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id, &commit_fpm_mem, true, wait_type); if (!ret_code) - ret_code = irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf, - hmc_info->hmc_obj, - &hmc_info->sd_table.sd_cnt); + irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf, + hmc_info->hmc_obj, + &hmc_info->sd_table.sd_cnt); print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16, 8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE, false); From 7e71b85473f863a29eb1c69265ef025389b4091d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 8 Jun 2021 14:26:58 +0300 Subject: [PATCH 010/502] arm64: dts: ls1028a: fix node name for the sysclk U-Boot attempts to fix up the "clock-frequency" property of the "/sysclk" node: https://elixir.bootlin.com/u-boot/v2021.04/source/arch/arm/cpu/armv8/fsl-layerscape/fdt.c#L512 but fails to do so: ## Booting kernel from Legacy Image at a1000000 ... Image Name: Created: 2021-06-08 10:31:38 UTC Image Type: AArch64 Linux Kernel Image (gzip compressed) Data Size: 15431370 Bytes = 14.7 MiB Load Address: 80080000 Entry Point: 80080000 Verifying Checksum ... OK ## Flattened Device Tree blob at a0000000 Booting using the fdt blob at 0xa0000000 Uncompressing Kernel Image Loading Device Tree to 00000000fbb19000, end 00000000fbb22717 ... OK Unable to update property /sysclk:clock-frequency, err=FDT_ERR_NOTFOUND Starting kernel ... All Layerscape SoCs except LS1028A use "sysclk" as the node name, and not "clock-sysclk". So change the node name of LS1028A accordingly. Fixes: 8897f3255c9c ("arm64: dts: Add support for NXP LS1028A SoC") Signed-off-by: Vladimir Oltean Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index b2e3e5d2a108..343ecf0e8973 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -66,7 +66,7 @@ }; }; - sysclk: clock-sysclk { + sysclk: sysclk { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <100000000>; From 7dd2dd4ff9f3abda601f22b9d01441a0869d20d7 Mon Sep 17 00:00:00 2001 From: Adrian Larumbe Date: Wed, 7 Jul 2021 00:43:38 +0100 Subject: [PATCH 011/502] dmaengine: xilinx_dma: Fix read-after-free bug when terminating transfers When user calls dmaengine_terminate_sync, the driver will clean up any remaining descriptors for all the pending or active transfers that had previously been submitted. However, this might happen whilst the tasklet is invoking the DMA callback for the last finished transfer, so by the time it returns and takes over the channel's spinlock, the list of completed descriptors it was traversing is no longer valid. This leads to a read-after-free situation. Fix it by signalling whether a user-triggered termination has happened by means of a boolean variable. Signed-off-by: Adrian Larumbe Link: https://lore.kernel.org/r/20210706234338.7696-3-adrian.martinezlarumbe@imgtec.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 75c0b8e904e5..4b9530a7bf65 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -394,6 +394,7 @@ struct xilinx_dma_tx_descriptor { * @genlock: Support genlock mode * @err: Channel has errors * @idle: Check for channel idle + * @terminating: Check for channel being synchronized by user * @tasklet: Cleanup work after irq * @config: Device configuration info * @flush_on_fsync: Flush on Frame sync @@ -431,6 +432,7 @@ struct xilinx_dma_chan { bool genlock; bool err; bool idle; + bool terminating; struct tasklet_struct tasklet; struct xilinx_vdma_config config; bool flush_on_fsync; @@ -1049,6 +1051,13 @@ static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan) /* Run any dependencies, then free the descriptor */ dma_run_dependencies(&desc->async_tx); xilinx_dma_free_tx_descriptor(chan, desc); + + /* + * While we ran a callback the user called a terminate function, + * which takes care of cleaning up any remaining descriptors + */ + if (chan->terminating) + break; } spin_unlock_irqrestore(&chan->lock, flags); @@ -1965,6 +1974,8 @@ static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx) if (desc->cyclic) chan->cyclic = true; + chan->terminating = false; + spin_unlock_irqrestore(&chan->lock, flags); return cookie; @@ -2436,6 +2447,7 @@ static int xilinx_dma_terminate_all(struct dma_chan *dchan) xilinx_dma_chan_reset(chan); /* Remove and free all of the descriptors in the lists */ + chan->terminating = true; xilinx_dma_free_descriptors(chan); chan->idle = true; From 1da569fa7ec8cb0591c74aa3050d4ea1397778b4 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 6 Jul 2021 20:45:21 +0800 Subject: [PATCH 012/502] dmaengine: usb-dmac: Fix PM reference leak in usb_dmac_probe() pm_runtime_get_sync will increment pm usage counter even it failed. Forgetting to putting operation will result in reference leak here. Fix it by moving the error_pm label above the pm_runtime_put() in the error path. Reported-by: Hulk Robot Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20210706124521.1371901-1-yukuai3@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/sh/usb-dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index 8f7ceb698226..1cc06900153e 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -855,8 +855,8 @@ static int usb_dmac_probe(struct platform_device *pdev) error: of_dma_controller_free(pdev->dev.of_node); - pm_runtime_put(&pdev->dev); error_pm: + pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); return ret; } From da435aedb00a4ef61019ff11ae0c08ffb9b1fb18 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 24 Jun 2021 12:09:29 -0700 Subject: [PATCH 013/502] dmaengine: idxd: fix array index when int_handles are being used The index to the irq vector should be local and has no relation to the assigned interrupt handle. Assign the MSIX interrupt index that is programmed for the descriptor. The interrupt handle only matters when it comes to hardware descriptor programming. Fixes: eb15e7154fbf ("dmaengine: idxd: add interrupt handle request and release support") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162456176939.1121476.3366256009925001897.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 19afb62abaff..e29887528077 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -128,19 +128,8 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) { - int vec; - - /* - * If the driver is on host kernel, it would be the value - * assigned to interrupt handle, which is index for MSIX - * vector. If it's guest then can't use the int_handle since - * that is the index to IMS for the entire device. The guest - * device local index will be used. - */ - vec = !idxd->int_handles ? desc->hw->int_handle : desc->vector; - llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist); - } + if (desc->hw->flags & IDXD_OP_FLAG_RCI) + llist_add(&desc->llnode, &idxd->irq_entries[desc->vector].pending_llist); return 0; } From d5c10e0fc8645342fe5c9796b00c84ab078cd713 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 24 Jun 2021 13:43:32 -0700 Subject: [PATCH 014/502] dmaengine: idxd: fix setup sequence for MSIXPERM table The MSIX permission table should be programmed BEFORE request_irq() happens. This prevents any possibility of an interrupt happening before the MSIX perm table is setup, however slight. Fixes: 6df0e6c57dfc ("dmaengine: idxd: clear MSIX permission entry on shutdown") Sign-off-by: Dave Jiang Link: https://lore.kernel.org/r/162456741222.1138073.1298447364671237896.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index c8ae41d36040..4e32a4dcc3ab 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -102,6 +102,8 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) spin_lock_init(&idxd->irq_entries[i].list_lock); } + idxd_msix_perm_setup(idxd); + irq_entry = &idxd->irq_entries[0]; rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread, 0, "idxd-misc", irq_entry); @@ -148,7 +150,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } idxd_unmask_error_interrupts(idxd); - idxd_msix_perm_setup(idxd); return 0; err_wq_irqs: @@ -162,6 +163,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) err_misc_irq: /* Disable error interrupt generation */ idxd_mask_error_interrupts(idxd); + idxd_msix_perm_clear(idxd); err_irq_entries: pci_free_irq_vectors(pdev); dev_err(dev, "No usable interrupts\n"); From f9613aa07f16d6042e74208d1b40a6104d72964a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 15 Jun 2021 20:52:38 +0800 Subject: [PATCH 015/502] ARM: imx: add missing iounmap() Commit e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver") introduced imx_mmdc_remove(), the mmdc_base need be unmapped in it if config PERF_EVENTS is enabled. If imx_mmdc_perf_init() fails, the mmdc_base also need be unmapped. Fixes: e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Dong Aisheng Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mmdc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 0dfd0ae7a63d..8e57691aafe2 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -462,6 +462,7 @@ static int imx_mmdc_remove(struct platform_device *pdev) cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); perf_pmu_unregister(&pmu_mmdc->pmu); + iounmap(pmu_mmdc->mmdc_base); kfree(pmu_mmdc); return 0; } @@ -567,7 +568,11 @@ static int imx_mmdc_probe(struct platform_device *pdev) val &= ~(1 << BP_MMDC_MAPSR_PSD); writel_relaxed(val, reg); - return imx_mmdc_perf_init(pdev, mmdc_base); + err = imx_mmdc_perf_init(pdev, mmdc_base); + if (err) + iounmap(mmdc_base); + + return err; } int imx_mmdc_get_ddr_type(void) From f07ec85365807b3939f32d0094a6dd5ce065d1b9 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 15 Jun 2021 20:52:39 +0800 Subject: [PATCH 016/502] ARM: imx: add missing clk_disable_unprepare() clock source is prepared and enabled by clk_prepare_enable() in probe function, but no disable or unprepare in remove and error path. Fixes: 9454a0caff6a ("ARM: imx: add mmdc ipg clock operation for mmdc") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Reviewed-by: Dong Aisheng Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mmdc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 8e57691aafe2..4a6f1359e1e9 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -103,6 +103,7 @@ struct mmdc_pmu { struct perf_event *mmdc_events[MMDC_NUM_COUNTERS]; struct hlist_node node; struct fsl_mmdc_devtype_data *devtype_data; + struct clk *mmdc_ipg_clk; }; /* @@ -463,11 +464,13 @@ static int imx_mmdc_remove(struct platform_device *pdev) cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); perf_pmu_unregister(&pmu_mmdc->pmu); iounmap(pmu_mmdc->mmdc_base); + clk_disable_unprepare(pmu_mmdc->mmdc_ipg_clk); kfree(pmu_mmdc); return 0; } -static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base) +static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base, + struct clk *mmdc_ipg_clk) { struct mmdc_pmu *pmu_mmdc; char *name; @@ -495,6 +498,7 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b } mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev); + pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; if (mmdc_num == 0) name = "mmdc"; else @@ -568,9 +572,11 @@ static int imx_mmdc_probe(struct platform_device *pdev) val &= ~(1 << BP_MMDC_MAPSR_PSD); writel_relaxed(val, reg); - err = imx_mmdc_perf_init(pdev, mmdc_base); - if (err) + err = imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk); + if (err) { iounmap(mmdc_base); + clk_disable_unprepare(mmdc_ipg_clk); + } return err; } From fb1425b436bcf936065edbbe8d092465a53185b6 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 17 Jun 2021 11:54:15 -0300 Subject: [PATCH 017/502] ARM: imx: common: Move prototype outside the SMP block Currently the imx_gpcv2_set_core1_pdn_pup_by_software() prototype is guarded by the CONFIG_SMP symbol. This causes W=1 build warnings when CONFIG_SMP is not selected: arch/arm/mach-imx/src.c:103:6: warning: no previous prototype for 'imx_gpcv2_set_core1_pdn_pup_by_software' [-Wmissing-prototypes] Fix it by moving the imx_gpcv2_set_core1_pdn_pup_by_software() prototype outside of the CONFIG_SMP block. Fixes: e34645f45805 ("ARM: imx: add smp support for imx7d") Reported-by: kernel test robot Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index f0a073a71401..13f3068e9845 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -68,7 +68,6 @@ void imx_set_cpu_arg(int cpu, u32 arg); void v7_secondary_startup(void); void imx_scu_map_io(void); void imx_smp_prepare(void); -void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn); #else static inline void imx_scu_map_io(void) {} static inline void imx_smp_prepare(void) {} @@ -81,6 +80,7 @@ void imx_gpc_mask_all(void); void imx_gpc_restore_all(void); void imx_gpc_hwirq_mask(unsigned int hwirq); void imx_gpc_hwirq_unmask(unsigned int hwirq); +void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn); void imx_anatop_init(void); void imx_anatop_pre_suspend(void); void imx_anatop_post_resume(void); From fd8e83884fdd7b5fc411f201a58d8d01890198a2 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 25 Jun 2021 14:13:53 +0200 Subject: [PATCH 018/502] ARM: dts: imx6qdl-sr-som: Increase the PHY reset duration to 10ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AR803x PHY used on this modules seems to require the reset line to be asserted for around 10ms in order to avoid rare cases where the PHY gets stuck in an incoherent state that prevents it to function correctly. The previous value of 2ms was found to be problematic on some setups, causing intermittent issues where the PHY would be unresponsive every once in a while on some sytems, with a low occurrence (it typically took around 30 consecutive reboots to encounter the issue). Bumping the delay to the 10ms makes the issue dissapear, with more than 2500 consecutive reboots performed without the issue showing-up. Fixes: 208d7baf8085 ("ARM: imx: initial SolidRun HummingBoard support") Signed-off-by: Maxime Chevallier Tested-by: Hervé Codina Reviewed-by: Russell King (Oracle) Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-sr-som.dtsi | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi index 0ad8ccde0cf8..f86efd0ccc40 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi @@ -54,7 +54,13 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_microsom_enet_ar8035>; phy-mode = "rgmii-id"; - phy-reset-duration = <2>; + + /* + * The PHY seems to require a long-enough reset duration to avoid + * some rare issues where the PHY gets stuck in an inconsistent and + * non-functional state at boot-up. 10ms proved to be fine . + */ + phy-reset-duration = <10>; phy-reset-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>; status = "okay"; From 80d9ac9bd7b9366c2a89d2716a397749299728e7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Jul 2021 12:36:41 +0100 Subject: [PATCH 019/502] KVM: arm64: Fix detection of shared VMAs on guest fault When merging the KVM MTE support, the blob that was interposed between the chair and the keyboard experienced a neuronal accident (also known as a brain fart), turning a check for VM_SHARED into VM_PFNMAP as it was reshuffling some of the code. The blob having now come back to its senses, let's restore the initial check that the original author got right the first place. Fixes: ea7fc1bb1cd1 ("KVM: arm64: Introduce MTE VM feature") Reviewed-by: Steven Price Signed-off-by: Marc Zyngier Cc: Catalin Marinas Link: https://lore.kernel.org/r/20210713114804.594993-1-maz@kernel.org --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3155c9e778f0..0625bf2353c2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -947,7 +947,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = get_vma_page_shift(vma, hva); } - shared = (vma->vm_flags & VM_PFNMAP); + shared = (vma->vm_flags & VM_SHARED); switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED From bac0b135907855e9f8c032877c3df3c60885a08f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 13 Jul 2021 22:37:41 +0200 Subject: [PATCH 020/502] KVM: selftests: change pthread_yield to sched_yield MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With later GCC we get steal_time.c: In function ‘main’: steal_time.c:323:25: warning: ‘pthread_yield’ is deprecated: pthread_yield is deprecated, use sched_yield instead [-Wdeprecated-declarations] Let's follow the instructions and use sched_yield instead. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210713203742.29680-2-drjones@redhat.com --- tools/testing/selftests/kvm/steal_time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index b0031f2d38fd..ecec30865a74 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -320,7 +320,7 @@ int main(int ac, char **av) run_delay = get_run_delay(); pthread_create(&thread, &attr, do_steal_time, NULL); do - pthread_yield(); + sched_yield(); while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS); pthread_join(thread, NULL); run_delay = get_run_delay() - run_delay; From 5cf17746b302aa32a4f200cc6ce38865bfe4cf94 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 13 Jul 2021 22:37:42 +0200 Subject: [PATCH 021/502] KVM: arm64: selftests: get-reg-list: actually enable pmu regs in pmu sublist We reworked get-reg-list to make it easier to enable optional register sublists by parametrizing their vcpu feature flags as well as making other generalizations. That was all to make sure we enable the PMU registers when we want to test them. Somehow we forgot to actually include the PMU feature flag in the PMU sublist description though! Do that now. Fixes: 313673bad871 ("KVM: arm64: selftests: get-reg-list: Split base and pmu registers") Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210713203742.29680-3-drjones@redhat.com --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index a16c8f05366c..cc898181faab 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -1019,7 +1019,8 @@ static __u64 sve_rejects_set[] = { #define VREGS_SUBLIST \ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), } #define PMU_SUBLIST \ - { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } + { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \ + .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } #define SVE_SUBLIST \ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ From 95d429206c97cf109591009fa386004191c62c47 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 28 Jun 2021 18:28:46 -0400 Subject: [PATCH 022/502] platform/x86: think-lmi: Add pending_reboot support The Think-lmi driver was missing pending_reboot support as it wasn't available from the BIOS. Turns out this is really useful to have from user space so implementing from a purely SW point of view. Thanks to Mario Limonciello for guidance on how fwupd would use this. Suggested-by: Mario Limonciello Signed-off-by: Mark Pearson Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210628222846.8830-1-markpearson@lenovo.com Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 19 +++++++++++++++++++ drivers/platform/x86/think-lmi.h | 1 + 2 files changed, 20 insertions(+) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 3671b5d20613..64dcec53a7a0 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -571,6 +571,11 @@ static ssize_t current_value_store(struct kobject *kobj, else ret = tlmi_save_bios_settings(""); + if (!ret && !tlmi_priv.pending_changes) { + tlmi_priv.pending_changes = true; + /* let userland know it may need to check reboot pending again */ + kobject_uevent(&tlmi_priv.class_dev->kobj, KOBJ_CHANGE); + } out: kfree(auth_str); kfree(set_str); @@ -647,6 +652,14 @@ static struct kobj_type tlmi_pwd_setting_ktype = { .sysfs_ops = &tlmi_kobj_sysfs_ops, }; +static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", tlmi_priv.pending_changes); +} + +static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot); + /* ---- Initialisation --------------------------------------------------------- */ static void tlmi_release_attr(void) { @@ -667,6 +680,7 @@ static void tlmi_release_attr(void) sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); kobject_put(&tlmi_priv.pwd_power->kobj); kset_unregister(tlmi_priv.authentication_kset); + sysfs_remove_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr); } static int tlmi_sysfs_init(void) @@ -746,6 +760,11 @@ static int tlmi_sysfs_init(void) if (ret) goto fail_create_attr; + /* Create global sysfs files */ + ret = sysfs_create_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr); + if (ret) + goto fail_create_attr; + return ret; fail_create_attr: diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h index 6fa8da7af6c7..eb598846628a 100644 --- a/drivers/platform/x86/think-lmi.h +++ b/drivers/platform/x86/think-lmi.h @@ -60,6 +60,7 @@ struct think_lmi { bool can_get_bios_selections; bool can_set_bios_password; bool can_get_password_settings; + bool pending_changes; struct tlmi_attr_setting *setting[TLMI_SETTINGS_COUNT]; struct device *class_dev; From 95e1b60f8dc8f225b14619e9aca9bdd7d99167db Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:17:57 +0530 Subject: [PATCH 023/502] platform/x86: amd-pmc: Fix command completion code The protocol to submit a job request to SMU is to wait for AMD_PMC_REGISTER_RESPONSE to return 1,meaning SMU is ready to take requests. PMC driver has to make sure that the response code is always AMD_PMC_RESULT_OK before making any command submissions. When we submit a message to SMU, we have to wait until it processes the request. Adding a read_poll_timeout() check as this was missing in the existing code. Also, add a mutex to protect amd_pmc_send_cmd() calls to SMU. Fixes: 156ec4731cb2 ("platform/x86: amd-pmc: Add AMD platform support for S2Idle") Signed-off-by: Shyam Sundar S K Acked-by: Raul E Rangel Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-2-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 38 ++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index b9da58ee9b1e..1b5f149932c1 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -68,6 +68,7 @@ struct amd_pmc_dev { u32 base_addr; u32 cpu_id; struct device *dev; + struct mutex lock; /* generic mutex lock */ #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbgfs_dir; #endif /* CONFIG_DEBUG_FS */ @@ -138,9 +139,10 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) u8 msg; u32 val; + mutex_lock(&dev->lock); /* Wait until we get a valid response */ rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE, - val, val > 0, PMC_MSG_DELAY_MIN_US, + val, val != 0, PMC_MSG_DELAY_MIN_US, PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX); if (rc) { dev_err(dev->dev, "failed to talk to SMU\n"); @@ -156,7 +158,37 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) /* Write message ID to message ID register */ msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO; amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg); - return 0; + /* Wait until we get a valid response */ + rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE, + val, val != 0, PMC_MSG_DELAY_MIN_US, + PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX); + if (rc) { + dev_err(dev->dev, "SMU response timed out\n"); + goto out_unlock; + } + + switch (val) { + case AMD_PMC_RESULT_OK: + break; + case AMD_PMC_RESULT_CMD_REJECT_BUSY: + dev_err(dev->dev, "SMU not ready. err: 0x%x\n", val); + rc = -EBUSY; + goto out_unlock; + case AMD_PMC_RESULT_CMD_UNKNOWN: + dev_err(dev->dev, "SMU cmd unknown. err: 0x%x\n", val); + rc = -EINVAL; + goto out_unlock; + case AMD_PMC_RESULT_CMD_REJECT_PREREQ: + case AMD_PMC_RESULT_FAILED: + default: + dev_err(dev->dev, "SMU cmd failed. err: 0x%x\n", val); + rc = -EIO; + goto out_unlock; + } + +out_unlock: + mutex_unlock(&dev->lock); + return rc; } static int __maybe_unused amd_pmc_suspend(struct device *dev) @@ -259,6 +291,7 @@ static int amd_pmc_probe(struct platform_device *pdev) amd_pmc_dump_registers(dev); + mutex_init(&dev->lock); platform_set_drvdata(pdev, dev); amd_pmc_dbgfs_register(dev); return 0; @@ -269,6 +302,7 @@ static int amd_pmc_remove(struct platform_device *pdev) struct amd_pmc_dev *dev = platform_get_drvdata(pdev); amd_pmc_dbgfs_unregister(dev); + mutex_destroy(&dev->lock); return 0; } From 4c06d35dfedf4c1fd03702e0f05292a69d020e21 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:17:58 +0530 Subject: [PATCH 024/502] platform/x86: amd-pmc: Fix SMU firmware reporting mechanism It was lately understood that the current mechanism available in the driver to get SMU firmware info works only on internal SMU builds and there is a separate way to get all the SMU logging counters (addressed in the next patch). Hence remove all the smu info shown via debugfs as it is no more useful. Fixes: 156ec4731cb2 ("platform/x86: amd-pmc: Add AMD platform support for S2Idle") Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-3-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 1b5f149932c1..b1d6175a13b2 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -52,7 +52,6 @@ #define AMD_CPU_ID_PCO AMD_CPU_ID_RV #define AMD_CPU_ID_CZN AMD_CPU_ID_RN -#define AMD_SMU_FW_VERSION 0x0 #define PMC_MSG_DELAY_MIN_US 100 #define RESPONSE_REGISTER_LOOP_MAX 200 @@ -89,11 +88,6 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3 #ifdef CONFIG_DEBUG_FS static int smu_fw_info_show(struct seq_file *s, void *unused) { - struct amd_pmc_dev *dev = s->private; - u32 value; - - value = ioread32(dev->smu_base + AMD_SMU_FW_VERSION); - seq_printf(s, "SMU FW Info: %x\n", value); return 0; } DEFINE_SHOW_ATTRIBUTE(smu_fw_info); @@ -280,10 +274,6 @@ static int amd_pmc_probe(struct platform_device *pdev) pci_dev_put(rdev); base_addr = ((u64)base_addr_hi << 32 | base_addr_lo); - dev->smu_base = devm_ioremap(dev->dev, base_addr, AMD_PMC_MAPPING_SIZE); - if (!dev->smu_base) - return -ENOMEM; - dev->regbase = devm_ioremap(dev->dev, base_addr + AMD_PMC_BASE_ADDR_OFFSET, AMD_PMC_MAPPING_SIZE); if (!dev->regbase) From 162b937a8064029ed22cd1039d4dcf7f1721f940 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:17:59 +0530 Subject: [PATCH 025/502] platform/x86: amd-pmc: call dump registers only once Currently amd_pmc_dump_registers() routine is being called at multiple places. The best to call it is after command submission to SMU. Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-4-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index b1d6175a13b2..e5107e3b1911 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -182,6 +182,7 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) out_unlock: mutex_unlock(&dev->lock); + amd_pmc_dump_registers(dev); return rc; } @@ -194,7 +195,6 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev) if (rc) dev_err(pdev->dev, "suspend failed\n"); - amd_pmc_dump_registers(pdev); return 0; } @@ -207,7 +207,6 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) if (rc) dev_err(pdev->dev, "resume failed\n"); - amd_pmc_dump_registers(pdev); return 0; } @@ -279,8 +278,6 @@ static int amd_pmc_probe(struct platform_device *pdev) if (!dev->regbase) return -ENOMEM; - amd_pmc_dump_registers(dev); - mutex_init(&dev->lock); platform_set_drvdata(pdev, dev); amd_pmc_dbgfs_register(dev); From 76620567496237f1f1f54683ec7da1755ee501d7 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:18:00 +0530 Subject: [PATCH 026/502] platform/x86: amd-pmc: Add support for logging SMU metrics SMU provides a way to dump the s0ix debug statistics in the form of a metrics table via a of set special mailbox commands. Add support to the driver which can send these commands to SMU and expose the information received via debugfs. The information contains the s0ix entry/exit, active time of each IP block etc. As a side note, SMU subsystem logging is not supported on Picasso based SoC's. Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-5-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 147 +++++++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index e5107e3b1911..0ebb2732c46a 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -46,6 +46,14 @@ #define AMD_PMC_RESULT_CMD_UNKNOWN 0xFE #define AMD_PMC_RESULT_FAILED 0xFF +/* SMU Message Definations */ +#define SMU_MSG_GETSMUVERSION 0x02 +#define SMU_MSG_LOG_GETDRAM_ADDR_HI 0x04 +#define SMU_MSG_LOG_GETDRAM_ADDR_LO 0x05 +#define SMU_MSG_LOG_START 0x06 +#define SMU_MSG_LOG_RESET 0x07 +#define SMU_MSG_LOG_DUMP_DATA 0x08 +#define SMU_MSG_GET_SUP_CONSTRAINTS 0x09 /* List of supported CPU ids */ #define AMD_CPU_ID_RV 0x15D0 #define AMD_CPU_ID_RN 0x1630 @@ -55,17 +63,42 @@ #define PMC_MSG_DELAY_MIN_US 100 #define RESPONSE_REGISTER_LOOP_MAX 200 +#define SOC_SUBSYSTEM_IP_MAX 12 +#define DELAY_MIN_US 2000 +#define DELAY_MAX_US 3000 enum amd_pmc_def { MSG_TEST = 0x01, MSG_OS_HINT_PCO, MSG_OS_HINT_RN, }; +struct amd_pmc_bit_map { + const char *name; + u32 bit_mask; +}; + +static const struct amd_pmc_bit_map soc15_ip_blk[] = { + {"DISPLAY", BIT(0)}, + {"CPU", BIT(1)}, + {"GFX", BIT(2)}, + {"VDD", BIT(3)}, + {"ACP", BIT(4)}, + {"VCN", BIT(5)}, + {"ISP", BIT(6)}, + {"NBIO", BIT(7)}, + {"DF", BIT(8)}, + {"USB0", BIT(9)}, + {"USB1", BIT(10)}, + {"LAPIC", BIT(11)}, + {} +}; + struct amd_pmc_dev { void __iomem *regbase; - void __iomem *smu_base; + void __iomem *smu_virt_addr; u32 base_addr; u32 cpu_id; + u32 active_ips; struct device *dev; struct mutex lock; /* generic mutex lock */ #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -74,6 +107,7 @@ struct amd_pmc_dev { }; static struct amd_pmc_dev pmc; +static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret); static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset) { @@ -85,9 +119,49 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3 iowrite32(val, dev->regbase + reg_offset); } +struct smu_metrics { + u32 table_version; + u32 hint_count; + u32 s0i3_cyclecount; + u32 timein_s0i2; + u64 timeentering_s0i3_lastcapture; + u64 timeentering_s0i3_totaltime; + u64 timeto_resume_to_os_lastcapture; + u64 timeto_resume_to_os_totaltime; + u64 timein_s0i3_lastcapture; + u64 timein_s0i3_totaltime; + u64 timein_swdrips_lastcapture; + u64 timein_swdrips_totaltime; + u64 timecondition_notmet_lastcapture[SOC_SUBSYSTEM_IP_MAX]; + u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX]; +} __packed; + #ifdef CONFIG_DEBUG_FS static int smu_fw_info_show(struct seq_file *s, void *unused) { + struct amd_pmc_dev *dev = s->private; + struct smu_metrics table; + int idx; + + if (dev->cpu_id == AMD_CPU_ID_PCO) + return -EINVAL; + + memcpy_fromio(&table, dev->smu_virt_addr, sizeof(struct smu_metrics)); + + seq_puts(s, "\n=== SMU Statistics ===\n"); + seq_printf(s, "Table Version: %d\n", table.table_version); + seq_printf(s, "Hint Count: %d\n", table.hint_count); + seq_printf(s, "S0i3 Cycle Count: %d\n", table.s0i3_cyclecount); + seq_printf(s, "Time (in us) to S0i3: %lld\n", table.timeentering_s0i3_lastcapture); + seq_printf(s, "Time (in us) in S0i3: %lld\n", table.timein_s0i3_lastcapture); + + seq_puts(s, "\n=== Active time (in us) ===\n"); + for (idx = 0 ; idx < SOC_SUBSYSTEM_IP_MAX ; idx++) { + if (soc15_ip_blk[idx].bit_mask & dev->active_ips) + seq_printf(s, "%-8s : %lld\n", soc15_ip_blk[idx].name, + table.timecondition_notmet_lastcapture[idx]); + } + return 0; } DEFINE_SHOW_ATTRIBUTE(smu_fw_info); @@ -113,6 +187,32 @@ static inline void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev) } #endif /* CONFIG_DEBUG_FS */ +static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev) +{ + u32 phys_addr_low, phys_addr_hi; + u64 smu_phys_addr; + + if (dev->cpu_id == AMD_CPU_ID_PCO) + return -EINVAL; + + /* Get Active devices list from SMU */ + amd_pmc_send_cmd(dev, 0, &dev->active_ips, SMU_MSG_GET_SUP_CONSTRAINTS, 1); + + /* Get dram address */ + amd_pmc_send_cmd(dev, 0, &phys_addr_low, SMU_MSG_LOG_GETDRAM_ADDR_LO, 1); + amd_pmc_send_cmd(dev, 0, &phys_addr_hi, SMU_MSG_LOG_GETDRAM_ADDR_HI, 1); + smu_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low); + + dev->smu_virt_addr = devm_ioremap(dev->dev, smu_phys_addr, sizeof(struct smu_metrics)); + if (!dev->smu_virt_addr) + return -ENOMEM; + + /* Start the logging */ + amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0); + + return 0; +} + static void amd_pmc_dump_registers(struct amd_pmc_dev *dev) { u32 value; @@ -127,10 +227,9 @@ static void amd_pmc_dump_registers(struct amd_pmc_dev *dev) dev_dbg(dev->dev, "AMD_PMC_REGISTER_MESSAGE:%x\n", value); } -static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) +static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret) { int rc; - u8 msg; u32 val; mutex_lock(&dev->lock); @@ -150,8 +249,8 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) amd_pmc_reg_write(dev, AMD_PMC_REGISTER_ARGUMENT, set); /* Write message ID to message ID register */ - msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO; amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg); + /* Wait until we get a valid response */ rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE, val, val != 0, PMC_MSG_DELAY_MIN_US, @@ -163,6 +262,11 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) switch (val) { case AMD_PMC_RESULT_OK: + if (ret) { + /* PMFW may take longer time to return back the data */ + usleep_range(DELAY_MIN_US, 10 * DELAY_MAX_US); + *data = amd_pmc_reg_read(dev, AMD_PMC_REGISTER_ARGUMENT); + } break; case AMD_PMC_RESULT_CMD_REJECT_BUSY: dev_err(dev->dev, "SMU not ready. err: 0x%x\n", val); @@ -186,12 +290,29 @@ out_unlock: return rc; } +static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) +{ + switch (dev->cpu_id) { + case AMD_CPU_ID_PCO: + return MSG_OS_HINT_PCO; + case AMD_CPU_ID_RN: + return MSG_OS_HINT_RN; + } + return -EINVAL; +} + static int __maybe_unused amd_pmc_suspend(struct device *dev) { struct amd_pmc_dev *pdev = dev_get_drvdata(dev); int rc; + u8 msg; - rc = amd_pmc_send_cmd(pdev, 1); + /* Reset and Start SMU logging - to monitor the s0i3 stats */ + amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0); + amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0); + + msg = amd_pmc_get_os_hint(pdev); + rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0); if (rc) dev_err(pdev->dev, "suspend failed\n"); @@ -202,8 +323,13 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) { struct amd_pmc_dev *pdev = dev_get_drvdata(dev); int rc; + u8 msg; - rc = amd_pmc_send_cmd(pdev, 0); + /* Let SMU know that we are looking for stats */ + amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_DUMP_DATA, 0); + + msg = amd_pmc_get_os_hint(pdev); + rc = amd_pmc_send_cmd(pdev, 0, NULL, msg, 0); if (rc) dev_err(pdev->dev, "resume failed\n"); @@ -226,8 +352,7 @@ static int amd_pmc_probe(struct platform_device *pdev) { struct amd_pmc_dev *dev = &pmc; struct pci_dev *rdev; - u32 base_addr_lo; - u32 base_addr_hi; + u32 base_addr_lo, base_addr_hi; u64 base_addr; int err; u32 val; @@ -279,6 +404,12 @@ static int amd_pmc_probe(struct platform_device *pdev) return -ENOMEM; mutex_init(&dev->lock); + + /* Use SMU to get the s0i3 debug stats */ + err = amd_pmc_setup_smu_logging(dev); + if (err) + dev_err(dev->dev, "SMU debugging info not supported on this platform\n"); + platform_set_drvdata(pdev, dev); amd_pmc_dbgfs_register(dev); return 0; From b9a4fa6978bef902409858737fa180fa7b9346ac Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:18:01 +0530 Subject: [PATCH 027/502] platform/x86: amd-pmc: Add support for logging s0ix counters Even the FCH SSC registers provides certain level of information about the s0ix entry and exit times which comes handy when the SMU fails to report the statistics via the mailbox communication. This information is captured via a new debugfs file "s0ix_stats". A non-zero entry in this counters would mean that the system entered the s0ix state. If s0ix entry time and exit time don't change during suspend to idle, the silicon has not entered the deepest state. Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-6-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 45 +++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 0ebb2732c46a..7f011c3f60f2 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -46,6 +46,15 @@ #define AMD_PMC_RESULT_CMD_UNKNOWN 0xFE #define AMD_PMC_RESULT_FAILED 0xFF +/* FCH SSC Registers */ +#define FCH_S0I3_ENTRY_TIME_L_OFFSET 0x30 +#define FCH_S0I3_ENTRY_TIME_H_OFFSET 0x34 +#define FCH_S0I3_EXIT_TIME_L_OFFSET 0x38 +#define FCH_S0I3_EXIT_TIME_H_OFFSET 0x3C +#define FCH_SSC_MAPPING_SIZE 0x800 +#define FCH_BASE_PHY_ADDR_LOW 0xFED81100 +#define FCH_BASE_PHY_ADDR_HIGH 0x00000000 + /* SMU Message Definations */ #define SMU_MSG_GETSMUVERSION 0x02 #define SMU_MSG_LOG_GETDRAM_ADDR_HI 0x04 @@ -96,6 +105,7 @@ static const struct amd_pmc_bit_map soc15_ip_blk[] = { struct amd_pmc_dev { void __iomem *regbase; void __iomem *smu_virt_addr; + void __iomem *fch_virt_addr; u32 base_addr; u32 cpu_id; u32 active_ips; @@ -166,6 +176,29 @@ static int smu_fw_info_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(smu_fw_info); +static int s0ix_stats_show(struct seq_file *s, void *unused) +{ + struct amd_pmc_dev *dev = s->private; + u64 entry_time, exit_time, residency; + + entry_time = ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_H_OFFSET); + entry_time = entry_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_L_OFFSET); + + exit_time = ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_H_OFFSET); + exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET); + + /* It's in 48MHz. We need to convert it */ + residency = (exit_time - entry_time) / 48; + + seq_puts(s, "=== S0ix statistics ===\n"); + seq_printf(s, "S0ix Entry Time: %lld\n", entry_time); + seq_printf(s, "S0ix Exit Time: %lld\n", exit_time); + seq_printf(s, "Residency Time: %lld\n", residency); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(s0ix_stats); + static void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev) { debugfs_remove_recursive(dev->dbgfs_dir); @@ -176,6 +209,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev) dev->dbgfs_dir = debugfs_create_dir("amd_pmc", NULL); debugfs_create_file("smu_fw_info", 0644, dev->dbgfs_dir, dev, &smu_fw_info_fops); + debugfs_create_file("s0ix_stats", 0644, dev->dbgfs_dir, dev, + &s0ix_stats_fops); } #else static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev) @@ -353,7 +388,7 @@ static int amd_pmc_probe(struct platform_device *pdev) struct amd_pmc_dev *dev = &pmc; struct pci_dev *rdev; u32 base_addr_lo, base_addr_hi; - u64 base_addr; + u64 base_addr, fch_phys_addr; int err; u32 val; @@ -405,6 +440,14 @@ static int amd_pmc_probe(struct platform_device *pdev) mutex_init(&dev->lock); + /* Use FCH registers to get the S0ix stats */ + base_addr_lo = FCH_BASE_PHY_ADDR_LOW; + base_addr_hi = FCH_BASE_PHY_ADDR_HIGH; + fch_phys_addr = ((u64)base_addr_hi << 32 | base_addr_lo); + dev->fch_virt_addr = devm_ioremap(dev->dev, fch_phys_addr, FCH_SSC_MAPPING_SIZE); + if (!dev->fch_virt_addr) + return -ENOMEM; + /* Use SMU to get the s0i3 debug stats */ err = amd_pmc_setup_smu_logging(dev); if (err) From 9422584a601ae8e4af51e890a14a936b2b689628 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:18:02 +0530 Subject: [PATCH 028/502] platform/x86: amd-pmc: Add support for ACPI ID AMDI0006 Some newer BIOSes have added another ACPI ID for the uPEP device. SMU statistics behave identically on this device. Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-7-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 7f011c3f60f2..c5054fa2aed9 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -469,6 +469,7 @@ static int amd_pmc_remove(struct platform_device *pdev) static const struct acpi_device_id amd_pmc_acpi_ids[] = { {"AMDI0005", 0}, + {"AMDI0006", 0}, {"AMD0004", 0}, { } }; From 83cbaf14275a30f14cf558b09389a1664b173858 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Tue, 29 Jun 2021 14:18:03 +0530 Subject: [PATCH 029/502] platform/x86: amd-pmc: Add new acpi id for future PMC controllers The upcoming PMC controller would have a newer acpi id, add that to the supported acpid device list. Signed-off-by: Shyam Sundar S K Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20210629084803.248498-8-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index c5054fa2aed9..d2f9a62e1166 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -68,6 +68,7 @@ #define AMD_CPU_ID_RN 0x1630 #define AMD_CPU_ID_PCO AMD_CPU_ID_RV #define AMD_CPU_ID_CZN AMD_CPU_ID_RN +#define AMD_CPU_ID_YC 0x14B5 #define PMC_MSG_DELAY_MIN_US 100 #define RESPONSE_REGISTER_LOOP_MAX 200 @@ -331,6 +332,7 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) case AMD_CPU_ID_PCO: return MSG_OS_HINT_PCO; case AMD_CPU_ID_RN: + case AMD_CPU_ID_YC: return MSG_OS_HINT_RN; } return -EINVAL; @@ -376,6 +378,7 @@ static const struct dev_pm_ops amd_pmc_pm_ops = { }; static const struct pci_device_id pmc_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PCO) }, @@ -470,6 +473,7 @@ static int amd_pmc_remove(struct platform_device *pdev) static const struct acpi_device_id amd_pmc_acpi_ids[] = { {"AMDI0005", 0}, {"AMDI0006", 0}, + {"AMDI0007", 0}, {"AMD0004", 0}, { } }; From a973c983375c37301645d4fea056b1f4bff77bf7 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 7 Jul 2021 09:16:47 -0500 Subject: [PATCH 030/502] platform/x86: amd-pmc: Use return code on suspend Right now the driver will still return success even if the OS_HINT command failed to send to the SMU. In the rare event of a failure, the suspend should really be aborted here so that relevant logs can may be captured. Signed-off-by: Mario Limonciello Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20210707141647.8871-1-mario.limonciello@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index d2f9a62e1166..680f94c7e075 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -353,7 +353,7 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev) if (rc) dev_err(pdev->dev, "suspend failed\n"); - return 0; + return rc; } static int __maybe_unused amd_pmc_resume(struct device *dev) From 23e9592b06b43cea4d6799843795beca13437907 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Sat, 10 Jul 2021 13:08:10 -0600 Subject: [PATCH 031/502] platform/x86: wireless-hotkey: remove hardcoded "hp" from the error message This driver is no longer specific to HP laptops so "hp" in the error message is no longer applicable. Signed-off-by: Alex Hung Link: https://lore.kernel.org/r/20210710190810.313104-1-alex.hung@canonical.com Signed-off-by: Hans de Goede --- drivers/platform/x86/wireless-hotkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c index b010e4ca3383..11c60a273446 100644 --- a/drivers/platform/x86/wireless-hotkey.c +++ b/drivers/platform/x86/wireless-hotkey.c @@ -78,7 +78,7 @@ static int wl_add(struct acpi_device *device) err = wireless_input_setup(); if (err) - pr_err("Failed to setup hp wireless hotkeys\n"); + pr_err("Failed to setup wireless hotkeys\n"); return err; } From 95edbbf78c3bdbd1daa921dd4a2e61c751e469ba Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 15 Jul 2021 15:43:27 +0800 Subject: [PATCH 032/502] platform/x86: amd-pmc: Fix missing unlock on error in amd_pmc_send_cmd() Add the missing unlock before return from function amd_pmc_send_cmd() in the error handling case. Fixes: 95e1b60f8dc8 ("platform/x86: amd-pmc: Fix command completion code") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20210715074327.1966083-1-yangyingliang@huawei.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 680f94c7e075..663a4ca0580d 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -275,7 +275,7 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX); if (rc) { dev_err(dev->dev, "failed to talk to SMU\n"); - return rc; + goto out_unlock; } /* Write zero to response register */ From ac34de14ac30ba4484d68f8845a54b6b6c23db42 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 28 Jun 2021 23:09:55 +0200 Subject: [PATCH 033/502] Revert "soc: imx8m: change to use platform driver" With the SoC matching changed to a platform driver the match data is available only after other drivers, which may rely on it are already probed. This breaks at least the CAAM driver on i.MX8M. Revert the change until all those drivers have been audited and changed to be able to eal with match data being available later in the boot process. Fixes: 7d981405d0fd ("soc: imx8m: change to use platform driver") Signed-off-by: Lucas Stach Tested-by: Frieder Schrempf Acked-by: Peng Fan Signed-off-by: Shawn Guo --- drivers/soc/imx/soc-imx8m.c | 84 ++++++------------------------------- 1 file changed, 12 insertions(+), 72 deletions(-) diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c index 071e14496e4b..cc57a384d74d 100644 --- a/drivers/soc/imx/soc-imx8m.c +++ b/drivers/soc/imx/soc-imx8m.c @@ -5,8 +5,6 @@ #include #include -#include -#include #include #include #include @@ -31,7 +29,7 @@ struct imx8_soc_data { char *name; - u32 (*soc_revision)(struct device *dev); + u32 (*soc_revision)(void); }; static u64 soc_uid; @@ -52,7 +50,7 @@ static u32 imx8mq_soc_revision_from_atf(void) static inline u32 imx8mq_soc_revision_from_atf(void) { return 0; }; #endif -static u32 __init imx8mq_soc_revision(struct device *dev) +static u32 __init imx8mq_soc_revision(void) { struct device_node *np; void __iomem *ocotp_base; @@ -77,20 +75,9 @@ static u32 __init imx8mq_soc_revision(struct device *dev) rev = REV_B1; } - if (dev) { - int ret; - - ret = nvmem_cell_read_u64(dev, "soc_unique_id", &soc_uid); - if (ret) { - iounmap(ocotp_base); - of_node_put(np); - return ret; - } - } else { - soc_uid = readl_relaxed(ocotp_base + OCOTP_UID_HIGH); - soc_uid <<= 32; - soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW); - } + soc_uid = readl_relaxed(ocotp_base + OCOTP_UID_HIGH); + soc_uid <<= 32; + soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW); iounmap(ocotp_base); of_node_put(np); @@ -120,7 +107,7 @@ static void __init imx8mm_soc_uid(void) of_node_put(np); } -static u32 __init imx8mm_soc_revision(struct device *dev) +static u32 __init imx8mm_soc_revision(void) { struct device_node *np; void __iomem *anatop_base; @@ -138,15 +125,7 @@ static u32 __init imx8mm_soc_revision(struct device *dev) iounmap(anatop_base); of_node_put(np); - if (dev) { - int ret; - - ret = nvmem_cell_read_u64(dev, "soc_unique_id", &soc_uid); - if (ret) - return ret; - } else { - imx8mm_soc_uid(); - } + imx8mm_soc_uid(); return rev; } @@ -171,7 +150,7 @@ static const struct imx8_soc_data imx8mp_soc_data = { .soc_revision = imx8mm_soc_revision, }; -static __maybe_unused const struct of_device_id imx8_machine_match[] = { +static __maybe_unused const struct of_device_id imx8_soc_match[] = { { .compatible = "fsl,imx8mq", .data = &imx8mq_soc_data, }, { .compatible = "fsl,imx8mm", .data = &imx8mm_soc_data, }, { .compatible = "fsl,imx8mn", .data = &imx8mn_soc_data, }, @@ -179,20 +158,12 @@ static __maybe_unused const struct of_device_id imx8_machine_match[] = { { } }; -static __maybe_unused const struct of_device_id imx8_soc_match[] = { - { .compatible = "fsl,imx8mq-soc", .data = &imx8mq_soc_data, }, - { .compatible = "fsl,imx8mm-soc", .data = &imx8mm_soc_data, }, - { .compatible = "fsl,imx8mn-soc", .data = &imx8mn_soc_data, }, - { .compatible = "fsl,imx8mp-soc", .data = &imx8mp_soc_data, }, - { } -}; - #define imx8_revision(soc_rev) \ soc_rev ? \ kasprintf(GFP_KERNEL, "%d.%d", (soc_rev >> 4) & 0xf, soc_rev & 0xf) : \ "unknown" -static int imx8_soc_info(struct platform_device *pdev) +static int __init imx8_soc_init(void) { struct soc_device_attribute *soc_dev_attr; struct soc_device *soc_dev; @@ -211,10 +182,7 @@ static int imx8_soc_info(struct platform_device *pdev) if (ret) goto free_soc; - if (pdev) - id = of_match_node(imx8_soc_match, pdev->dev.of_node); - else - id = of_match_node(imx8_machine_match, of_root); + id = of_match_node(imx8_soc_match, of_root); if (!id) { ret = -ENODEV; goto free_soc; @@ -223,16 +191,8 @@ static int imx8_soc_info(struct platform_device *pdev) data = id->data; if (data) { soc_dev_attr->soc_id = data->name; - if (data->soc_revision) { - if (pdev) { - soc_rev = data->soc_revision(&pdev->dev); - ret = soc_rev; - if (ret < 0) - goto free_soc; - } else { - soc_rev = data->soc_revision(NULL); - } - } + if (data->soc_revision) + soc_rev = data->soc_revision(); } soc_dev_attr->revision = imx8_revision(soc_rev); @@ -270,24 +230,4 @@ free_soc: kfree(soc_dev_attr); return ret; } - -/* Retain device_initcall is for backward compatibility with DTS. */ -static int __init imx8_soc_init(void) -{ - if (of_find_matching_node_and_match(NULL, imx8_soc_match, NULL)) - return 0; - - return imx8_soc_info(NULL); -} device_initcall(imx8_soc_init); - -static struct platform_driver imx8_soc_info_driver = { - .probe = imx8_soc_info, - .driver = { - .name = "imx8_soc_info", - .of_match_table = imx8_soc_match, - }, -}; - -module_platform_driver(imx8_soc_info_driver); -MODULE_LICENSE("GPL v2"); From b18c7da63fcb46e2f9a093cc18d7c219e13a887c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 5 Jul 2021 11:41:54 -0500 Subject: [PATCH 034/502] RDMA/rxe: Fix memory leak in error path code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In rxe_mr_init_user() at the third error the driver fails to free the memory at mr->map. This patch adds code to do that. This error only occurs if page_address() fails to return a non zero address which should never happen for 64 bit architectures. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20210705164153.17652-1-rpearsonhpe@gmail.com Reported by: Haakon Bugge Signed-off-by: Bob Pearson Reviewed-by: Zhu Yanjun Reviewed-by: Håkon Bugge Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 6aabcb4de235..be4bcb420fab 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -113,13 +113,14 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; + int i; umem = ib_umem_get(pd->ibpd.device, start, length, access); if (IS_ERR(umem)) { - pr_warn("err %d from rxe_umem_get\n", - (int)PTR_ERR(umem)); + pr_warn("%s: Unable to pin memory region err = %d\n", + __func__, (int)PTR_ERR(umem)); err = PTR_ERR(umem); - goto err1; + goto err_out; } mr->umem = umem; @@ -129,9 +130,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, err = rxe_mr_alloc(mr, num_buf); if (err) { - pr_warn("err %d from rxe_mr_alloc\n", err); - ib_umem_release(umem); - goto err1; + pr_warn("%s: Unable to allocate memory for map\n", + __func__); + goto err_release_umem; } mr->page_shift = PAGE_SHIFT; @@ -151,10 +152,10 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { - pr_warn("null vaddr\n"); - ib_umem_release(umem); + pr_warn("%s: Unable to get virtual address\n", + __func__); err = -ENOMEM; - goto err1; + goto err_cleanup_map; } buf->addr = (uintptr_t)vaddr; @@ -177,7 +178,13 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, return 0; -err1: +err_cleanup_map: + for (i = 0; i < mr->num_map; i++) + kfree(mr->map[i]); + kfree(mr->map); +err_release_umem: + ib_umem_release(umem); +err_out: return err; } From 0dc2d6ff40364a00cd66cae3ed327894dcd11c82 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 8 Jul 2021 14:35:21 -0700 Subject: [PATCH 035/502] RDMA/irdma: Check vsi pointer before using it Fix a coverity warning about NULL pointer dereference: Dereferencing "vsi", which is known to be "NULL". Link: https://lore.kernel.org/r/20210708213521.438-1-tatyana.e.nikolova@intel.com Reported-by: coverity-bot Addresses-Coverity-ID: 1505164 ("Null pointer dereferences") Fixes: 8498a30e1b94 ("RDMA/irdma: Register auxiliary driver and implement private channel OPs") Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index ea59432351fb..51a41359e0b4 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -215,10 +215,10 @@ static void irdma_remove(struct auxiliary_device *aux_dev) pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } -static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf) +static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf, + struct ice_vsi *vsi) { struct irdma_pci_f *rf = iwdev->rf; - struct ice_vsi *vsi = ice_get_main_vsi(pf); rf->cdev = pf; rf->gen_ops.register_qset = irdma_lan_register_qset; @@ -253,12 +253,15 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ struct iidc_auxiliary_dev, adev); struct ice_pf *pf = iidc_adev->pf; + struct ice_vsi *vsi = ice_get_main_vsi(pf); struct iidc_qos_params qos_info = {}; struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_l2params l2params = {}; int err; + if (!vsi) + return -EIO; iwdev = ib_alloc_device(irdma_device, ibdev); if (!iwdev) return -ENOMEM; @@ -268,7 +271,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ return -ENOMEM; } - irdma_fill_device_info(iwdev, pf); + irdma_fill_device_info(iwdev, pf, vsi); rf = iwdev->rf; if (irdma_ctrl_init_hw(rf)) { From a323da0b73b89b3ecabd661c56978a271e1911b6 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 13 Jul 2021 23:11:28 -0400 Subject: [PATCH 036/502] RDMA/irdma: change the returned type of irdma_sc_repost_aeq_entries to void The function irdma_sc_repost_aeq_entries always returns zero. So the returned type is changed to void. Link: https://lore.kernel.org/r/20210714031130.1511109-2-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ctrl.c | 4 +--- drivers/infiniband/hw/irdma/type.h | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index c3880a85e255..f1e5515256e0 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -4186,11 +4186,9 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, * @dev: sc device struct * @count: allocate count */ -enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) +void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) { writel(count, dev->hw_regs[IRDMA_AEQALLOC]); - - return 0; } /** diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 7387b83e826d..874bc25a938b 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -1222,8 +1222,7 @@ enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, struct irdma_aeq_init_info *info); enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, struct irdma_aeqe_info *info); -enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, - u32 count); +void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count); void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id, int abi_ver); From 41f5fa9fa75cebd48b5ce9ec244ee25390ac3b89 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 13 Jul 2021 23:11:29 -0400 Subject: [PATCH 037/502] RDMA/irdma: Change the returned type of irdma_set_hw_rsrc to void Since the function irdma_set_hw_rsrc always returns zero, change the returned type to void and remove all the related source code. Link: https://lore.kernel.org/r/20210714031130.1511109-3-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/hw.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 7afb8a6a0526..00de5ee9a260 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1920,7 +1920,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf) * irdma_set_hw_rsrc - set hw memory resources. * @rf: RDMA PCI function */ -static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf) +static void irdma_set_hw_rsrc(struct irdma_pci_f *rf) { rf->allocated_qps = (void *)(rf->mem_rsrc + (sizeof(struct irdma_arp_entry) * rf->arp_table_size)); @@ -1937,8 +1937,6 @@ static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf) spin_lock_init(&rf->arp_lock); spin_lock_init(&rf->qptable_lock); spin_lock_init(&rf->qh_list_lock); - - return 0; } /** @@ -2000,9 +1998,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc; - ret = irdma_set_hw_rsrc(rf); - if (ret) - goto set_hw_rsrc_fail; + irdma_set_hw_rsrc(rf); set_bit(0, rf->allocated_mrs); set_bit(0, rf->allocated_qps); @@ -2025,9 +2021,6 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) return 0; -set_hw_rsrc_fail: - kfree(rf->mem_rsrc); - rf->mem_rsrc = NULL; mem_rsrc_kzalloc_fail: kfree(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; From dc6afef7e14252c5ca5b8a8444946cb4b75b0aa0 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 13 Jul 2021 23:11:30 -0400 Subject: [PATCH 038/502] RDMA/irdma: Change returned type of irdma_setup_virt_qp to void Since the returned value of the function irdma_setup_virt_qp is always 0, remove the returned value check and change the returned type to void. Link: https://lore.kernel.org/r/20210714031130.1511109-4-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 9712f6902ba8..717147ed0519 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -557,7 +557,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) * @iwqp: qp ptr * @init_info: initialize info to return */ -static int irdma_setup_virt_qp(struct irdma_device *iwdev, +static void irdma_setup_virt_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *init_info) { @@ -574,8 +574,6 @@ static int irdma_setup_virt_qp(struct irdma_device *iwdev, init_info->sq_pa = qpmr->sq_pbl.addr; init_info->rq_pa = qpmr->rq_pbl.addr; } - - return 0; } /** @@ -914,7 +912,7 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd, } } init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; - err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info); + irdma_setup_virt_qp(iwdev, iwqp, &init_info); } else { init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); From e48bf29cf9d6d60d810e2af71e54b71a324094e0 Mon Sep 17 00:00:00 2001 From: Ye Xiang Date: Sun, 13 Jun 2021 11:25:07 +0800 Subject: [PATCH 039/502] HID: intel-ish-hid: use async resume function ISH IPC driver uses asynchronous workqueue to do resume now, but there is a potential timing issue: when child devices resume before bus driver, it will cause child devices resume failed and cannot be recovered until reboot. The current implementation in this case do wait for IPC to resume but fail to accommodate for a case when there is no ISH reboot and soft resume is taking time. This issue is apparent on Tiger Lake platform with 5.11.13 kernel when doing suspend to idle then resume(s0ix) test. To resolve this issue, we change ISHTP HID client to use asynchronous resume callback too. In the asynchronous resume callback, it waits for the ISHTP resume done event, and then notify ISHTP HID client link ready. Signed-off-by: Ye Xiang Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid-client.c | 15 +++++++++- drivers/hid/intel-ish-hid/ishtp-hid.h | 1 + drivers/hid/intel-ish-hid/ishtp/bus.c | 29 +++++++++++++++----- include/linux/intel-ish-client-if.h | 2 ++ 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index 6b1fa971b33e..91bf4d01e91a 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -784,6 +784,17 @@ static void hid_ishtp_cl_reset_handler(struct work_struct *work) } } +static void hid_ishtp_cl_resume_handler(struct work_struct *work) +{ + struct ishtp_cl_data *client_data = container_of(work, struct ishtp_cl_data, resume_work); + struct ishtp_cl *hid_ishtp_cl = client_data->hid_ishtp_cl; + + if (ishtp_wait_resume(ishtp_get_ishtp_device(hid_ishtp_cl))) { + client_data->suspended = false; + wake_up_interruptible(&client_data->ishtp_resume_wait); + } +} + ishtp_print_log ishtp_hid_print_trace; /** @@ -822,6 +833,8 @@ static int hid_ishtp_cl_probe(struct ishtp_cl_device *cl_device) init_waitqueue_head(&client_data->ishtp_resume_wait); INIT_WORK(&client_data->work, hid_ishtp_cl_reset_handler); + INIT_WORK(&client_data->resume_work, hid_ishtp_cl_resume_handler); + ishtp_hid_print_trace = ishtp_trace_callback(cl_device); @@ -921,7 +934,7 @@ static int hid_ishtp_cl_resume(struct device *device) hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__, hid_ishtp_cl); - client_data->suspended = false; + schedule_work(&client_data->resume_work); return 0; } diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h index f88443a7d935..6a5cc11aefd8 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.h +++ b/drivers/hid/intel-ish-hid/ishtp-hid.h @@ -135,6 +135,7 @@ struct ishtp_cl_data { int multi_packet_cnt; struct work_struct work; + struct work_struct resume_work; struct ishtp_cl_device *cl_device; }; diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index f0802b047ed8..aa2c51624012 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -314,13 +314,6 @@ static int ishtp_cl_device_resume(struct device *dev) if (!device) return 0; - /* - * When ISH needs hard reset, it is done asynchrnously, hence bus - * resume will be called before full ISH resume - */ - if (device->ishtp_dev->resume_flag) - return 0; - driver = to_ishtp_cl_driver(dev->driver); if (driver && driver->driver.pm) { if (driver->driver.pm->resume) @@ -849,6 +842,28 @@ struct device *ishtp_device(struct ishtp_cl_device *device) } EXPORT_SYMBOL(ishtp_device); +/** + * ishtp_wait_resume() - Wait for IPC resume + * + * Wait for IPC resume + * + * Return: resume complete or not + */ +bool ishtp_wait_resume(struct ishtp_device *dev) +{ + /* 50ms to get resume response */ + #define WAIT_FOR_RESUME_ACK_MS 50 + + /* Waiting to get resume response */ + if (dev->resume_flag) + wait_event_interruptible_timeout(dev->resume_wait, + !dev->resume_flag, + msecs_to_jiffies(WAIT_FOR_RESUME_ACK_MS)); + + return (!dev->resume_flag); +} +EXPORT_SYMBOL_GPL(ishtp_wait_resume); + /** * ishtp_get_pci_device() - Return PCI device dev pointer * This interface is used to return PCI device pointer diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index 25e2b4e80502..aee8ff4739b1 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -81,6 +81,8 @@ int ishtp_register_event_cb(struct ishtp_cl_device *device, /* Get the device * from ishtp device instance */ struct device *ishtp_device(struct ishtp_cl_device *cl_device); +/* wait for IPC resume */ +bool ishtp_wait_resume(struct ishtp_device *dev); /* Trace interface for clients */ ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device); /* Get device pointer of PCI device for DMA acces */ From 3fdcf7cdfc229346d028242e73562704ad644dd0 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Mon, 5 Jul 2021 10:26:59 +1200 Subject: [PATCH 040/502] HID: asus: Remove check for same LED brightness on set Remove the early return on LED brightness set so that any controller application, daemon, or desktop may set the same brightness at any stage. This is required because many ASUS ROG keyboards will default to max brightness on laptop resume if the LEDs were set to off before sleep. Signed-off-by: Luke D Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index fca8fc78a78a..fb807c8e989b 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -485,9 +485,6 @@ static void asus_kbd_backlight_set(struct led_classdev *led_cdev, { struct asus_kbd_leds *led = container_of(led_cdev, struct asus_kbd_leds, cdev); - if (led->brightness == brightness) - return; - led->brightness = brightness; schedule_work(&led->work); } From 8aa6348634d1bc81801329e6ea98cd88ec07fb10 Mon Sep 17 00:00:00 2001 From: Dylan MacKenzie Date: Tue, 13 Jul 2021 16:31:07 -0700 Subject: [PATCH 041/502] HID: amd_sfh: Use correct MMIO register for DMA address amd_stop_sensor_v2 accidentally used a different MMIO register than amd_start_sensor_v2 for the DMA address. Fixes: f264481ad614dfd9 ("HID: amd_sfh: Extend driver capabilities for multi-generation support") Signed-off-by: Dylan MacKenzie Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index 96e2577fa37e..8d68796aa905 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -58,7 +58,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx) cmd_base.cmd_v2.sensor_id = sensor_idx; cmd_base.cmd_v2.length = 16; - writeq(0x0, privdata->mmio + AMD_C2P_MSG2); + writeq(0x0, privdata->mmio + AMD_C2P_MSG1); writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0); } From 2acf15b94d5b8ea8392c4b6753a6ffac3135cd78 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 2 Jul 2021 12:07:43 +0800 Subject: [PATCH 042/502] reiserfs: add check for root_inode in reiserfs_fill_super Our syzcaller report a NULL pointer dereference: BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 116e95067 P4D 116e95067 PUD 1080b5067 PMD 0 Oops: 0010 [#1] SMP KASAN CPU: 7 PID: 592 Comm: a.out Not tainted 5.13.0-next-20210629-dirty #67 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-p4 RIP: 0010:0x0 Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. RSP: 0018:ffff888114e779b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff110229cef39 RCX: ffffffffaa67e1aa RDX: 0000000000000000 RSI: ffff88810a58ee00 RDI: ffff8881233180b0 RBP: ffffffffac38e9c0 R08: ffffffffaa67e17e R09: 0000000000000001 R10: ffffffffb91c5557 R11: fffffbfff7238aaa R12: ffff88810a58ee00 R13: ffff888114e77aa0 R14: 0000000000000000 R15: ffff8881233180b0 FS: 00007f946163c480(0000) GS:ffff88839f1c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000001099c1000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __lookup_slow+0x116/0x2d0 ? page_put_link+0x120/0x120 ? __d_lookup+0xfc/0x320 ? d_lookup+0x49/0x90 lookup_one_len+0x13c/0x170 ? __lookup_slow+0x2d0/0x2d0 ? reiserfs_schedule_old_flush+0x31/0x130 reiserfs_lookup_privroot+0x64/0x150 reiserfs_fill_super+0x158c/0x1b90 ? finish_unfinished+0xb10/0xb10 ? bprintf+0xe0/0xe0 ? __mutex_lock_slowpath+0x30/0x30 ? __kasan_check_write+0x20/0x30 ? up_write+0x51/0xb0 ? set_blocksize+0x9f/0x1f0 mount_bdev+0x27c/0x2d0 ? finish_unfinished+0xb10/0xb10 ? reiserfs_kill_sb+0x120/0x120 get_super_block+0x19/0x30 legacy_get_tree+0x76/0xf0 vfs_get_tree+0x49/0x160 ? capable+0x1d/0x30 path_mount+0xacc/0x1380 ? putname+0x97/0xd0 ? finish_automount+0x450/0x450 ? kmem_cache_free+0xf8/0x5a0 ? putname+0x97/0xd0 do_mount+0xe2/0x110 ? path_mount+0x1380/0x1380 ? copy_mount_options+0x69/0x140 __x64_sys_mount+0xf0/0x190 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae This is because 'root_inode' is initialized with wrong mode, and it's i_op is set to 'reiserfs_special_inode_operations'. Thus add check for 'root_inode' to fix the problem. Link: https://lore.kernel.org/r/20210702040743.1918552-1-yukuai3@huawei.com Signed-off-by: Yu Kuai Signed-off-by: Jan Kara --- fs/reiserfs/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3ffafc73acf0..58481f8d63d5 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2082,6 +2082,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) unlock_new_inode(root_inode); } + if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) || + !root_inode->i_size) { + SWARN(silent, s, "", "corrupt root inode, run fsck"); + iput(root_inode); + errval = -EUCLEAN; + goto error; + } + s->s_root = d_make_root(root_inode); if (!s->s_root) goto error; From 728d392f8a799f037812d0f2b254fb3b5e115fcf Mon Sep 17 00:00:00 2001 From: Javier Pello Date: Wed, 14 Jul 2021 18:54:48 +0200 Subject: [PATCH 043/502] fs/ext2: Avoid page_address on pages returned by ext2_get_page Commit 782b76d7abdf02b12c46ed6f1e9bf715569027f7 ("fs/ext2: Replace kmap() with kmap_local_page()") replaced the kmap/kunmap calls in ext2_get_page/ext2_put_page with kmap_local_page/kunmap_local for efficiency reasons. As a necessary side change, the commit also made ext2_get_page (and ext2_find_entry and ext2_dotdot) return the mapping address along with the page itself, as it is required for kunmap_local, and converted uses of page_address on such pages to use the newly returned address instead. However, uses of page_address on such pages were missed in ext2_check_page and ext2_delete_entry, which triggers oopses if kmap_local_page happens to return an address from high memory. Fix this now by converting the remaining uses of page_address to use the right address, as returned by kmap_local_page. Link: https://lore.kernel.org/r/20210714185448.8707ac239e9f12b3a7f5b9f9@urjc.es Reviewed-by: Ira Weiny Signed-off-by: Javier Pello Fixes: 782b76d7abdf ("fs/ext2: Replace kmap() with kmap_local_page()") Signed-off-by: Jan Kara --- fs/ext2/dir.c | 12 ++++++------ fs/ext2/ext2.h | 3 ++- fs/ext2/namei.c | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 14292dba3a12..2c2f179b6977 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -106,12 +106,11 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) return err; } -static bool ext2_check_page(struct page *page, int quiet) +static bool ext2_check_page(struct page *page, int quiet, char *kaddr) { struct inode *dir = page->mapping->host; struct super_block *sb = dir->i_sb; unsigned chunk_size = ext2_chunk_size(dir); - char *kaddr = page_address(page); u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count); unsigned offs, rec_len; unsigned limit = PAGE_SIZE; @@ -205,7 +204,8 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n, if (!IS_ERR(page)) { *page_addr = kmap_local_page(page); if (unlikely(!PageChecked(page))) { - if (PageError(page) || !ext2_check_page(page, quiet)) + if (PageError(page) || !ext2_check_page(page, quiet, + *page_addr)) goto fail; } } @@ -584,10 +584,10 @@ out_unlock: * ext2_delete_entry deletes a directory entry by merging it with the * previous entry. Page is up-to-date. */ -int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) +int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, + char *kaddr) { struct inode *inode = page->mapping->host; - char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); unsigned to = ((char *)dir - kaddr) + ext2_rec_len_from_disk(dir->rec_len); @@ -607,7 +607,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) de = ext2_next_entry(de); } if (pde) - from = (char*)pde - (char*)page_address(page); + from = (char *)pde - kaddr; pos = page_offset(page) + from; lock_page(page); err = ext2_prepare_chunk(page, pos, to - from); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index b0a694820cb7..e512630cb63e 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -740,7 +740,8 @@ extern int ext2_inode_by_name(struct inode *dir, extern int ext2_make_empty(struct inode *, struct inode *); extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *, struct page **, void **res_page_addr); -extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); +extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page, + char *kaddr); extern int ext2_empty_dir (struct inode *); extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa); extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, void *, diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 1f69b81655b6..5f6b7560eb3f 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -293,7 +293,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) goto out; } - err = ext2_delete_entry (de, page); + err = ext2_delete_entry (de, page, page_addr); ext2_put_page(page, page_addr); if (err) goto out; @@ -397,7 +397,7 @@ static int ext2_rename (struct user_namespace * mnt_userns, old_inode->i_ctime = current_time(old_inode); mark_inode_dirty(old_inode); - ext2_delete_entry(old_de, old_page); + ext2_delete_entry(old_de, old_page, old_page_addr); if (dir_de) { if (old_dir != new_dir) From 13d257503c0930010ef9eed78b689cec417ab741 Mon Sep 17 00:00:00 2001 From: Shreyansh Chouhan Date: Fri, 9 Jul 2021 20:59:29 +0530 Subject: [PATCH 044/502] reiserfs: check directory items on read from disk While verifying the leaf item that we read from the disk, reiserfs doesn't check the directory items, this could cause a crash when we read a directory item from the disk that has an invalid deh_location. This patch adds a check to the directory items read from the disk that does a bounds check on deh_location for the directory entries. Any directory entry header with a directory entry offset greater than the item length is considered invalid. Link: https://lore.kernel.org/r/20210709152929.766363-1-chouhan.shreyansh630@gmail.com Reported-by: syzbot+c31a48e6702ccb3d64c9@syzkaller.appspotmail.com Signed-off-by: Shreyansh Chouhan Signed-off-by: Jan Kara --- fs/reiserfs/stree.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 476a7ff49482..ef42729216d1 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -387,6 +387,24 @@ void pathrelse(struct treepath *search_path) search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; } +static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih) +{ + struct reiserfs_de_head *deh; + int i; + + deh = B_I_DEH(bh, ih); + for (i = 0; i < ih_entry_count(ih); i++) { + if (deh_location(&deh[i]) > ih_item_len(ih)) { + reiserfs_warning(NULL, "reiserfs-5094", + "directory entry location seems wrong %h", + &deh[i]); + return 0; + } + } + + return 1; +} + static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) { struct block_head *blkh; @@ -454,11 +472,14 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) "(second one): %h", ih); return 0; } - if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) { - reiserfs_warning(NULL, "reiserfs-5093", - "item entry count seems wrong %h", - ih); - return 0; + if (is_direntry_le_ih(ih)) { + if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) { + reiserfs_warning(NULL, "reiserfs-5093", + "item entry count seems wrong %h", + ih); + return 0; + } + return has_valid_deh_location(bh, ih); } prev_location = ih_location(ih); } From 59089a189e3adde4cf85f2ce479738d1ae4c514d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 29 Jun 2021 09:39:15 +0000 Subject: [PATCH 045/502] bpf: Remove superfluous aux sanitation on subprog rejection Follow-up to fe9a5ca7e370 ("bpf: Do not mark insn as seen under speculative path verification"). The sanitize_insn_aux_data() helper does not serve a particular purpose in today's code. The original intention for the helper was that if function-by-function verification fails, a given program would be cleared from temporary insn_aux_data[], and then its verification would be re-attempted in the context of the main program a second time. However, a failure in do_check_subprogs() will skip do_check_main() and propagate the error to the user instead, thus such situation can never occur. Given its interaction is not compatible to the Spectre v1 mitigation (due to comparing aux->seen with env->pass_cnt), just remove sanitize_insn_aux_data() to avoid future bugs in this area. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9de3c9c3267c..8a7a28b4cfb9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12752,37 +12752,6 @@ static void free_states(struct bpf_verifier_env *env) } } -/* The verifier is using insn_aux_data[] to store temporary data during - * verification and to store information for passes that run after the - * verification like dead code sanitization. do_check_common() for subprogram N - * may analyze many other subprograms. sanitize_insn_aux_data() clears all - * temporary data after do_check_common() finds that subprogram N cannot be - * verified independently. pass_cnt counts the number of times - * do_check_common() was run and insn->aux->seen tells the pass number - * insn_aux_data was touched. These variables are compared to clear temporary - * data from failed pass. For testing and experiments do_check_common() can be - * run multiple times even when prior attempt to verify is unsuccessful. - * - * Note that special handling is needed on !env->bypass_spec_v1 if this is - * ever called outside of error path with subsequent program rejection. - */ -static void sanitize_insn_aux_data(struct bpf_verifier_env *env) -{ - struct bpf_insn *insn = env->prog->insnsi; - struct bpf_insn_aux_data *aux; - int i, class; - - for (i = 0; i < env->prog->len; i++) { - class = BPF_CLASS(insn[i].code); - if (class != BPF_LDX && class != BPF_STX) - continue; - aux = &env->insn_aux_data[i]; - if (aux->seen != env->pass_cnt) - continue; - memset(aux, 0, offsetof(typeof(*aux), orig_idx)); - } -} - static int do_check_common(struct bpf_verifier_env *env, int subprog) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); @@ -12859,9 +12828,6 @@ out: if (!ret && pop_log) bpf_vlog_reset(&env->log, 0); free_states(env); - if (ret) - /* clean aux data in case subprog was rejected */ - sanitize_insn_aux_data(env); return ret; } From e042aa532c84d18ff13291d00620502ce7a38dda Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Jul 2021 09:18:21 +0000 Subject: [PATCH 046/502] bpf: Fix pointer arithmetic mask tightening under state pruning In 7fedb63a8307 ("bpf: Tighten speculative pointer arithmetic mask") we narrowed the offset mask for unprivileged pointer arithmetic in order to mitigate a corner case where in the speculative domain it is possible to advance, for example, the map value pointer by up to value_size-1 out-of- bounds in order to leak kernel memory via side-channel to user space. The verifier's state pruning for scalars leaves one corner case open where in the first verification path R_x holds an unknown scalar with an aux->alu_limit of e.g. 7, and in a second verification path that same register R_x, here denoted as R_x', holds an unknown scalar which has tighter bounds and would thus satisfy range_within(R_x, R_x') as well as tnum_in(R_x, R_x') for state pruning, yielding an aux->alu_limit of 3: Given the second path fits the register constraints for pruning, the final generated mask from aux->alu_limit will remain at 7. While technically not wrong for the non-speculative domain, it would however be possible to craft similar cases where the mask would be too wide as in 7fedb63a8307. One way to fix it is to detect the presence of unknown scalar map pointer arithmetic and force a deeper search on unknown scalars to ensure that we do not run into a masking mismatch. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e774ecc1cd1f..7ba7e800d472 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -414,6 +414,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; bool allow_ptr_to_map_access; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8a7a28b4cfb9..657062cb4d85 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6561,6 +6561,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + + /* Limit pruning on unknown scalars to enable deep search for + * potential masking differences from other program paths. + */ + if (!off_is_imm) + env->explore_alu_limits = true; } err = update_alu_sanitation_state(aux, alu_state, alu_limit); @@ -9936,8 +9942,8 @@ next: } /* Returns true if (rold safe implies rcur safe) */ -static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, - struct bpf_id_pair *idmap) +static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, + struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) { bool equal; @@ -9963,6 +9969,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; switch (rold->type) { case SCALAR_VALUE: + if (env->explore_alu_limits) + return false; if (rcur->type == SCALAR_VALUE) { if (!rold->precise && !rcur->precise) return true; @@ -10053,9 +10061,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; } -static bool stacksafe(struct bpf_func_state *old, - struct bpf_func_state *cur, - struct bpf_id_pair *idmap) +static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, + struct bpf_func_state *cur, struct bpf_id_pair *idmap) { int i, spi; @@ -10100,9 +10107,8 @@ static bool stacksafe(struct bpf_func_state *old, continue; if (old->stack[spi].slot_type[0] != STACK_SPILL) continue; - if (!regsafe(&old->stack[spi].spilled_ptr, - &cur->stack[spi].spilled_ptr, - idmap)) + if (!regsafe(env, &old->stack[spi].spilled_ptr, + &cur->stack[spi].spilled_ptr, idmap)) /* when explored and current stack slot are both storing * spilled registers, check that stored pointers types * are the same as well. @@ -10159,10 +10165,11 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); for (i = 0; i < MAX_BPF_REG; i++) - if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch)) + if (!regsafe(env, &old->regs[i], &cur->regs[i], + env->idmap_scratch)) return false; - if (!stacksafe(old, cur, env->idmap_scratch)) + if (!stacksafe(env, old, cur, env->idmap_scratch)) return false; if (!refsafe(old, cur)) From a6c39de76d709f30982d4b80a9b9537e1d388858 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Jul 2021 13:15:33 +0000 Subject: [PATCH 047/502] bpf, selftests: Add test cases for pointer alu from multiple paths Add several test cases for checking update_alu_sanitation_state() under multiple paths: # ./test_verifier [...] #1061/u map access: known scalar += value_ptr unknown vs const OK #1061/p map access: known scalar += value_ptr unknown vs const OK #1062/u map access: known scalar += value_ptr const vs unknown OK #1062/p map access: known scalar += value_ptr const vs unknown OK #1063/u map access: known scalar += value_ptr const vs const (ne) OK #1063/p map access: known scalar += value_ptr const vs const (ne) OK #1064/u map access: known scalar += value_ptr const vs const (eq) OK #1064/p map access: known scalar += value_ptr const vs const (eq) OK #1065/u map access: known scalar += value_ptr unknown vs unknown (eq) OK #1065/p map access: known scalar += value_ptr unknown vs unknown (eq) OK #1066/u map access: known scalar += value_ptr unknown vs unknown (lt) OK #1066/p map access: known scalar += value_ptr unknown vs unknown (lt) OK #1067/u map access: known scalar += value_ptr unknown vs unknown (gt) OK #1067/p map access: known scalar += value_ptr unknown vs unknown (gt) OK [...] Summary: 1762 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- .../selftests/bpf/verifier/value_ptr_arith.c | 229 ++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index a3e593ddfafc..2debba4e8a3a 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1,3 +1,232 @@ +{ + "map access: known scalar += value_ptr unknown vs const", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs unknown", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (ne)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (eq)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (eq)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (lt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (gt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, { "map access: known scalar += value_ptr from different maps", .insns = { From 9d7a6c95f62bc335b62aaf9d50590122bd03a796 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 5 Jul 2021 10:44:52 +0200 Subject: [PATCH 048/502] perf: Fix required permissions if sigtrap is requested If perf_event_open() is called with another task as target and perf_event_attr::sigtrap is set, and the target task's user does not match the calling user, also require the CAP_KILL capability or PTRACE_MODE_ATTACH permissions. Otherwise, with the CAP_PERFMON capability alone it would be possible for a user to send SIGTRAP signals via perf events to another user's tasks. This could potentially result in those tasks being terminated if they cannot handle SIGTRAP signals. Note: The check complements the existing capability check, but is not supposed to supersede the ptrace_may_access() check. At a high level we now have: capable of CAP_PERFMON and (CAP_KILL if sigtrap) OR ptrace_may_access(...) // also checks for same thread-group and uid Fixes: 97ba62b27867 ("perf: Add support for SIGTRAP on perf events") Reported-by: Dmitry Vyukov Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Acked-by: Dmitry Vyukov Cc: # 5.13+ Link: https://lore.kernel.org/r/20210705084453.2151729-1-elver@google.com --- kernel/events/core.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 464917096e73..c13730b7ac01 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12158,10 +12158,33 @@ SYSCALL_DEFINE5(perf_event_open, } if (task) { + unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS; + bool is_capable; + err = down_read_interruptible(&task->signal->exec_update_lock); if (err) goto err_file; + is_capable = perfmon_capable(); + if (attr.sigtrap) { + /* + * perf_event_attr::sigtrap sends signals to the other + * task. Require the current task to also have + * CAP_KILL. + */ + rcu_read_lock(); + is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL); + rcu_read_unlock(); + + /* + * If the required capabilities aren't available, checks + * for ptrace permissions: upgrade to ATTACH, since + * sending signals can effectively change the target + * task. + */ + ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS; + } + /* * Preserve ptrace permission check for backwards compatibility. * @@ -12171,7 +12194,7 @@ SYSCALL_DEFINE5(perf_event_open, * perf_event_exit_task() that could imply). */ err = -EACCES; - if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) + if (!is_capable && !ptrace_may_access(task, ptrace_mode)) goto err_cred; } From b068fc04de10fff8974f6ef32b861ad134d94ba4 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 5 Jul 2021 10:44:53 +0200 Subject: [PATCH 049/502] perf: Refactor permissions check into perf_check_permission() Refactor the permission check in perf_event_open() into a helper perf_check_permission(). This makes the permission check logic more readable (because we no longer have a negated disjunction). Add a comment mentioning the ptrace check also checks the uid. No functional change intended. Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Link: https://lore.kernel.org/r/20210705084453.2151729-2-elver@google.com --- kernel/events/core.c | 58 ++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index c13730b7ac01..1cb1f9b8392e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11917,6 +11917,37 @@ again: return gctx; } +static bool +perf_check_permission(struct perf_event_attr *attr, struct task_struct *task) +{ + unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS; + bool is_capable = perfmon_capable(); + + if (attr->sigtrap) { + /* + * perf_event_attr::sigtrap sends signals to the other task. + * Require the current task to also have CAP_KILL. + */ + rcu_read_lock(); + is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL); + rcu_read_unlock(); + + /* + * If the required capabilities aren't available, checks for + * ptrace permissions: upgrade to ATTACH, since sending signals + * can effectively change the target task. + */ + ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS; + } + + /* + * Preserve ptrace permission check for backwards compatibility. The + * ptrace check also includes checks that the current task and other + * task have matching uids, and is therefore not done here explicitly. + */ + return is_capable || ptrace_may_access(task, ptrace_mode); +} + /** * sys_perf_event_open - open a performance event, associate it to a task/cpu * @@ -12158,43 +12189,18 @@ SYSCALL_DEFINE5(perf_event_open, } if (task) { - unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS; - bool is_capable; - err = down_read_interruptible(&task->signal->exec_update_lock); if (err) goto err_file; - is_capable = perfmon_capable(); - if (attr.sigtrap) { - /* - * perf_event_attr::sigtrap sends signals to the other - * task. Require the current task to also have - * CAP_KILL. - */ - rcu_read_lock(); - is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL); - rcu_read_unlock(); - - /* - * If the required capabilities aren't available, checks - * for ptrace permissions: upgrade to ATTACH, since - * sending signals can effectively change the target - * task. - */ - ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS; - } - /* - * Preserve ptrace permission check for backwards compatibility. - * * We must hold exec_update_lock across this and any potential * perf_install_in_context() call for this new event to * serialize against exec() altering our credentials (and the * perf_event_exit_task() that could imply). */ err = -EACCES; - if (!is_capable && !ptrace_may_access(task, ptrace_mode)) + if (!perf_check_permission(&attr, task)) goto err_cred; } From ba02920c51debb9198e72b3a8726a7c5ae4ffb41 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Tue, 13 Jul 2021 17:05:46 +0530 Subject: [PATCH 050/502] arm64: tegra: Enable SMMU support for PCIe on Tegra194 As of commit c7289b1c8a4e ("arm64: tegra: Enable SMMU support on Tegra194"), SMMU support is enabled system-wide on Tegra194. However, there was a bit of overlap between the SMMU enablement and the PCIe support addition, so the PCIe device tree nodes are missing the iommus and interconnects properties. This in turn leads to SMMU faults for these devices, since by default the ARM SMMU will fault. Add the iommus and interconnects properties to all the PCIe device tree nodes to restore their functionality. Fixes: c7289b1c8a4e ("arm64: tegra: Enable SMMU support on Tegra194") Signed-off-by: Vidya Sagar Reviewed-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 60 +++++++++++++++++++++--- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 076d5efc4c3d..5ba7a4519b95 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1840,7 +1840,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE1R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE1W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE1>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE1 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14120000 { @@ -1890,7 +1894,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE2AR &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE2AW &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE2>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE2 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14140000 { @@ -1940,7 +1948,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE3R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE3W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE3>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE3 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14160000 { @@ -1990,7 +2002,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14180000 { @@ -2040,7 +2056,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@141a0000 { @@ -2094,7 +2114,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14160000 { @@ -2127,6 +2151,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14180000 { @@ -2159,6 +2191,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@141a0000 { @@ -2194,6 +2234,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; sram@40000000 { From cfbe3650dd3ef2ea9a4420ca89d9a4df98af3fb6 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 14 Jul 2021 11:27:03 +0800 Subject: [PATCH 051/502] netfilter: nf_tables: fix audit memory leak in nf_tables_commit In nf_tables_commit, if nf_tables_commit_audit_alloc fails, it does not free the adp variable. Fix this by adding nf_tables_commit_audit_free which frees the linked list with the head node adl. backtrace: kmalloc include/linux/slab.h:591 [inline] kzalloc include/linux/slab.h:721 [inline] nf_tables_commit_audit_alloc net/netfilter/nf_tables_api.c:8439 [inline] nf_tables_commit+0x16e/0x1760 net/netfilter/nf_tables_api.c:8508 nfnetlink_rcv_batch+0x512/0xa80 net/netfilter/nfnetlink.c:562 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] nfnetlink_rcv+0x1fa/0x220 net/netfilter/nfnetlink.c:652 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x2c7/0x3e0 net/netlink/af_netlink.c:1340 netlink_sendmsg+0x36b/0x6b0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:702 [inline] sock_sendmsg+0x56/0x80 net/socket.c:722 Reported-by: syzbot Reported-by: kernel test robot Fixes: c520292f29b8 ("audit: log nftables configuration change events once per table") Signed-off-by: Dongliang Mu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index de182d1f7c4e..081437dd75b7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8445,6 +8445,16 @@ static int nf_tables_commit_audit_alloc(struct list_head *adl, return 0; } +static void nf_tables_commit_audit_free(struct list_head *adl) +{ + struct nft_audit_data *adp, *adn; + + list_for_each_entry_safe(adp, adn, adl, list) { + list_del(&adp->list); + kfree(adp); + } +} + static void nf_tables_commit_audit_collect(struct list_head *adl, struct nft_table *table, u32 op) { @@ -8509,6 +8519,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); if (ret) { nf_tables_commit_chain_prepare_cancel(net); + nf_tables_commit_audit_free(&adl); return ret; } if (trans->msg_type == NFT_MSG_NEWRULE || @@ -8518,6 +8529,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { nf_tables_commit_chain_prepare_cancel(net); + nf_tables_commit_audit_free(&adl); return ret; } } From 7f5231b114da76bfd5d0fc685d5cf408d1bbfca7 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Fri, 16 Jul 2021 21:08:02 +0530 Subject: [PATCH 052/502] platform/x86: amd-pmc: Fix undefined reference to __udivdi3 It was reported that on i386 config ------ on i386: ld: drivers/platform/x86/amd-pmc.o: in function `s0ix_stats_show': amd-pmc.c:(.text+0x100): undefined reference to `__udivdi3' ------- The reason for this is that 64-bit integer division is not supported on 32-bit architecture. Use do_div macro to fix this. Fixes: b9a4fa6978be ("platform/x86: amd-pmc: Add support for logging s0ix counters") Reported-by: Randy Dunlap Signed-off-by: Shyam Sundar S K Reviewed-by: Randy Dunlap # and build-tested Link: https://lore.kernel.org/r/20210716153802.2929670-1-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 663a4ca0580d..3481479a2942 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -189,7 +189,8 @@ static int s0ix_stats_show(struct seq_file *s, void *unused) exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET); /* It's in 48MHz. We need to convert it */ - residency = (exit_time - entry_time) / 48; + residency = exit_time - entry_time; + do_div(residency, 48); seq_puts(s, "=== S0ix statistics ===\n"); seq_printf(s, "S0ix Entry Time: %lld\n", entry_time); From e62fb1e3faae60f483a96c359c8d72bb04a7b728 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sat, 17 Jul 2021 16:36:05 +0200 Subject: [PATCH 053/502] platform/x86: think-lmi: Move pending_reboot_attr to the attributes sysfs dir Move the pending_reboot node under attributes dir where it should live, as documented in: Documentation/ABI/testing/sysfs-class-firmware-attributes. Also move the create / remove code to be together with the other code populating / cleaning the attributes sysfs dir. In the removal path this is necessary so that the remove is done before the kset_unregister(tlmi_priv.attribute_kset) call. Signed-off-by: Mark Pearson Co-developed-by: Hans de Goede Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210717143607.3580-1-hdegoede@redhat.com --- drivers/platform/x86/think-lmi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 64dcec53a7a0..989a8221dcd8 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -672,6 +672,7 @@ static void tlmi_release_attr(void) kobject_put(&tlmi_priv.setting[i]->kobj); } } + sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr); kset_unregister(tlmi_priv.attribute_kset); /* Authentication structures */ @@ -680,7 +681,6 @@ static void tlmi_release_attr(void) sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); kobject_put(&tlmi_priv.pwd_power->kobj); kset_unregister(tlmi_priv.authentication_kset); - sysfs_remove_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr); } static int tlmi_sysfs_init(void) @@ -733,6 +733,10 @@ static int tlmi_sysfs_init(void) goto fail_create_attr; } + ret = sysfs_create_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr); + if (ret) + goto fail_create_attr; + /* Create authentication entries */ tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL, &tlmi_priv.class_dev->kobj); @@ -760,11 +764,6 @@ static int tlmi_sysfs_init(void) if (ret) goto fail_create_attr; - /* Create global sysfs files */ - ret = sysfs_create_file(&tlmi_priv.class_dev->kobj, &pending_reboot.attr); - if (ret) - goto fail_create_attr; - return ret; fail_create_attr: From 30e78435d3bf803cabdc2a1c2eb36e6983aa4596 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 17 Jul 2021 16:36:06 +0200 Subject: [PATCH 054/502] platform/x86: think-lmi: Split kobject_init() and kobject_add() calls tlmi_sysfs_init() calls tlmi_release_attr() on errors which calls kobject_put() for attributes created by tlmi_analyze(), but if we bail early because of an error, then this means that some of the kobjects will not have been initialized yet; and we should thus not call kobject_put() on them. Switch from using kobject_init_and_add() inside tlmi_sysfs_init() to initializing all the created kobjects directly in tlmi_analyze() and only adding them from tlmi_sysfs_init(). This way all kobjects will always be initialized when tlmi_release_attr() gets called. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210717143607.3580-2-hdegoede@redhat.com --- drivers/platform/x86/think-lmi.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 989a8221dcd8..c22edcf26aaa 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -723,8 +723,8 @@ static int tlmi_sysfs_init(void) /* Build attribute */ tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset; - ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype, - NULL, "%s", tlmi_priv.setting[i]->display_name); + ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL, + "%s", tlmi_priv.setting[i]->display_name); if (ret) goto fail_create_attr; @@ -745,8 +745,7 @@ static int tlmi_sysfs_init(void) goto fail_create_attr; } tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype, - NULL, "%s", "Admin"); + ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin"); if (ret) goto fail_create_attr; @@ -755,8 +754,7 @@ static int tlmi_sysfs_init(void) goto fail_create_attr; tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype, - NULL, "%s", "System"); + ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "System"); if (ret) goto fail_create_attr; @@ -836,6 +834,7 @@ static int tlmi_analyze(void) pr_info("Error retrieving possible values for %d : %s\n", i, setting->display_name); } + kobject_init(&setting->kobj, &tlmi_attr_setting_ktype); tlmi_priv.setting[i] = setting; tlmi_priv.settings_count++; kfree(item); @@ -862,6 +861,8 @@ static int tlmi_analyze(void) if (pwdcfg.password_state & TLMI_PAP_PWD) tlmi_priv.pwd_admin->valid = true; + kobject_init(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype); + tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL); if (!tlmi_priv.pwd_power) { ret = -ENOMEM; @@ -877,6 +878,8 @@ static int tlmi_analyze(void) if (pwdcfg.password_state & TLMI_POP_PWD) tlmi_priv.pwd_power->valid = true; + kobject_init(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype); + return 0; fail_clear_attr: From f7e506ec4a9966be8b2a87d3324302f0f5dd5a29 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 17 Jul 2021 16:36:07 +0200 Subject: [PATCH 055/502] platform/x86: think-lmi: Fix possible mem-leaks on tlmi_analyze() error-exit Fix 2 possible memleaks on error-exits from tlmi_analyze(): 1. If the kzalloc of pwd_power fails, then not only free the atributes, but also the allocated pwd_admin struct. 2. Freeing the attributes should also free the possible_values strings. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210717143607.3580-3-hdegoede@redhat.com --- drivers/platform/x86/think-lmi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index c22edcf26aaa..6cfed4427fb0 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -866,7 +866,7 @@ static int tlmi_analyze(void) tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL); if (!tlmi_priv.pwd_power) { ret = -ENOMEM; - goto fail_clear_attr; + goto fail_free_pwd_admin; } strscpy(tlmi_priv.pwd_power->kbdlang, "us", TLMI_LANG_MAXLEN); tlmi_priv.pwd_power->encoding = TLMI_ENCODING_ASCII; @@ -882,9 +882,15 @@ static int tlmi_analyze(void) return 0; +fail_free_pwd_admin: + kfree(tlmi_priv.pwd_admin); fail_clear_attr: - for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) - kfree(tlmi_priv.setting[i]); + for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) { + if (tlmi_priv.setting[i]) { + kfree(tlmi_priv.setting[i]->possible_values); + kfree(tlmi_priv.setting[i]); + } + } return ret; } From ec7099fdea8025988710ee6fecfd4e4210c29ab5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 2 Jul 2021 15:37:12 +0200 Subject: [PATCH 056/502] Revert "gpio: mpc8xxx: change the gpio interrupt flags." This reverts commit 3d5bfbd9716318b1ca5c38488aa69f64d38a9aa5. When booting with threadirqs, it causes a splat WARNING: CPU: 0 PID: 29 at kernel/irq/handle.c:159 __handle_irq_event_percpu+0x1ec/0x27c irq 66 handler irq_default_primary_handler+0x0/0x1c enabled interrupts That splat later went away with commit 81e2073c175b ("genirq: Disable interrupts for force threaded handlers"), which got backported to -stable. However, when running an -rt kernel, the splat still exists. Moreover, quoting Thomas Gleixner [1] But 3d5bfbd97163 ("gpio: mpc8xxx: change the gpio interrupt flags.") has nothing to do with that: "Delete the interrupt IRQF_NO_THREAD flags in order to gpio interrupts can be threaded to allow high-priority processes to preempt." This changelog is blatantly wrong. In mainline forced irq threads have always been invoked with softirqs disabled, which obviously makes them non-preemptible. So the patch didn't even do what its commit log said. [1] https://lore.kernel.org/lkml/871r8zey88.ffs@nanos.tec.linutronix.de/ Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Rasmus Villemoes Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mpc8xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 4b9157a69fca..50b321a1ab1b 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -405,7 +405,7 @@ static int mpc8xxx_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn, mpc8xxx_gpio_irq_cascade, - IRQF_SHARED, "gpio-cascade", + IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade", mpc8xxx_gc); if (ret) { dev_err(&pdev->dev, From d6371c76e20d7d3f61b05fd67b596af4d14a8886 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 19 Jul 2021 09:51:34 +0100 Subject: [PATCH 057/502] bpf: Fix OOB read when printing XDP link fdinfo We got the following UBSAN report on one of our testing machines: ================================================================================ UBSAN: array-index-out-of-bounds in kernel/bpf/syscall.c:2389:24 index 6 is out of range for type 'char *[6]' CPU: 43 PID: 930921 Comm: systemd-coredum Tainted: G O 5.10.48-cloudflare-kasan-2021.7.0 #1 Hardware name: Call Trace: dump_stack+0x7d/0xa3 ubsan_epilogue+0x5/0x40 __ubsan_handle_out_of_bounds.cold+0x43/0x48 ? seq_printf+0x17d/0x250 bpf_link_show_fdinfo+0x329/0x380 ? bpf_map_value_size+0xe0/0xe0 ? put_files_struct+0x20/0x2d0 ? __kasan_kmalloc.constprop.0+0xc2/0xd0 seq_show+0x3f7/0x540 seq_read_iter+0x3f8/0x1040 seq_read+0x329/0x500 ? seq_read_iter+0x1040/0x1040 ? __fsnotify_parent+0x80/0x820 ? __fsnotify_update_child_dentry_flags+0x380/0x380 vfs_read+0x123/0x460 ksys_read+0xed/0x1c0 ? __x64_sys_pwrite64+0x1f0/0x1f0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ================================================================================ ================================================================================ UBSAN: object-size-mismatch in kernel/bpf/syscall.c:2384:2 From the report, we can infer that some array access in bpf_link_show_fdinfo at index 6 is out of bounds. The obvious candidate is bpf_link_type_strs[BPF_LINK_TYPE_XDP] with BPF_LINK_TYPE_XDP == 6. It turns out that BPF_LINK_TYPE_XDP is missing from bpf_types.h and therefore doesn't have an entry in bpf_link_type_strs: pos: 0 flags: 02000000 mnt_id: 13 link_type: (null) link_id: 4 prog_tag: bcf7977d3b93787c prog_id: 4 ifindex: 1 Fixes: aa8d3a716b59 ("bpf, xdp: Add bpf_link-based XDP attachment API") Signed-off-by: Lorenz Bauer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210719085134.43325-2-lmb@cloudflare.com --- include/linux/bpf_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index a9db1eae6796..ae3ac3a2018c 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -134,4 +134,5 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup) BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) #ifdef CONFIG_NET BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) +BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) #endif From 8ba89a3c7967808f33478a8573277cf6a7412c4c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 11:38:41 -0700 Subject: [PATCH 058/502] dmaengine: idxd: fix desc->vector that isn't being updated Missing update for desc->vector when the wq vector gets updated. This causes the desc->vector to always be at 0. Fixes: da435aedb00a ("dmaengine: idxd: fix array index when int_handles are being used") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162628784374.353761.4736602409627820431.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index e29887528077..21d7d09f73dd 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -25,11 +25,10 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu) * Descriptor completion vectors are 1...N for MSIX. We will round * robin through the N vectors. */ - wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1; + wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1; if (!idxd->int_handles) { desc->hw->int_handle = wq->vec_ptr; } else { - desc->vector = wq->vec_ptr; /* * int_handles are only for descriptor completion. However for device * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even From 7eb25da161befbc9a80e94e1bd90d6c06aa645cf Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 14:57:19 -0700 Subject: [PATCH 059/502] dmaengine: idxd: fix sequence for pci driver remove() and shutdown() ->shutdown() call should only be responsible for quiescing the device. Currently it is doing PCI device tear down. This causes issue when things like MMIO mapping is removed while idxd_unregister_devices() will trigger removal of idxd device sub-driver and still initiates MMIO writes to the device. Another issue is with the unregistering of idxd 'struct device', the memory context gets freed. So the teardown calls are accessing freed memory and can cause kernel oops. Move all the teardown bits that doesn't belong in shutdown to ->remove() call. Move unregistering of the idxd conf_dev 'struct device' to after doing all the teardown to free all the memory that's no longer needed. Fixes: 47c16ac27d4c ("dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162629983901.395844.17964803190905549615.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 26 +++++++++++++++++--------- drivers/dma/idxd/sysfs.c | 2 -- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 4e32a4dcc3ab..c0f4c0422f32 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -760,32 +760,40 @@ static void idxd_shutdown(struct pci_dev *pdev) for (i = 0; i < msixcnt; i++) { irq_entry = &idxd->irq_entries[i]; synchronize_irq(irq_entry->vector); - free_irq(irq_entry->vector, irq_entry); if (i == 0) continue; idxd_flush_pending_llist(irq_entry); idxd_flush_work_list(irq_entry); } - - idxd_msix_perm_clear(idxd); - idxd_release_int_handles(idxd); - pci_free_irq_vectors(pdev); - pci_iounmap(pdev, idxd->reg_base); - pci_disable_device(pdev); - destroy_workqueue(idxd->wq); + flush_workqueue(idxd->wq); } static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); + struct idxd_irq_entry *irq_entry; + int msixcnt = pci_msix_vec_count(pdev); + int i; dev_dbg(&pdev->dev, "%s called\n", __func__); idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); idxd_unregister_devices(idxd); - perfmon_pmu_remove(idxd); + + for (i = 0; i < msixcnt; i++) { + irq_entry = &idxd->irq_entries[i]; + free_irq(irq_entry->vector, irq_entry); + } + idxd_msix_perm_clear(idxd); + idxd_release_int_handles(idxd); + pci_free_irq_vectors(pdev); + pci_iounmap(pdev, idxd->reg_base); iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + pci_disable_device(pdev); + destroy_workqueue(idxd->wq); + perfmon_pmu_remove(idxd); + device_unregister(&idxd->conf_dev); } static struct pci_driver idxd_pci_driver = { diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 0460d58e3941..bb4df63906a7 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1744,8 +1744,6 @@ void idxd_unregister_devices(struct idxd_device *idxd) device_unregister(&group->conf_dev); } - - device_unregister(&idxd->conf_dev); } int idxd_register_bus_type(void) From 6b4b87f2c31ac1af4f244990a7cbfb50d3f3e33f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 14 Jul 2021 11:50:06 -0700 Subject: [PATCH 060/502] dmaengine: idxd: fix submission race window Konstantin observed that when descriptors are submitted, the descriptor is added to the pending list after the submission. This creates a race window with the slight possibility that the descriptor can complete before it gets added to the pending list and this window would cause the completion handler to miss processing the descriptor. To address the issue, the addition of the descriptor to the pending list must be done before it gets submitted to the hardware. However, submitting to swq with ENQCMDS instruction can cause a failure with the condition of either wq is full or wq is not "active". With the descriptor allocation being the gate to the wq capacity, it is not possible to hit a retry with ENQCMDS submission to the swq. The only possible failure can happen is when wq is no longer "active" due to hw error and therefore we are moving towards taking down the portal. Given this is a rare condition and there's no longer concern over I/O performance, the driver can walk the completion lists in order to retrieve and abort the descriptor. The error path will set the descriptor to aborted status. It will take the work list lock to prevent further processing of worklist. It will do a delete_all on the pending llist to retrieve all descriptors on the pending llist. The delete_all action does not require a lock. It will walk through the acquired llist to find the aborted descriptor while add all remaining descriptors to the work list since it holds the lock. If it does not find the aborted descriptor on the llist, it will walk through the work list. And if it still does not find the descriptor, then it means the interrupt handler has removed the desc from the llist but is pending on the work list lock and will process it once the error path releases the lock. Fixes: eb15e7154fbf ("dmaengine: idxd: add interrupt handle request and release support") Reported-by: Konstantin Ananyev Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/162628855747.360485.10101925573082466530.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/idxd.h | 14 +++++++ drivers/dma/idxd/irq.c | 27 +++++++++----- drivers/dma/idxd/submit.c | 78 ++++++++++++++++++++++++++++++++++----- 3 files changed, 101 insertions(+), 18 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 26482c7d4c3a..fc708be7ad9a 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -294,6 +294,14 @@ struct idxd_desc { struct idxd_wq *wq; }; +/* + * This is software defined error for the completion status. We overload the error code + * that will never appear in completion status and only SWERR register. + */ +enum idxd_completion_status { + IDXD_COMP_DESC_ABORT = 0xff, +}; + #define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev) #define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev) @@ -482,4 +490,10 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif +static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) +{ + idxd_dma_complete_txd(desc, reason); + idxd_free_desc(desc->wq, desc); +} + #endif diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index ae68e1e5487a..4e3a7198c0ca 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -245,12 +245,6 @@ static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr) return false; } -static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) -{ - idxd_dma_complete_txd(desc, reason); - idxd_free_desc(desc->wq, desc); -} - static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, enum irq_work_type wtype, int *processed, u64 data) @@ -272,8 +266,16 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, reason = IDXD_COMPLETE_DEV_FAIL; llist_for_each_entry_safe(desc, t, head, llnode) { - if (desc->completion->status) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; + + if (status) { + if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + (*processed)++; + continue; + } + + if (unlikely(status != DSA_COMP_SUCCESS)) match_fault(desc, data); complete_desc(desc, reason); (*processed)++; @@ -329,7 +331,14 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, spin_unlock_irqrestore(&irq_entry->list_lock, flags); list_for_each_entry(desc, &flist, list) { - if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + u8 status = desc->completion->status & DSA_COMP_STATUS_MASK; + + if (unlikely(status == IDXD_COMP_DESC_ABORT)) { + complete_desc(desc, IDXD_COMPLETE_ABORT); + continue; + } + + if (unlikely(status != DSA_COMP_SUCCESS)) match_fault(desc, data); complete_desc(desc, reason); } diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index 21d7d09f73dd..36c9c1a89b7e 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -87,9 +87,64 @@ void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc) sbitmap_queue_clear(&wq->sbq, desc->id, cpu); } +static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *n; + + lockdep_assert_held(&ie->list_lock); + list_for_each_entry_safe(d, n, &ie->work_list, list) { + if (d == desc) { + list_del(&d->list); + return d; + } + } + + /* + * At this point, the desc needs to be aborted is held by the completion + * handler where it has taken it off the pending list but has not added to the + * work list. It will be cleaned up by the interrupt handler when it sees the + * IDXD_COMP_DESC_ABORT for completion status. + */ + return NULL; +} + +static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, + struct idxd_desc *desc) +{ + struct idxd_desc *d, *t, *found = NULL; + struct llist_node *head; + unsigned long flags; + + desc->completion->status = IDXD_COMP_DESC_ABORT; + /* + * Grab the list lock so it will block the irq thread handler. This allows the + * abort code to locate the descriptor need to be aborted. + */ + spin_lock_irqsave(&ie->list_lock, flags); + head = llist_del_all(&ie->pending_llist); + if (head) { + llist_for_each_entry_safe(d, t, head, llnode) { + if (d == desc) { + found = desc; + continue; + } + list_add_tail(&desc->list, &ie->work_list); + } + } + + if (!found) + found = list_abort_desc(wq, ie, desc); + spin_unlock_irqrestore(&ie->list_lock, flags); + + if (found) + complete_desc(found, IDXD_COMPLETE_ABORT); +} + int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) { struct idxd_device *idxd = wq->idxd; + struct idxd_irq_entry *ie = NULL; void __iomem *portal; int rc; @@ -107,6 +162,16 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * even on UP because the recipient is a device. */ wmb(); + + /* + * Pending the descriptor to the lockless list for the irq_entry + * that we designated the descriptor to. + */ + if (desc->hw->flags & IDXD_OP_FLAG_RCI) { + ie = &idxd->irq_entries[desc->vector]; + llist_add(&desc->llnode, &ie->pending_llist); + } + if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { @@ -117,18 +182,13 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) * device is not accepting descriptor at all. */ rc = enqcmds(portal, desc->hw); - if (rc < 0) + if (rc < 0) { + if (ie) + llist_abort_desc(wq, ie, desc); return rc; + } } percpu_ref_put(&wq->wq_active); - - /* - * Pending the descriptor to the lockless list for the irq_entry - * that we designated the descriptor to. - */ - if (desc->hw->flags & IDXD_OP_FLAG_RCI) - llist_add(&desc->llnode, &idxd->irq_entries[desc->vector].pending_llist); - return 0; } From ec185dd3ab257dc2a60953fdf1b6622f524cc5b7 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 14 Jun 2021 17:33:10 -0500 Subject: [PATCH 061/502] optee: Fix memory leak when failing to register shm pages Free the previously allocated pages when we encounter an error condition while attempting to register the pages with the secure world. Fixes: a249dd200d03 ("tee: optee: Fix dynamic shm pool allocations") Fixes: 5a769f6ff439 ("optee: Fix multi page dynamic shm pool alloc") Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/shm_pool.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c index d767eebf30bd..da06ce9b9313 100644 --- a/drivers/tee/optee/shm_pool.c +++ b/drivers/tee/optee/shm_pool.c @@ -32,8 +32,10 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, struct page **pages; pages = kcalloc(nr_pages, sizeof(pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; + if (!pages) { + rc = -ENOMEM; + goto err; + } for (i = 0; i < nr_pages; i++) { pages[i] = page; @@ -44,8 +46,14 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, rc = optee_shm_register(shm->ctx, shm, pages, nr_pages, (unsigned long)shm->kaddr); kfree(pages); + if (rc) + goto err; } + return 0; + +err: + __free_pages(page, order); return rc; } From adf752af454e91e123e85e3784972d166837af73 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 14 Jun 2021 17:33:11 -0500 Subject: [PATCH 062/502] optee: Refuse to load the driver under the kdump kernel Fix a hung task issue, seen when booting the kdump kernel, that is caused by all of the secure world threads being in a permanent suspended state: INFO: task swapper/0:1 blocked for more than 120 seconds. Not tainted 5.4.83 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. swapper/0 D 0 1 0 0x00000028 Call trace: __switch_to+0xc8/0x118 __schedule+0x2e0/0x700 schedule+0x38/0xb8 schedule_timeout+0x258/0x388 wait_for_completion+0x16c/0x4b8 optee_cq_wait_for_completion+0x28/0xa8 optee_disable_shm_cache+0xb8/0xf8 optee_probe+0x560/0x61c platform_drv_probe+0x58/0xa8 really_probe+0xe0/0x338 driver_probe_device+0x5c/0xf0 device_driver_attach+0x74/0x80 __driver_attach+0x64/0xe0 bus_for_each_dev+0x84/0xd8 driver_attach+0x30/0x40 bus_add_driver+0x188/0x1e8 driver_register+0x64/0x110 __platform_driver_register+0x54/0x60 optee_driver_init+0x20/0x28 do_one_initcall+0x54/0x24c kernel_init_freeable+0x1e8/0x2c0 kernel_init+0x18/0x118 ret_from_fork+0x10/0x18 The invoke_fn hook returned OPTEE_SMC_RETURN_ETHREAD_LIMIT, indicating that the secure world threads were all in a suspended state at the time of the kernel crash. This intermittently prevented the kdump kernel from booting, resulting in a failure to collect the kernel dump. Make kernel dump collection more reliable on systems utilizing OP-TEE by refusing to load the driver under the kdump kernel. Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index ddb8f9ecf307..5288cd767d82 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -612,6 +613,16 @@ static int optee_probe(struct platform_device *pdev) u32 sec_caps; int rc; + /* + * The kernel may have crashed at the same time that all available + * secure world threads were suspended and we cannot reschedule the + * suspended threads without access to the crashed kernel's wait_queue. + * Therefore, we cannot reliably initialize the OP-TEE driver in the + * kdump kernel. + */ + if (is_kdump_kernel()) + return -ENODEV; + invoke_fn = get_invoke_func(&pdev->dev); if (IS_ERR(invoke_fn)) return PTR_ERR(invoke_fn); From f25889f93184db8b07a543cc2bbbb9a8fcaf4333 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 14 Jun 2021 17:33:12 -0500 Subject: [PATCH 063/502] optee: fix tee out of memory failure seen during kexec reboot The following out of memory errors are seen on kexec reboot from the optee core. [ 0.368428] tee_bnxt_fw optee-clnt0: tee_shm_alloc failed [ 0.368461] tee_bnxt_fw: probe of optee-clnt0 failed with error -22 tee_shm_release() is not invoked on dma shm buffer. Implement .shutdown() method to handle the release of the buffers correctly. More info: https://github.com/OP-TEE/optee_os/issues/3637 Cc: stable@vger.kernel.org Signed-off-by: Allen Pais Reviewed-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/core.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 5288cd767d82..0987074d7ed0 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -573,6 +573,13 @@ static optee_invoke_fn *get_invoke_func(struct device *dev) return ERR_PTR(-EINVAL); } +/* optee_remove - Device Removal Routine + * @pdev: platform device information struct + * + * optee_remove is called by platform subsystem to alert the driver + * that it should release the device + */ + static int optee_remove(struct platform_device *pdev) { struct optee *optee = platform_get_drvdata(pdev); @@ -603,6 +610,18 @@ static int optee_remove(struct platform_device *pdev) return 0; } +/* optee_shutdown - Device Removal Routine + * @pdev: platform device information struct + * + * platform_shutdown is called by the platform subsystem to alert + * the driver that a shutdown, reboot, or kexec is happening and + * device must be disabled. + */ +static void optee_shutdown(struct platform_device *pdev) +{ + optee_disable_shm_cache(platform_get_drvdata(pdev)); +} + static int optee_probe(struct platform_device *pdev) { optee_invoke_fn *invoke_fn; @@ -739,6 +758,7 @@ MODULE_DEVICE_TABLE(of, optee_dt_match); static struct platform_driver optee_driver = { .probe = optee_probe, .remove = optee_remove, + .shutdown = optee_shutdown, .driver = { .name = "optee", .of_match_table = optee_dt_match, From b5c10dd04b7418793517e3286cde5c04759a86de Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 14 Jun 2021 17:33:13 -0500 Subject: [PATCH 064/502] optee: Clear stale cache entries during initialization The shm cache could contain invalid addresses if optee_disable_shm_cache() was not called from the .shutdown hook of the previous kernel before a kexec. These addresses could be unmapped or they could point to mapped but unintended locations in memory. Clear the shared memory cache, while being careful to not translate the addresses returned from OPTEE_SMC_DISABLE_SHM_CACHE, during driver initialization. Once all pre-cache shm objects are removed, proceed with enabling the cache so that we know that we can handle cached shm objects with confidence later in the .shutdown hook. Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/call.c | 36 ++++++++++++++++++++++++++++--- drivers/tee/optee/core.c | 9 ++++++++ drivers/tee/optee/optee_private.h | 1 + 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index 6e6eb836e9b6..387e94768182 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -416,11 +416,13 @@ void optee_enable_shm_cache(struct optee *optee) } /** - * optee_disable_shm_cache() - Disables caching of some shared memory allocation - * in OP-TEE + * __optee_disable_shm_cache() - Disables caching of some shared memory + * allocation in OP-TEE * @optee: main service struct + * @is_mapped: true if the cached shared memory addresses were mapped by this + * kernel, are safe to dereference, and should be freed */ -void optee_disable_shm_cache(struct optee *optee) +static void __optee_disable_shm_cache(struct optee *optee, bool is_mapped) { struct optee_call_waiter w; @@ -439,6 +441,13 @@ void optee_disable_shm_cache(struct optee *optee) if (res.result.status == OPTEE_SMC_RETURN_OK) { struct tee_shm *shm; + /* + * Shared memory references that were not mapped by + * this kernel must be ignored to prevent a crash. + */ + if (!is_mapped) + continue; + shm = reg_pair_to_ptr(res.result.shm_upper32, res.result.shm_lower32); tee_shm_free(shm); @@ -449,6 +458,27 @@ void optee_disable_shm_cache(struct optee *optee) optee_cq_wait_final(&optee->call_queue, &w); } +/** + * optee_disable_shm_cache() - Disables caching of mapped shared memory + * allocations in OP-TEE + * @optee: main service struct + */ +void optee_disable_shm_cache(struct optee *optee) +{ + return __optee_disable_shm_cache(optee, true); +} + +/** + * optee_disable_unmapped_shm_cache() - Disables caching of shared memory + * allocations in OP-TEE which are not + * currently mapped + * @optee: main service struct + */ +void optee_disable_unmapped_shm_cache(struct optee *optee) +{ + return __optee_disable_shm_cache(optee, false); +} + #define PAGELIST_ENTRIES_PER_PAGE \ ((OPTEE_MSG_NONCONTIG_PAGE_SIZE / sizeof(u64)) - 1) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 0987074d7ed0..651d49b53d3b 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -716,6 +716,15 @@ static int optee_probe(struct platform_device *pdev) optee->memremaped_shm = memremaped_shm; optee->pool = pool; + /* + * Ensure that there are no pre-existing shm objects before enabling + * the shm cache so that there's no chance of receiving an invalid + * address during shutdown. This could occur, for example, if we're + * kexec booting from an older kernel that did not properly cleanup the + * shm cache. + */ + optee_disable_unmapped_shm_cache(optee); + optee_enable_shm_cache(optee); if (optee->sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index e25b216a14ef..dbdd367be156 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -159,6 +159,7 @@ int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session); void optee_enable_shm_cache(struct optee *optee); void optee_disable_shm_cache(struct optee *optee); +void optee_disable_unmapped_shm_cache(struct optee *optee); int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm, struct page **pages, size_t num_pages, From dc7019b7d0e188d4093b34bd0747ed0d668c63bf Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Mon, 14 Jun 2021 17:33:14 -0500 Subject: [PATCH 065/502] tee: add tee_shm_alloc_kernel_buf() Adds a new function tee_shm_alloc_kernel_buf() to allocate shared memory from a kernel driver. This function can later be made more lightweight by unnecessary dma-buf export. Cc: stable@vger.kernel.org Reviewed-by: Tyler Hicks Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/tee_shm.c | 18 ++++++++++++++++++ include/linux/tee_drv.h | 1 + 2 files changed, 19 insertions(+) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 00472f5ce22e..c65e44707cd6 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -193,6 +193,24 @@ err_dev_put: } EXPORT_SYMBOL_GPL(tee_shm_alloc); +/** + * tee_shm_alloc_kernel_buf() - Allocate shared memory for kernel buffer + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * + * The returned memory registered in secure world and is suitable to be + * passed as a memory buffer in parameter argument to + * tee_client_invoke_func(). The memory allocated is later freed with a + * call to tee_shm_free(). + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size) +{ + return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); +} +EXPORT_SYMBOL_GPL(tee_shm_alloc_kernel_buf); + struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, size_t length, u32 flags) { diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 54269e47ac9a..8990f7628387 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -332,6 +332,7 @@ void *tee_get_drvdata(struct tee_device *teedev); * @returns a pointer to 'struct tee_shm' */ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); +struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); /** * tee_shm_register() - Register shared memory buffer From 376e4199e327a5cf29b8ec8fb0f64f3d8b429819 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Mon, 14 Jun 2021 17:33:15 -0500 Subject: [PATCH 066/502] tee: Correct inappropriate usage of TEE_SHM_DMA_BUF flag Currently TEE_SHM_DMA_BUF flag has been inappropriately used to not register shared memory allocated for private usage by underlying TEE driver: OP-TEE in this case. So rather add a new flag as TEE_SHM_PRIV that can be utilized by underlying TEE drivers for private allocation and usage of shared memory. With this corrected, allow tee_shm_alloc_kernel_buf() to allocate a shared memory region without the backing of dma-buf. Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Co-developed-by: Tyler Hicks Signed-off-by: Tyler Hicks Reviewed-by: Jens Wiklander Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/call.c | 2 +- drivers/tee/optee/core.c | 3 ++- drivers/tee/optee/rpc.c | 5 +++-- drivers/tee/optee/shm_pool.c | 8 ++++++-- drivers/tee/tee_shm.c | 4 ++-- include/linux/tee_drv.h | 1 + 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index 387e94768182..945f03da0223 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -184,7 +184,7 @@ static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params, struct optee_msg_arg *ma; shm = tee_shm_alloc(ctx, OPTEE_MSG_GET_ARG_SIZE(num_params), - TEE_SHM_MAPPED); + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (IS_ERR(shm)) return shm; diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 651d49b53d3b..5ce13b099d7d 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -278,7 +278,8 @@ static void optee_release(struct tee_context *ctx) if (!ctxdata) return; - shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm)) { arg = tee_shm_get_va(shm, 0); /* diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index 1849180b0278..efbaff7ad7e5 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -314,7 +314,7 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, shm = cmd_alloc_suppl(ctx, sz); break; case OPTEE_RPC_SHM_TYPE_KERNEL: - shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV); break; default: arg->ret = TEEC_ERROR_BAD_PARAMETERS; @@ -502,7 +502,8 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param, switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) { case OPTEE_SMC_RPC_FUNC_ALLOC: - shm = tee_shm_alloc(ctx, param->a1, TEE_SHM_MAPPED); + shm = tee_shm_alloc(ctx, param->a1, + TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) { reg_pair_from_64(¶m->a1, ¶m->a2, pa); reg_pair_from_64(¶m->a4, ¶m->a5, diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c index da06ce9b9313..c41a9a501a6e 100644 --- a/drivers/tee/optee/shm_pool.c +++ b/drivers/tee/optee/shm_pool.c @@ -27,7 +27,11 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, shm->paddr = page_to_phys(page); shm->size = PAGE_SIZE << order; - if (shm->flags & TEE_SHM_DMA_BUF) { + /* + * Shared memory private to the OP-TEE driver doesn't need + * to be registered with OP-TEE. + */ + if (!(shm->flags & TEE_SHM_PRIV)) { unsigned int nr_pages = 1 << order, i; struct page **pages; @@ -60,7 +64,7 @@ err: static void pool_op_free(struct tee_shm_pool_mgr *poolm, struct tee_shm *shm) { - if (shm->flags & TEE_SHM_DMA_BUF) + if (!(shm->flags & TEE_SHM_PRIV)) optee_shm_unregister(shm->ctx, shm); free_pages((unsigned long)shm->kaddr, get_order(shm->size)); diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index c65e44707cd6..8a9384a64f3e 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -117,7 +117,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) return ERR_PTR(-EINVAL); } - if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) { + if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF | TEE_SHM_PRIV))) { dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags); return ERR_PTR(-EINVAL); } @@ -207,7 +207,7 @@ EXPORT_SYMBOL_GPL(tee_shm_alloc); */ struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size) { - return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + return tee_shm_alloc(ctx, size, TEE_SHM_MAPPED); } EXPORT_SYMBOL_GPL(tee_shm_alloc_kernel_buf); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 8990f7628387..3ebfea0781f1 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -27,6 +27,7 @@ #define TEE_SHM_USER_MAPPED BIT(4) /* Memory mapped in user space */ #define TEE_SHM_POOL BIT(5) /* Memory allocated from pool */ #define TEE_SHM_KERNEL_MAPPED BIT(6) /* Memory mapped in kernel space */ +#define TEE_SHM_PRIV BIT(7) /* Memory private to TEE driver */ struct device; struct tee_device; From dfb703ad2a8d366b829818a558337be779746575 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 14 Jun 2021 17:33:16 -0500 Subject: [PATCH 067/502] tpm_ftpm_tee: Free and unregister TEE shared memory during kexec dma-buf backed shared memory cannot be reliably freed and unregistered during a kexec operation even when tee_shm_free() is called on the shm from a .shutdown hook. The problem occurs because dma_buf_put() calls fput() which then uses task_work_add(), with the TWA_RESUME parameter, to queue tee_shm_release() to be called before the current task returns to user mode. However, the current task never returns to user mode before the kexec completes so the memory is never freed nor unregistered. Use tee_shm_alloc_kernel_buf() to avoid dma-buf backed shared memory allocation so that tee_shm_free() can directly call tee_shm_release(). This will ensure that the shm can be freed and unregistered during a kexec operation. Fixes: 09e574831b27 ("tpm/tpm_ftpm_tee: A driver for firmware TPM running inside TEE") Fixes: 1760eb689ed6 ("tpm/tpm_ftpm_tee: add shutdown call back") Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks Reviewed-by: Sumit Garg Acked-by: Jarkko Sakkinen Signed-off-by: Jens Wiklander --- drivers/char/tpm/tpm_ftpm_tee.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c index 2ccdf8ac6994..6e3235565a4d 100644 --- a/drivers/char/tpm/tpm_ftpm_tee.c +++ b/drivers/char/tpm/tpm_ftpm_tee.c @@ -254,11 +254,11 @@ static int ftpm_tee_probe(struct device *dev) pvt_data->session = sess_arg.session; /* Allocate dynamic shared memory with fTPM TA */ - pvt_data->shm = tee_shm_alloc(pvt_data->ctx, - MAX_COMMAND_SIZE + MAX_RESPONSE_SIZE, - TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + pvt_data->shm = tee_shm_alloc_kernel_buf(pvt_data->ctx, + MAX_COMMAND_SIZE + + MAX_RESPONSE_SIZE); if (IS_ERR(pvt_data->shm)) { - dev_err(dev, "%s: tee_shm_alloc failed\n", __func__); + dev_err(dev, "%s: tee_shm_alloc_kernel_buf failed\n", __func__); rc = -ENOMEM; goto out_shm_alloc; } From 914ab19e471d8fb535ed50dff108b0a615f3c2d8 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 14 Jun 2021 17:33:17 -0500 Subject: [PATCH 068/502] firmware: tee_bnxt: Release TEE shm, session, and context during kexec Implement a .shutdown hook that will be called during a kexec operation so that the TEE shared memory, session, and context that were set up during .probe can be properly freed/closed. Additionally, don't use dma-buf backed shared memory for the fw_shm_pool. dma-buf backed shared memory cannot be reliably freed and unregistered during a kexec operation even when tee_shm_free() is called on the shm from a .shutdown hook. The problem occurs because dma_buf_put() calls fput() which then uses task_work_add(), with the TWA_RESUME parameter, to queue tee_shm_release() to be called before the current task returns to user mode. However, the current task never returns to user mode before the kexec completes so the memory is never freed nor unregistered. Use tee_shm_alloc_kernel_buf() to avoid dma-buf backed shared memory allocation so that tee_shm_free() can directly call tee_shm_release(). This will ensure that the shm can be freed and unregistered during a kexec operation. Fixes: 246880958ac9 ("firmware: broadcom: add OP-TEE based BNXT f/w manager") Cc: stable@vger.kernel.org Signed-off-by: Allen Pais Co-developed-by: Tyler Hicks Signed-off-by: Tyler Hicks Reviewed-by: Sumit Garg Acked-by: Florian Fainelli Signed-off-by: Jens Wiklander --- drivers/firmware/broadcom/tee_bnxt_fw.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/broadcom/tee_bnxt_fw.c b/drivers/firmware/broadcom/tee_bnxt_fw.c index ed10da5313e8..a5bf4c3f6dc7 100644 --- a/drivers/firmware/broadcom/tee_bnxt_fw.c +++ b/drivers/firmware/broadcom/tee_bnxt_fw.c @@ -212,10 +212,9 @@ static int tee_bnxt_fw_probe(struct device *dev) pvt_data.dev = dev; - fw_shm_pool = tee_shm_alloc(pvt_data.ctx, MAX_SHM_MEM_SZ, - TEE_SHM_MAPPED | TEE_SHM_DMA_BUF); + fw_shm_pool = tee_shm_alloc_kernel_buf(pvt_data.ctx, MAX_SHM_MEM_SZ); if (IS_ERR(fw_shm_pool)) { - dev_err(pvt_data.dev, "tee_shm_alloc failed\n"); + dev_err(pvt_data.dev, "tee_shm_alloc_kernel_buf failed\n"); err = PTR_ERR(fw_shm_pool); goto out_sess; } @@ -242,6 +241,14 @@ static int tee_bnxt_fw_remove(struct device *dev) return 0; } +static void tee_bnxt_fw_shutdown(struct device *dev) +{ + tee_shm_free(pvt_data.fw_shm_pool); + tee_client_close_session(pvt_data.ctx, pvt_data.session_id); + tee_client_close_context(pvt_data.ctx); + pvt_data.ctx = NULL; +} + static const struct tee_client_device_id tee_bnxt_fw_id_table[] = { {UUID_INIT(0x6272636D, 0x2019, 0x0716, 0x42, 0x43, 0x4D, 0x5F, 0x53, 0x43, 0x48, 0x49)}, @@ -257,6 +264,7 @@ static struct tee_client_driver tee_bnxt_fw_driver = { .bus = &tee_bus_type, .probe = tee_bnxt_fw_probe, .remove = tee_bnxt_fw_remove, + .shutdown = tee_bnxt_fw_shutdown, }, }; From 4e9505064f58d1252805952f8547a5b7dbc5c111 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 17 Jul 2021 16:02:21 +0100 Subject: [PATCH 069/502] net/xfrm/compat: Copy xfrm_spdattr_type_t atributes The attribute-translator has to take in mind maxtype, that is xfrm_link::nla_max. When it is set, attributes are not of xfrm_attr_type_t. Currently, they can be only XFRMA_SPD_MAX (message XFRM_MSG_NEWSPDINFO), their UABI is the same for 64/32-bit, so just copy them. Thanks to YueHaibing for reporting this: In xfrm_user_rcv_msg_compat() if maxtype is not zero and less than XFRMA_MAX, nlmsg_parse_deprecated() do not initialize attrs array fully. xfrm_xlate32() will access uninit 'attrs[i]' while iterating all attrs array. KASAN: probably user-memory-access in range [0x0000000041b58ab0-0x0000000041b58ab7] CPU: 0 PID: 15799 Comm: syz-executor.2 Tainted: G W 5.14.0-rc1-syzkaller #0 RIP: 0010:nla_type include/net/netlink.h:1130 [inline] RIP: 0010:xfrm_xlate32_attr net/xfrm/xfrm_compat.c:410 [inline] RIP: 0010:xfrm_xlate32 net/xfrm/xfrm_compat.c:532 [inline] RIP: 0010:xfrm_user_rcv_msg_compat+0x5e5/0x1070 net/xfrm/xfrm_compat.c:577 [...] Call Trace: xfrm_user_rcv_msg+0x556/0x8b0 net/xfrm/xfrm_user.c:2774 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504 xfrm_netlink_rcv+0x6b/0x90 net/xfrm/xfrm_user.c:2824 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340 netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:702 [inline] Fixes: 5106f4a8acff ("xfrm/compat: Add 32=>64-bit messages translator") Cc: Reported-by: YueHaibing Signed-off-by: Dmitry Safonov Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_compat.c | 49 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index a20aec9d7393..2bf269390163 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -298,8 +298,16 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]); nla_for_each_attr(nla, attrs, len, remaining) { - int err = xfrm_xlate64_attr(dst, nla); + int err; + switch (type) { + case XFRM_MSG_NEWSPDINFO: + err = xfrm_nla_cpy(dst, nla, nla_len(nla)); + break; + default: + err = xfrm_xlate64_attr(dst, nla); + break; + } if (err) return err; } @@ -341,7 +349,8 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src /* Calculates len of translated 64-bit message. */ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, - struct nlattr *attrs[XFRMA_MAX+1]) + struct nlattr *attrs[XFRMA_MAX + 1], + int maxtype) { size_t len = nlmsg_len(src); @@ -358,10 +367,20 @@ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, case XFRM_MSG_POLEXPIRE: len += 8; break; + case XFRM_MSG_NEWSPDINFO: + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + return len; default: break; } + /* Unexpected for anything, but XFRM_MSG_NEWSPDINFO, please + * correct both 64=>32-bit and 32=>64-bit translators to copy + * new attributes. + */ + if (WARN_ON_ONCE(maxtype)) + return len; + if (attrs[XFRMA_SA]) len += 4; if (attrs[XFRMA_POLICY]) @@ -440,7 +459,8 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, struct nlattr *attrs[XFRMA_MAX+1], - size_t size, u8 type, struct netlink_ext_ack *extack) + size_t size, u8 type, int maxtype, + struct netlink_ext_ack *extack) { size_t pos; int i; @@ -520,6 +540,25 @@ static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, } pos = dst->nlmsg_len; + if (maxtype) { + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO); + + for (i = 1; i <= maxtype; i++) { + int err; + + if (!attrs[i]) + continue; + + /* just copy - no need for translation */ + err = xfrm_attr_cpy32(dst, &pos, attrs[i], size, + nla_len(attrs[i]), nla_len(attrs[i])); + if (err) + return err; + } + return 0; + } + for (i = 1; i < XFRMA_MAX + 1; i++) { int err; @@ -564,7 +603,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (err < 0) return ERR_PTR(err); - len = xfrm_user_rcv_calculate_len64(h32, attrs); + len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype); /* The message doesn't need translation */ if (len == nlmsg_len(h32)) return NULL; @@ -574,7 +613,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (!h64) return ERR_PTR(-ENOMEM); - err = xfrm_xlate32(h64, h32, attrs, len, type, extack); + err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack); if (err < 0) { kvfree(h64); return ERR_PTR(err); From 70bfdf62e93a4d73cfbaf83a3ac708a483ef7a71 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 17 Jul 2021 16:02:22 +0100 Subject: [PATCH 070/502] selftests/net/ipsec: Add test for xfrm_spdattr_type_t Set hthresh, dump it again and verify thresh.lbits && thresh.rbits. They are passed as attributes of xfrm_spdattr_type_t, different from other message attributes that use xfrm_attr_type_t. Also, test attribute that is bigger than XFRMA_SPD_MAX, currently it should be silently ignored. Cc: Shuah Khan Cc: linux-kselftest@vger.kernel.org Signed-off-by: Dmitry Safonov Signed-off-by: Steffen Klassert --- tools/testing/selftests/net/ipsec.c | 165 +++++++++++++++++++++++++++- 1 file changed, 163 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index f23438d512c5..3d7dde2c321b 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -484,13 +484,16 @@ enum desc_type { MONITOR_ACQUIRE, EXPIRE_STATE, EXPIRE_POLICY, + SPDINFO_ATTRS, }; const char *desc_name[] = { "create tunnel", "alloc spi", "monitor acquire", "expire state", - "expire policy" + "expire policy", + "spdinfo attributes", + "" }; struct xfrm_desc { enum desc_type type; @@ -1593,6 +1596,155 @@ out_close: return ret; } +static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq, + unsigned thresh4_l, unsigned thresh4_r, + unsigned thresh6_l, unsigned thresh6_r, + bool add_bad_attr) + +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrmu_spdhthresh thresh; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_NEWSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + thresh.lbits = thresh4_l; + thresh.rbits = thresh4_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + thresh.lbits = thresh6_l; + thresh.rbits = thresh6_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + if (add_bad_attr) { + BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) { + pr_err("adding attribute failed: no space"); + return -1; + } + } + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return -1; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + return 0; +} + +static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq) +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) { + pr_err("Can't set SPD HTHRESH"); + return KSFT_FAIL; + } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) { + size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused)); + struct rtattr *attr = (void *)req.attrbuf; + int got_thresh = 0; + + for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 32 || t->rbits != 31) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 120 || t->rbits != 16) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + } + if (got_thresh != 2) { + pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh); + return KSFT_FAIL; + } + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return KSFT_FAIL; + } else { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + /* Restore the default */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) { + pr_err("Can't restore SPD HTHRESH"); + return KSFT_FAIL; + } + + /* + * At this moment xfrm uses nlmsg_parse_deprecated(), which + * implies NL_VALIDATE_LIBERAL - ignoring attributes with + * (type > maxtype). nla_parse_depricated_strict() would enforce + * it. Or even stricter nla_parse(). + * Right now it's not expected to fail, but to be ignored. + */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true)) + return KSFT_PASS; + + return KSFT_PASS; +} + static int child_serv(int xfrm_sock, uint32_t *seq, unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) { @@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) case EXPIRE_POLICY: ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); break; + case SPDINFO_ATTRS: + ret = xfrm_spdinfo_attrs(xfrm_sock, &seq); + break; default: printk("Unknown desc type %d", desc.type); exit(KSFT_FAIL); @@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto) * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 * * Check the affected by the UABI difference structures. + * Also, check translation for xfrm_set_spdinfo: it has it's own attributes + * which needs to be correctly copied, but not translated. */ -const unsigned int compat_plan = 4; +const unsigned int compat_plan = 5; static int write_compat_struct_tests(int test_desc_fd) { struct xfrm_desc desc = {}; @@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd) if (__write_desc(test_desc_fd, &desc)) return -1; + desc.type = SPDINFO_ATTRS; + if (__write_desc(test_desc_fd, &desc)) + return -1; + return 0; } From 990e4ad3ddcb72216caeddd6e62c5f45a21e8121 Mon Sep 17 00:00:00 2001 From: Xiangyang Zhang Date: Mon, 28 Jun 2021 23:22:39 +0800 Subject: [PATCH 071/502] staging: rtl8723bs: Fix a resource leak in sd_int_dpc The "c2h_evt" variable is not freed when function call "c2h_evt_read_88xx" failed Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver") Reviewed-by: Hans de Goede Signed-off-by: Xiangyang Zhang Cc: stable Link: https://lore.kernel.org/r/20210628152239.5475-1-xyz.sun.ok@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/hal/sdio_ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/rtl8723bs/hal/sdio_ops.c b/drivers/staging/rtl8723bs/hal/sdio_ops.c index 2dd251ce177e..a545832a468e 100644 --- a/drivers/staging/rtl8723bs/hal/sdio_ops.c +++ b/drivers/staging/rtl8723bs/hal/sdio_ops.c @@ -909,6 +909,8 @@ void sd_int_dpc(struct adapter *adapter) } else { rtw_c2h_wk_cmd(adapter, (u8 *)c2h_evt); } + } else { + kfree(c2h_evt); } } else { /* Error handling for malloc fail */ From 61acabaae5ba58b3c32e6e90d24c2c0827fd27a8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Jun 2021 18:37:33 +0300 Subject: [PATCH 072/502] serial: max310x: Unprepare and disable clock in error path In one error case the clock may be left prepared and enabled. Unprepare and disable clock in that case to balance state of the hardware. Fixes: d4d6f03c4fb3 ("serial: max310x: Try to get crystal clock rate from property") Reported-by: Dan Carpenter Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210625153733.12911-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 0c1e4df52215..ef11860cd69e 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1293,7 +1293,8 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty freq = uartclk; if (freq == 0) { dev_err(dev, "Cannot get clock rate\n"); - return -EINVAL; + ret = -EINVAL; + goto out_clk; } if (xtal) { From e5227c51090e165db4b48dcaa300605bfced7014 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 26 Jun 2021 06:11:05 +0200 Subject: [PATCH 073/502] serial: 8250: Mask out floating 16/32-bit bus bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure only actual 8 bits of the IIR register are used in determining the port type in `autoconfig'. The `serial_in' port accessor returns the `unsigned int' type, meaning that with UPIO_AU, UPIO_MEM16, UPIO_MEM32, and UPIO_MEM32BE access types more than 8 bits of data are returned, of which the high order bits will often come from bus lines that are left floating in the data phase. For example with the MIPS Malta board's CBUS UART, where the registers are aligned on 8-byte boundaries and which uses 32-bit accesses, data as follows is returned: YAMON> dump -32 0xbf000900 0x40 BF000900: 1F000942 1F000942 1F000900 1F000900 ...B...B........ BF000910: 1F000901 1F000901 1F000900 1F000900 ................ BF000920: 1F000900 1F000900 1F000960 1F000960 ...........`...` BF000930: 1F000900 1F000900 1F0009FF 1F0009FF ................ YAMON> Evidently high-order 24 bits return values previously driven in the address phase (the 3 highest order address bits used with the command above are masked out in the simple virtual address mapping used here and come out at zeros on the external bus), a common scenario with bus lines left floating, due to bus capacitance. Consequently when the value of IIR, mapped at 0x1f000910, is retrieved in `autoconfig', it comes out at 0x1f0009c1 and when it is right-shifted by 6 and then assigned to 8-bit `scratch' variable, the value calculated is 0x27, not one of 0, 1, 2, 3 expected in port type determination. Fix the issue then, by assigning the value returned from `serial_in' to `scratch' first, which masks out 24 high-order bits retrieved, and only then right-shift the resulting 8-bit data quantity, producing the value of 3 in this case, as expected. Fix the same issue in `serial_dl_read'. The problem first appeared with Linux 2.6.9-rc3 which predates our repo history, but the origin could be identified with the old MIPS/Linux repo also at: as commit e0d2356c0777 ("Merge with Linux 2.6.9-rc3."), where code in `serial_in' was updated with this case: + case UPIO_MEM32: + return readl(up->port.membase + offset); + which made it produce results outside the unsigned 8-bit range for the first time, though obviously it is system dependent what actual values appear in the high order bits retrieved and it may well have been zeros in the relevant positions with the system the change originally was intended for. It is at that point that code in `autoconf' should have been updated accordingly, but clearly it was overlooked. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org # v2.6.12+ Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Maciej W. Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.21.2106260516220.37803@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2164290cbd31..2e7000f79b03 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -311,7 +311,11 @@ static const struct serial8250_config uart_config[] = { /* Uart divisor latch read */ static int default_serial_dl_read(struct uart_8250_port *up) { - return serial_in(up, UART_DLL) | serial_in(up, UART_DLM) << 8; + /* Assign these in pieces to truncate any bits above 7. */ + unsigned char dll = serial_in(up, UART_DLL); + unsigned char dlm = serial_in(up, UART_DLM); + + return dll | dlm << 8; } /* Uart divisor latch write */ @@ -1297,9 +1301,11 @@ static void autoconfig(struct uart_8250_port *up) serial_out(up, UART_LCR, 0); serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - scratch = serial_in(up, UART_IIR) >> 6; - switch (scratch) { + /* Assign this as it is to truncate any bits above 7. */ + scratch = serial_in(up, UART_IIR); + + switch (scratch >> 6) { case 0: autoconfig_8250(up); break; From 9a936d6c3d3d6c33ecbadf72dccdb567b5cd3c72 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sat, 26 Jun 2021 06:11:13 +0200 Subject: [PATCH 074/502] MIPS: Malta: Do not byte-swap accesses to the CBUS UART MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct big-endian accesses to the CBUS UART, a Malta on-board discrete TI16C550C part wired directly to the system controller's device bus, and do not use byte swapping with the 32-bit accesses to the device. The CBUS is used for devices such as the boot flash memory needed early on in system bootstrap even before PCI has been initialised. Therefore it uses the system controller's device bus, which follows the endianness set with the CPU, which means no byte-swapping is ever required for data accesses to CBUS, unlike with PCI. The CBUS UART uses the UPIO_MEM32 access method, that is the `readl' and `writel' MMIO accessors, which on the MIPS platform imply byte-swapping with PCI systems. Consequently the wrong byte lane is accessed with the big-endian configuration and the UART is not correctly accessed. As it happens the UPIO_MEM32BE access method makes use of the `ioread32' and `iowrite32' MMIO accessors, which still use `readl' and `writel' respectively, however they byte-swap data passed, effectively cancelling swapping done with the accessors themselves and making it suitable for the CBUS UART. Make the CBUS UART switch between UPIO_MEM32 and UPIO_MEM32BE then, based on the endianness selected. With this change in place the device is correctly recognised with big-endian Malta at boot, along with the Super I/O devices behind PCI: Serial: 8250/16550 driver, 5 ports, IRQ sharing enabled printk: console [ttyS0] disabled serial8250.0: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A printk: console [ttyS0] enabled printk: bootconsole [uart8250] disabled serial8250.0: ttyS1 at I/O 0x2f8 (irq = 3, base_baud = 115200) is a 16550A serial8250.0: ttyS2 at MMIO 0x1f000900 (irq = 20, base_baud = 230400) is a 16550A Fixes: e7c4782f92fc ("[MIPS] Put an end to 's long and annyoing existence") Cc: stable@vger.kernel.org # v2.6.23+ Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Maciej W. Rozycki Link: https://lore.kernel.org/r/alpine.DEB.2.21.2106260524430.37803@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- arch/mips/mti-malta/malta-platform.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c index ee7471984fe7..4ffbcc58c6f6 100644 --- a/arch/mips/mti-malta/malta-platform.c +++ b/arch/mips/mti-malta/malta-platform.c @@ -48,7 +48,8 @@ static struct plat_serial8250_port uart8250_data[] = { .mapbase = 0x1f000900, /* The CBUS UART */ .irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB2, .uartclk = 3686400, /* Twice the usual clk! */ - .iotype = UPIO_MEM32, + .iotype = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) ? + UPIO_MEM32BE : UPIO_MEM32, .flags = CBUS_UART_FLAGS, .regshift = 3, }, From cc9ca4d95846cbbece48d9cd385550f8fba6a3c1 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Wed, 30 Jun 2021 13:56:43 +0100 Subject: [PATCH 075/502] serial: tegra: Only print FIFO error message when an error occurs The Tegra serial driver always prints an error message when enabling the FIFO for devices that have support for checking the FIFO enable status. Fix this by displaying the error message, only when an error occurs. Finally, update the error message to make it clear that enabling the FIFO failed and display the error code. Fixes: 222dcdff3405 ("serial: tegra: check for FIFO mode enabled status") Cc: Acked-by: Thierry Reding Signed-off-by: Jon Hunter Link: https://lore.kernel.org/r/20210630125643.264264-1-jonathanh@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial-tegra.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index 222032792d6c..eba5b9ecba34 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -1045,9 +1045,11 @@ static int tegra_uart_hw_init(struct tegra_uart_port *tup) if (tup->cdata->fifo_mode_enable_status) { ret = tegra_uart_wait_fifo_mode_enabled(tup); - dev_err(tup->uport.dev, "FIFO mode not enabled\n"); - if (ret < 0) + if (ret < 0) { + dev_err(tup->uport.dev, + "Failed to enable FIFO mode: %d\n", ret); return ret; + } } else { /* * For all tegra devices (up to t210), there is a hardware From 853a9ae29e978d37f5dfa72622a68c9ae3d7fa89 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 14 Jul 2021 10:04:27 +0200 Subject: [PATCH 076/502] serial: 8250: fix handle_irq locking The 8250 handle_irq callback is not just called from the interrupt handler but also from a timer callback when polling (e.g. for ports without an interrupt line). Consequently the callback must explicitly disable interrupts to avoid a potential deadlock with another interrupt in polled mode. Add back an irqrestore-version of the sysrq port-unlock helper and use it in the 8250 callbacks that need it. Fixes: 75f4e830fa9c ("serial: do not restore interrupt state in sysrq helper") Cc: stable@vger.kernel.org # 5.13 Cc: Joel Stanley Cc: Andrew Jeffery Reported-by: kernel test robot Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210714080427.28164-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_aspeed_vuart.c | 5 +++-- drivers/tty/serial/8250/8250_fsl.c | 5 +++-- drivers/tty/serial/8250/8250_port.c | 5 +++-- include/linux/serial_core.h | 24 +++++++++++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 4caab8714e2c..2350fb3bb5e4 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -329,6 +329,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned int iir, lsr; + unsigned long flags; unsigned int space, count; iir = serial_port_in(port, UART_IIR); @@ -336,7 +337,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) if (iir & UART_IIR_NO_INT) return 0; - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); lsr = serial_port_in(port, UART_LSR); @@ -370,7 +371,7 @@ static int aspeed_vuart_handle_irq(struct uart_port *port) if (lsr & UART_LSR_THRE) serial8250_tx_chars(up); - uart_unlock_and_check_sysrq(port); + uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 4e75d2e4f87c..fc65a2293ce9 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -30,10 +30,11 @@ struct fsl8250_data { int fsl8250_handle_irq(struct uart_port *port) { unsigned char lsr, orig_lsr; + unsigned long flags; unsigned int iir; struct uart_8250_port *up = up_to_u8250p(port); - spin_lock(&up->port.lock); + spin_lock_irqsave(&up->port.lock, flags); iir = port->serial_in(port, UART_IIR); if (iir & UART_IIR_NO_INT) { @@ -82,7 +83,7 @@ int fsl8250_handle_irq(struct uart_port *port) up->lsr_saved_flags = orig_lsr; - uart_unlock_and_check_sysrq(&up->port); + uart_unlock_and_check_sysrq_irqrestore(&up->port, flags); return 1; } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2e7000f79b03..1da29a219842 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1899,11 +1899,12 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) unsigned char status; struct uart_8250_port *up = up_to_u8250p(port); bool skip_rx = false; + unsigned long flags; if (iir & UART_IIR_NO_INT) return 0; - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); status = serial_port_in(port, UART_LSR); @@ -1929,7 +1930,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) (up->ier & UART_IER_THRI)) serial8250_tx_chars(up); - uart_unlock_and_check_sysrq(port); + uart_unlock_and_check_sysrq_irqrestore(port, flags); return 1; } diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 52d7fb92a69d..c58cc142d23f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -518,6 +518,25 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) if (sysrq_ch) handle_sysrq(sysrq_ch); } + +static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, + unsigned long flags) +{ + int sysrq_ch; + + if (!port->has_sysrq) { + spin_unlock_irqrestore(&port->lock, flags); + return; + } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, flags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { @@ -531,6 +550,11 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { spin_unlock(&port->lock); } +static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, + unsigned long flags) +{ + spin_unlock_irqrestore(&port->lock, flags); +} #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* From 7f0909db761535aefafa77031062603a71557267 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 13 Jul 2021 13:17:39 +0300 Subject: [PATCH 077/502] serial: 8250_pci: Enumerate Elkhart Lake UARTs via dedicated driver Elkhart Lake UARTs are PCI enumerated Synopsys DesignWare v4.0+ UART integrated with Intel iDMA 32-bit DMA controller. There is a specific driver to handle them, i.e. 8250_lpss. Hence, disable 8250_pci enumeration for these UARTs. Fixes: 1b91d97c66ef ("serial: 8250_lpss: Add ->setup() for Elkhart Lake ports") Fixes: 4f912b898dc2 ("serial: 8250_lpss: Enable HS UART on Elkhart Lake") Cc: stable Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210713101739.36962-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 75827b608fdb..02985cf90ef2 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3836,6 +3836,12 @@ static const struct pci_device_id blacklist[] = { { PCI_VDEVICE(INTEL, 0x0f0c), }, { PCI_VDEVICE(INTEL, 0x228a), }, { PCI_VDEVICE(INTEL, 0x228c), }, + { PCI_VDEVICE(INTEL, 0x4b96), }, + { PCI_VDEVICE(INTEL, 0x4b97), }, + { PCI_VDEVICE(INTEL, 0x4b98), }, + { PCI_VDEVICE(INTEL, 0x4b99), }, + { PCI_VDEVICE(INTEL, 0x4b9a), }, + { PCI_VDEVICE(INTEL, 0x4b9b), }, { PCI_VDEVICE(INTEL, 0x9ce3), }, { PCI_VDEVICE(INTEL, 0x9ce4), }, From cb7abd1db6e5f99a05f1a00b65be29029a6a152a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:35:44 +0200 Subject: [PATCH 078/502] staging: rtl8723bs: select CONFIG_CRYPTO_LIB_ARC4 The other rtlwifi drivers already have this, but r8723bs was converted to the generic implementation without adding the select: ERROR: modpost: "arc4_crypt" [drivers/staging/rtl8723bs/r8723bs.ko] undefined! ERROR: modpost: "arc4_setkey" [drivers/staging/rtl8723bs/r8723bs.ko] undefined! Fixes: 1b11e893eda0 ("staging: rtl8723bs: replace private arc4 encryption with in-kernel one") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210721153550.3624490-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8723bs/Kconfig b/drivers/staging/rtl8723bs/Kconfig index a88467334dac..7eae820eae3b 100644 --- a/drivers/staging/rtl8723bs/Kconfig +++ b/drivers/staging/rtl8723bs/Kconfig @@ -5,6 +5,7 @@ config RTL8723BS depends on m select WIRELESS_EXT select WEXT_PRIV + select CRYPTO_LIB_ARC4 help This option enables support for RTL8723BS SDIO drivers, such as the wifi found on the 1st gen Intel Compute Stick, the CHIP From 1e7107c5ef44431bc1ebbd4c353f1d7c22e5f2ec Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 16 Jun 2021 08:51:57 -0400 Subject: [PATCH 079/502] cgroup1: fix leaked context root causing sporadic NULL deref in LTP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Richard reported sporadic (roughly one in 10 or so) null dereferences and other strange behaviour for a set of automated LTP tests. Things like: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 1516 Comm: umount Not tainted 5.10.0-yocto-standard #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014 RIP: 0010:kernfs_sop_show_path+0x1b/0x60 ...or these others: RIP: 0010:do_mkdirat+0x6a/0xf0 RIP: 0010:d_alloc_parallel+0x98/0x510 RIP: 0010:do_readlinkat+0x86/0x120 There were other less common instances of some kind of a general scribble but the common theme was mount and cgroup and a dubious dentry triggering the NULL dereference. I was only able to reproduce it under qemu by replicating Richard's setup as closely as possible - I never did get it to happen on bare metal, even while keeping everything else the same. In commit 71d883c37e8d ("cgroup_do_mount(): massage calling conventions") we see this as a part of the overall change: -------------- struct cgroup_subsys *ss; - struct dentry *dentry; [...] - dentry = cgroup_do_mount(&cgroup_fs_type, fc->sb_flags, root, - CGROUP_SUPER_MAGIC, ns); [...] - if (percpu_ref_is_dying(&root->cgrp.self.refcnt)) { - struct super_block *sb = dentry->d_sb; - dput(dentry); + ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); + if (!ret && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + struct super_block *sb = fc->root->d_sb; + dput(fc->root); deactivate_locked_super(sb); msleep(10); return restart_syscall(); } -------------- In changing from the local "*dentry" variable to using fc->root, we now export/leave that dentry pointer in the file context after doing the dput() in the unlikely "is_dying" case. With LTP doing a crazy amount of back to back mount/unmount [testcases/bin/cgroup_regression_5_1.sh] the unlikely becomes slightly likely and then bad things happen. A fix would be to not leave the stale reference in fc->root as follows: --------------                 dput(fc->root); + fc->root = NULL;                 deactivate_locked_super(sb); -------------- ...but then we are just open-coding a duplicate of fc_drop_locked() so we simply use that instead. Cc: Al Viro Cc: Tejun Heo Cc: Zefan Li Cc: Johannes Weiner Cc: stable@vger.kernel.org # v5.1+ Reported-by: Richard Purdie Fixes: 71d883c37e8d ("cgroup_do_mount(): massage calling conventions") Signed-off-by: Paul Gortmaker Signed-off-by: Tejun Heo --- fs/internal.h | 1 - include/linux/fs_context.h | 1 + kernel/cgroup/cgroup-v1.c | 4 +--- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 3ce8edbaa3ca..82e8eb32ff3d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -61,7 +61,6 @@ extern void __init chrdev_init(void); */ extern const struct fs_context_operations legacy_fs_context_ops; extern int parse_monolithic_mount_data(struct fs_context *, void *); -extern void fc_drop_locked(struct fs_context *); extern void vfs_clean_context(struct fs_context *fc); extern int finish_clean_context(struct fs_context *fc); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index e2bc16300c82..6b54982fc5f3 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -141,6 +141,7 @@ extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param); +extern void fc_drop_locked(struct fs_context *fc); /* * sget() wrappers to be called from the ->get_tree() op. diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 8d6bf56ed77a..de2c432dee20 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1221,9 +1221,7 @@ int cgroup1_get_tree(struct fs_context *fc) ret = cgroup_do_get_tree(fc); if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { - struct super_block *sb = fc->root->d_sb; - dput(fc->root); - deactivate_locked_super(sb); + fc_drop_locked(fc); ret = 1; } From b42b0bddcbc87b4c66f6497f66fc72d52b712aa7 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 14 Jul 2021 17:19:33 +0800 Subject: [PATCH 080/502] workqueue: fix UAF in pwq_unbound_release_workfn() I got a UAF report when doing fuzz test: [ 152.880091][ T8030] ================================================================== [ 152.881240][ T8030] BUG: KASAN: use-after-free in pwq_unbound_release_workfn+0x50/0x190 [ 152.882442][ T8030] Read of size 4 at addr ffff88810d31bd00 by task kworker/3:2/8030 [ 152.883578][ T8030] [ 152.883932][ T8030] CPU: 3 PID: 8030 Comm: kworker/3:2 Not tainted 5.13.0+ #249 [ 152.885014][ T8030] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 [ 152.886442][ T8030] Workqueue: events pwq_unbound_release_workfn [ 152.887358][ T8030] Call Trace: [ 152.887837][ T8030] dump_stack_lvl+0x75/0x9b [ 152.888525][ T8030] ? pwq_unbound_release_workfn+0x50/0x190 [ 152.889371][ T8030] print_address_description.constprop.10+0x48/0x70 [ 152.890326][ T8030] ? pwq_unbound_release_workfn+0x50/0x190 [ 152.891163][ T8030] ? pwq_unbound_release_workfn+0x50/0x190 [ 152.891999][ T8030] kasan_report.cold.15+0x82/0xdb [ 152.892740][ T8030] ? pwq_unbound_release_workfn+0x50/0x190 [ 152.893594][ T8030] __asan_load4+0x69/0x90 [ 152.894243][ T8030] pwq_unbound_release_workfn+0x50/0x190 [ 152.895057][ T8030] process_one_work+0x47b/0x890 [ 152.895778][ T8030] worker_thread+0x5c/0x790 [ 152.896439][ T8030] ? process_one_work+0x890/0x890 [ 152.897163][ T8030] kthread+0x223/0x250 [ 152.897747][ T8030] ? set_kthread_struct+0xb0/0xb0 [ 152.898471][ T8030] ret_from_fork+0x1f/0x30 [ 152.899114][ T8030] [ 152.899446][ T8030] Allocated by task 8884: [ 152.900084][ T8030] kasan_save_stack+0x21/0x50 [ 152.900769][ T8030] __kasan_kmalloc+0x88/0xb0 [ 152.901416][ T8030] __kmalloc+0x29c/0x460 [ 152.902014][ T8030] alloc_workqueue+0x111/0x8e0 [ 152.902690][ T8030] __btrfs_alloc_workqueue+0x11e/0x2a0 [ 152.903459][ T8030] btrfs_alloc_workqueue+0x6d/0x1d0 [ 152.904198][ T8030] scrub_workers_get+0x1e8/0x490 [ 152.904929][ T8030] btrfs_scrub_dev+0x1b9/0x9c0 [ 152.905599][ T8030] btrfs_ioctl+0x122c/0x4e50 [ 152.906247][ T8030] __x64_sys_ioctl+0x137/0x190 [ 152.906916][ T8030] do_syscall_64+0x34/0xb0 [ 152.907535][ T8030] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 152.908365][ T8030] [ 152.908688][ T8030] Freed by task 8884: [ 152.909243][ T8030] kasan_save_stack+0x21/0x50 [ 152.909893][ T8030] kasan_set_track+0x20/0x30 [ 152.910541][ T8030] kasan_set_free_info+0x24/0x40 [ 152.911265][ T8030] __kasan_slab_free+0xf7/0x140 [ 152.911964][ T8030] kfree+0x9e/0x3d0 [ 152.912501][ T8030] alloc_workqueue+0x7d7/0x8e0 [ 152.913182][ T8030] __btrfs_alloc_workqueue+0x11e/0x2a0 [ 152.913949][ T8030] btrfs_alloc_workqueue+0x6d/0x1d0 [ 152.914703][ T8030] scrub_workers_get+0x1e8/0x490 [ 152.915402][ T8030] btrfs_scrub_dev+0x1b9/0x9c0 [ 152.916077][ T8030] btrfs_ioctl+0x122c/0x4e50 [ 152.916729][ T8030] __x64_sys_ioctl+0x137/0x190 [ 152.917414][ T8030] do_syscall_64+0x34/0xb0 [ 152.918034][ T8030] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 152.918872][ T8030] [ 152.919203][ T8030] The buggy address belongs to the object at ffff88810d31bc00 [ 152.919203][ T8030] which belongs to the cache kmalloc-512 of size 512 [ 152.921155][ T8030] The buggy address is located 256 bytes inside of [ 152.921155][ T8030] 512-byte region [ffff88810d31bc00, ffff88810d31be00) [ 152.922993][ T8030] The buggy address belongs to the page: [ 152.923800][ T8030] page:ffffea000434c600 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10d318 [ 152.925249][ T8030] head:ffffea000434c600 order:2 compound_mapcount:0 compound_pincount:0 [ 152.926399][ T8030] flags: 0x57ff00000010200(slab|head|node=1|zone=2|lastcpupid=0x7ff) [ 152.927515][ T8030] raw: 057ff00000010200 dead000000000100 dead000000000122 ffff888009c42c80 [ 152.928716][ T8030] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 [ 152.929890][ T8030] page dumped because: kasan: bad access detected [ 152.930759][ T8030] [ 152.931076][ T8030] Memory state around the buggy address: [ 152.931851][ T8030] ffff88810d31bc00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 152.932967][ T8030] ffff88810d31bc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 152.934068][ T8030] >ffff88810d31bd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 152.935189][ T8030] ^ [ 152.935763][ T8030] ffff88810d31bd80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 152.936847][ T8030] ffff88810d31be00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 152.937940][ T8030] ================================================================== If apply_wqattrs_prepare() fails in alloc_workqueue(), it will call put_pwq() which invoke a work queue to call pwq_unbound_release_workfn() and use the 'wq'. The 'wq' allocated in alloc_workqueue() will be freed in error path when apply_wqattrs_prepare() fails. So it will lead a UAF. CPU0 CPU1 alloc_workqueue() alloc_and_link_pwqs() apply_wqattrs_prepare() fails apply_wqattrs_cleanup() schedule_work(&pwq->unbound_release_work) kfree(wq) worker_thread() pwq_unbound_release_workfn() <- trigger uaf here If apply_wqattrs_prepare() fails, the new pwq are not linked, it doesn't hold any reference to the 'wq', 'wq' is invalid to access in the worker, so add check pwq if linked to fix this. Fixes: 2d5f0764b526 ("workqueue: split apply_workqueue_attrs() into 3 stages") Cc: stable@vger.kernel.org # v4.2+ Reported-by: Hulk Robot Suggested-by: Lai Jiangshan Signed-off-by: Yang Yingliang Reviewed-by: Lai Jiangshan Tested-by: Pavel Skripkin Signed-off-by: Tejun Heo --- kernel/workqueue.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 50142fc08902..f148eacda55a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3676,15 +3676,21 @@ static void pwq_unbound_release_workfn(struct work_struct *work) unbound_release_work); struct workqueue_struct *wq = pwq->wq; struct worker_pool *pool = pwq->pool; - bool is_last; + bool is_last = false; - if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) - return; + /* + * when @pwq is not linked, it doesn't hold any reference to the + * @wq, and @wq is invalid to access. + */ + if (!list_empty(&pwq->pwqs_node)) { + if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) + return; - mutex_lock(&wq->mutex); - list_del_rcu(&pwq->pwqs_node); - is_last = list_empty(&wq->pwqs); - mutex_unlock(&wq->mutex); + mutex_lock(&wq->mutex); + list_del_rcu(&pwq->pwqs_node); + is_last = list_empty(&wq->pwqs); + mutex_unlock(&wq->mutex); + } mutex_lock(&wq_pool_mutex); put_unbound_pool(pool); From 456a9dace42ecfcec7ce6e17c18d1985d628dcd0 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:29 -0700 Subject: [PATCH 081/502] interconnect: Zero initial BW after sync-state The initial BW values may be used by providers to enforce floors. Zero these values after sync-state so that providers know when to stop enforcing them. Fixes: b1d681d8d324 ("interconnect: Add sync state support") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-2-mdtipton@codeaurora.org Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 8a1e70e00876..945121e18b5c 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1106,6 +1106,8 @@ void icc_sync_state(struct device *dev) dev_dbg(p->dev, "interconnect provider is in synced state\n"); list_for_each_entry(n, &p->nodes, node_list) { if (n->init_avg || n->init_peak) { + n->init_avg = 0; + n->init_peak = 0; aggregate_requests(n); p->set(n, n); } From 73606ba9242f8e32023699b500b7922b4cf2993c Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:30 -0700 Subject: [PATCH 082/502] interconnect: Always call pre_aggregate before aggregate The pre_aggregate callback isn't called in all cases before calling aggregate. Add the missing calls so providers can rely on consistent framework behavior. Fixes: d3703b3e255f ("interconnect: Aggregate before setting initial bandwidth") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-3-mdtipton@codeaurora.org Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 945121e18b5c..1b2c564eaa99 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -973,9 +973,14 @@ void icc_node_add(struct icc_node *node, struct icc_provider *provider) } node->avg_bw = node->init_avg; node->peak_bw = node->init_peak; + + if (provider->pre_aggregate) + provider->pre_aggregate(node); + if (provider->aggregate) provider->aggregate(node, 0, node->init_avg, node->init_peak, &node->avg_bw, &node->peak_bw); + provider->set(node, node); node->avg_bw = 0; node->peak_bw = 0; From 69de4421bb4c103ef42a32bafc596e23918c106f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Jul 2021 10:23:57 -0500 Subject: [PATCH 083/502] drm/ttm: Initialize debugfs from ttm_global_init() We create a bunch of debugfs entries as a side-effect of ttm_global_init() and then never clean them up. This isn't usually a problem because we free the whole debugfs directory on module unload. However, if the global reference count ever goes to zero and then ttm_global_init() is called again, we'll re-create those debugfs entries and debugfs will complain in dmesg that we're creating entries that already exist. This patch fixes this problem by changing the lifetime of the whole TTM debugfs directory to match that of the TTM global state. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-6-jason@jlekstrand.net --- drivers/gpu/drm/ttm/ttm_device.c | 12 ++++++++++++ drivers/gpu/drm/ttm/ttm_module.c | 16 ---------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 519deea8e39b..74e3b460132b 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -44,6 +44,8 @@ static unsigned ttm_glob_use_count; struct ttm_global ttm_glob; EXPORT_SYMBOL(ttm_glob); +struct dentry *ttm_debugfs_root; + static void ttm_global_release(void) { struct ttm_global *glob = &ttm_glob; @@ -53,6 +55,7 @@ static void ttm_global_release(void) goto out; ttm_pool_mgr_fini(); + debugfs_remove(ttm_debugfs_root); __free_page(glob->dummy_read_page); memset(glob, 0, sizeof(*glob)); @@ -73,6 +76,13 @@ static int ttm_global_init(void) si_meminfo(&si); + ttm_debugfs_root = debugfs_create_dir("ttm", NULL); + if (IS_ERR(ttm_debugfs_root)) { + ret = PTR_ERR(ttm_debugfs_root); + ttm_debugfs_root = NULL; + goto out; + } + /* Limit the number of pages in the pool to about 50% of the total * system memory. */ @@ -100,6 +110,8 @@ static int ttm_global_init(void) debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root, &glob->bo_count); out: + if (ret && ttm_debugfs_root) + debugfs_remove(ttm_debugfs_root); if (ret) --ttm_glob_use_count; mutex_unlock(&ttm_global_mutex); diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index 997c458f68a9..7fcdef278c74 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -72,22 +72,6 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp) return tmp; } -struct dentry *ttm_debugfs_root; - -static int __init ttm_init(void) -{ - ttm_debugfs_root = debugfs_create_dir("ttm", NULL); - return 0; -} - -static void __exit ttm_exit(void) -{ - debugfs_remove(ttm_debugfs_root); -} - -module_init(ttm_init); -module_exit(ttm_exit); - MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse"); MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)"); MODULE_LICENSE("GPL and additional rights"); From 1d5ccab95f06675a269f4cb223a1e3f6d1ebef42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 21 Jul 2021 11:53:21 +0200 Subject: [PATCH 084/502] spi: spi-mux: Add module info needed for autoloading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the spi device table udev can autoload the spi-mux module in the presence of an spi-mux device. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20210721095321.2165453-1-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-mux.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c index 37dfc6e82804..9708b7827ff7 100644 --- a/drivers/spi/spi-mux.c +++ b/drivers/spi/spi-mux.c @@ -167,10 +167,17 @@ err_put_ctlr: return ret; } +static const struct spi_device_id spi_mux_id[] = { + { "spi-mux" }, + { } +}; +MODULE_DEVICE_TABLE(spi, spi_mux_id); + static const struct of_device_id spi_mux_of_match[] = { { .compatible = "spi-mux" }, { } }; +MODULE_DEVICE_TABLE(of, spi_mux_of_match); static struct spi_driver spi_mux_driver = { .probe = spi_mux_probe, @@ -178,6 +185,7 @@ static struct spi_driver spi_mux_driver = { .name = "spi-mux", .of_match_table = spi_mux_of_match, }, + .id_table = spi_mux_id, }; module_spi_driver(spi_mux_driver); From 8311ee2164c5cd1b63a601ea366f540eae89f10e Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 20 Jul 2021 18:01:16 +0800 Subject: [PATCH 085/502] spi: meson-spicc: fix memory leak in meson_spicc_remove In meson_spicc_probe, the error handling code needs to clean up master by calling spi_master_put, but the remove function does not have this function call. This will lead to memory leak of spicc->master. Reported-by: Dongliang Mu Fixes: 454fa271bc4e("spi: Add Meson SPICC driver") Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20210720100116.1438974-1-mudongliangabcd@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-meson-spicc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index b2c4621db34d..c208efeadd18 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -785,6 +785,8 @@ static int meson_spicc_remove(struct platform_device *pdev) clk_disable_unprepare(spicc->core); clk_disable_unprepare(spicc->pclk); + spi_master_put(spicc->master); + return 0; } From e09f2ab8eecc6dcbd7013a1303cbe56b00dc9fb0 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Thu, 22 Jul 2021 15:48:45 +0200 Subject: [PATCH 086/502] spi: update modalias_show after of_device_uevent_modalias support Commit 3ce6c9e2617e ("spi: add of_device_uevent_modalias support") is incomplete, as it didn't update the modalias_show function to generate the of: modalias string if available. Fixes: 3ce6c9e2617e ("spi: add of_device_uevent_modalias support") Signed-off-by: Andreas Schwab Link: https://lore.kernel.org/r/mvmwnpi4fya.fsf@suse.de Signed-off-by: Mark Brown --- drivers/spi/spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 35928d0843d9..397dd2959bfd 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -58,6 +58,10 @@ modalias_show(struct device *dev, struct device_attribute *a, char *buf) const struct spi_device *spi = to_spi_device(dev); int len; + len = of_device_modalias(dev, buf, PAGE_SIZE); + if (len != -ENODEV) + return len; + len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1); if (len != -ENODEV) return len; From 29f6a20c21b5bdc7eb623a712bbf7b99612ee746 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 2 Jul 2021 21:49:14 +0200 Subject: [PATCH 087/502] arm64: dts: ls1028: sl28: fix networking for variant 2 The PHY configuration for the variant 2 is still missing the flag for in-band signalling between PHY and MAC. Both sides - MAC and PHY - have to match the setting. For now, Linux only supports setting the MAC side and thus it has to match the setting the bootloader is configuring. Enable in-band signalling to make ethernet work. Fixes: ab43f0307449 ("arm64: dts: ls1028a: sl28: add support for variant 2") Signed-off-by: Michael Walle Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts index dd764b720fb0..f6a79c8080d1 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts @@ -54,6 +54,7 @@ &mscc_felix_port0 { label = "swp0"; + managed = "in-band-status"; phy-handle = <&phy0>; phy-mode = "sgmii"; status = "okay"; @@ -61,6 +62,7 @@ &mscc_felix_port1 { label = "swp1"; + managed = "in-band-status"; phy-handle = <&phy1>; phy-mode = "sgmii"; status = "okay"; From 828db68f4ff1ab6982a36a56522b585160dc8c8e Mon Sep 17 00:00:00 2001 From: Oleksandr Suvorov Date: Tue, 13 Jul 2021 23:21:07 +0300 Subject: [PATCH 088/502] ARM: dts: colibri-imx6ull: limit SDIO clock to 25MHz NXP and AzureWave don't recommend using SDIO bus mode 3.3V@50MHz due to noise affecting the wireless throughput. Colibri iMX6ULL uses only 3.3V signaling for Wi-Fi module AW-CM276NF. Limit the SDIO Clock on Colibri iMX6ULL to 25MHz. Fixes: c2e4987e0e02 ("ARM: dts: imx6ull: add Toradex Colibri iMX6ULL support") Signed-off-by: Oleksandr Suvorov Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi index a0545431b3dc..9f1e38282bee 100644 --- a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi @@ -43,6 +43,7 @@ assigned-clock-rates = <0>, <198000000>; cap-power-off-card; keep-power-in-suspend; + max-frequency = <25000000>; mmc-pwrseq = <&wifi_pwrseq>; no-1-8-v; non-removable; From e39cdacf2f664b09029e7c1eb354c91a20c367af Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Tue, 22 Jun 2021 07:11:31 +0000 Subject: [PATCH 089/502] pcmcia: i82092: fix a null pointer dereference bug During the driver loading process, the 'dev' field was not assigned, but the 'dev' field was referenced in the subsequent 'i82092aa_set_mem_map' function. Signed-off-by: Zheyu Ma CC: [linux@dominikbrodowski.net: shorten commit message, add Cc to stable] Signed-off-by: Dominik Brodowski --- drivers/pcmcia/i82092.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c index 85887d885b5f..192c9049d654 100644 --- a/drivers/pcmcia/i82092.c +++ b/drivers/pcmcia/i82092.c @@ -112,6 +112,7 @@ static int i82092aa_pci_probe(struct pci_dev *dev, for (i = 0; i < socket_count; i++) { sockets[i].card_state = 1; /* 1 = present but empty */ sockets[i].io_base = pci_resource_start(dev, 0); + sockets[i].dev = dev; sockets[i].socket.features |= SS_CAP_PCCARD; sockets[i].socket.map_size = 0x1000; sockets[i].socket.irq_mask = 0; From 20fb73911fec01f06592de1cdbca00b66602ebd7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 15 Jul 2021 14:23:21 +0100 Subject: [PATCH 090/502] ARM: imx: fix missing 3rd argument in macro imx_mmdc_perf_init The function imx_mmdc_perf_init recently had a 3rd argument added to it but the equivalent macro was not updated and is still the older 2 argument version. Fix this by adding in the missing 3rd argumement mmdc_ipg_clk. Fixes: f07ec8536580 ("ARM: imx: add missing clk_disable_unprepare()") Signed-off-by: Colin Ian King Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mmdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 4a6f1359e1e9..af12668d0bf5 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -534,7 +534,7 @@ pmu_free: #else #define imx_mmdc_remove NULL -#define imx_mmdc_perf_init(pdev, mmdc_base) 0 +#define imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk) 0 #endif static int imx_mmdc_probe(struct platform_device *pdev) From 3d9e30a52047f2d464efdfd1d561ae1f707a0286 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 18 Jul 2021 23:43:02 +0200 Subject: [PATCH 091/502] ARM: dts: imx: Swap M53Menlo pinctrl_power_button/pinctrl_power_out pins The pinctrl_power_button/pinctrl_power_out each define single GPIO pinmux, except it is exactly the other one than the matching gpio-keys and gpio-poweroff DT nodes use for that functionality. Swap the two GPIOs to correct this error. Fixes: 50d29fdb765d ("ARM: dts: imx53: Add power GPIOs on M53Menlo") Signed-off-by: Marek Vasut Cc: Shawn Guo Cc: Fabio Estevam Cc: NXP Linux Team Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx53-m53menlo.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts index f98691ae4415..d3082b9774e4 100644 --- a/arch/arm/boot/dts/imx53-m53menlo.dts +++ b/arch/arm/boot/dts/imx53-m53menlo.dts @@ -388,13 +388,13 @@ pinctrl_power_button: powerbutgrp { fsl,pins = < - MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 + MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 >; }; pinctrl_power_out: poweroutgrp { fsl,pins = < - MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 + MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 >; }; From ec61cd49bf566401306cfc4855bda8c08bbaa46c Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 28 Jun 2021 14:37:13 +0200 Subject: [PATCH 092/502] mac80211: Do not strip skb headroom on monitor frames When a monitor interface is present together with other interfaces, a received skb is copied and received on the monitor netdev. Before, the copied skb was allocated with exactly the amount of space needed for the radiotap header, resulting in an skb without any headroom at all being received on the monitor netdev. With the introduction of eBPF and XDP in the kernel, skbs may be processed by custom eBPF programs. However, since the skb cannot be reallocated in the eBPF program, no more data or headers can be pushed. The old code made sure the final headroom was zero regardless of the value of NET_SKB_PAD, so increasing that constant would have no effect. Now we allocate monitor skb copies with a headroom of NET_SKB_PAD bytes before the radiotap header. Monitor interfaces now behave in the same way as other netdev interfaces that honor the NET_SKB_PAD constant. Signed-off-by: Johan Almbladh Link: https://lore.kernel.org/r/20210628123713.2070753-1-johan.almbladh@anyfinetworks.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 771921c057e8..2563473b5cf1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -730,7 +730,8 @@ ieee80211_make_monitor_skb(struct ieee80211_local *local, * Need to make a copy and possibly remove radiotap header * and FCS from the original. */ - skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC); + skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD, + 0, GFP_ATOMIC); if (!skb) return NULL; From 1a7915501ca94a1f10288defe333cd5ade210b63 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 29 Jun 2021 13:28:53 +0200 Subject: [PATCH 093/502] mac80211: fix starting aggregation sessions on mesh interfaces The logic for starting aggregation sessions was recently moved from minstrel_ht to mac80211, into the subif tx handler just after the sta lookup. Unfortunately this didn't work for mesh interfaces, since the sta lookup is deferred until a much later point in time on those. Fix this by also calling the aggregation check right after the deferred sta lookup. Fixes: 08a46c642001 ("mac80211: move A-MPDU session check from minstrel_ht to mac80211") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210629112853.29785-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 57 ++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e96981144358..8509778ff31f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1147,6 +1147,29 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, return queued; } +static void +ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) +{ + struct rate_control_ref *ref = sdata->local->rate_ctrl; + u16 tid; + + if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) + return; + + if (!sta || !sta->sta.ht_cap.ht_supported || + !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || + skb->protocol == sdata->control_port_protocol) + return; + + tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + if (likely(sta->ampdu_mlme.tid_tx[tid])) + return; + + ieee80211_start_tx_ba_session(&sta->sta, tid, 0); +} + /* * initialises @tx * pass %NULL for the station if unknown, a valid pointer if known @@ -1160,6 +1183,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + bool aggr_check = false; int tid; memset(tx, 0, sizeof(*tx)); @@ -1188,8 +1212,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, } else if (tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } - if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) + if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) { tx->sta = sta_info_get(sdata, hdr->addr1); + aggr_check = true; + } } if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && @@ -1199,8 +1225,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_tx *tid_tx; tid = ieee80211_get_tid(hdr); - tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx && aggr_check) { + ieee80211_aggr_check(sdata, tx->sta, skb); + tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + } + if (tid_tx) { bool queued; @@ -4120,29 +4150,6 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) } EXPORT_SYMBOL(ieee80211_txq_schedule_start); -static void -ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, - struct sk_buff *skb) -{ - struct rate_control_ref *ref = sdata->local->rate_ctrl; - u16 tid; - - if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) - return; - - if (!sta || !sta->sta.ht_cap.ht_supported || - !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || - skb->protocol == sdata->control_port_protocol) - return; - - tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - if (likely(sta->ampdu_mlme.tid_tx[tid])) - return; - - ieee80211_start_tx_ba_session(&sta->sta, tid, 0); -} - void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags, From a5d3cbdb09ff1f52cbe040932e06c8b9915c6dad Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 2 Jul 2021 07:01:11 +0200 Subject: [PATCH 094/502] mac80211: fix enabling 4-address mode on a sta vif after assoc Notify the driver about the 4-address mode change and also send a nulldata packet to the AP to notify it about the change Fixes: 1ff4e8f2dec8 ("mac80211: notify the driver when a sta uses 4-address mode") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210702050111.47546-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 19 +++++++++++++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 4 ++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 84cc7733ea66..4e6f11e63df3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -152,6 +152,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct vif_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; int ret; ret = ieee80211_if_change_type(sdata, type); @@ -162,7 +164,24 @@ static int ieee80211_change_iface(struct wiphy *wiphy, RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); ieee80211_check_fast_rx_iface(sdata); } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + if (params->use_4addr == ifmgd->use_4addr) + return 0; + sdata->u.mgd.use_4addr = params->use_4addr; + if (!ifmgd->associated) + return 0; + + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, ifmgd->bssid); + if (sta) + drv_sta_set_4addr(local, sdata, &sta->sta, + params->use_4addr); + mutex_unlock(&local->sta_mtx); + + if (params->use_4addr) + ieee80211_send_4addr_nullfunc(local, sdata); } if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 22549b95d1aa..30ce6d2ec7ce 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2201,6 +2201,8 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, bool powersave); +void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack, u16 tx_time); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a00f11a33699..c0ea3b1aa9e1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1095,8 +1095,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, ieee80211_tx_skb(sdata, skb); } -static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) +void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { struct sk_buff *skb; struct ieee80211_hdr *nullfunc; From 17109e9783799be2a063b2bd861a508194b0a487 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 6 Jul 2021 17:44:23 +0200 Subject: [PATCH 095/502] virt_wifi: fix error on connect When connecting without first doing a scan, the BSS list is empty and __cfg80211_connect_result() generates this warning: $ iw dev wlan0 connect -w VirtWifi [ 15.371989] ------------[ cut here ]------------ [ 15.372179] WARNING: CPU: 0 PID: 92 at net/wireless/sme.c:756 __cfg80211_connect_result+0x402/0x440 [ 15.372383] CPU: 0 PID: 92 Comm: kworker/u2:2 Not tainted 5.13.0-kvm #444 [ 15.372512] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-3.fc34 04/01/2014 [ 15.372597] Workqueue: cfg80211 cfg80211_event_work [ 15.372756] RIP: 0010:__cfg80211_connect_result+0x402/0x440 [ 15.372818] Code: 48 2b 04 25 28 00 00 00 75 59 48 8b 3b 48 8b 76 10 48 8d 65 e0 5b 41 5c 41 5d 41 5e 5d 49 8d 65 f0 41 5d e9 d0 d4 fd ff 0f 0b <0f> 0b e9 f6 fd ff ff e8 f2 4a b4 ff e9 ec fd ff ff 0f 0b e9 19 fd [ 15.372966] RSP: 0018:ffffc900005cbdc0 EFLAGS: 00010246 [ 15.373022] RAX: 0000000000000000 RBX: ffff8880028e2400 RCX: ffff8880028e2472 [ 15.373088] RDX: 0000000000000002 RSI: 00000000fffffe01 RDI: ffffffff815335ba [ 15.373149] RBP: ffffc900005cbe00 R08: 0000000000000008 R09: ffff888002bdf8b8 [ 15.373209] R10: ffff88803ec208f0 R11: ffffffffffffe9ae R12: ffff88801d687d98 [ 15.373280] R13: ffff88801b5fe000 R14: ffffc900005cbdc0 R15: dead000000000100 [ 15.373330] FS: 0000000000000000(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000 [ 15.373382] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 15.373425] CR2: 000056421c468958 CR3: 000000001b458001 CR4: 0000000000170eb0 [ 15.373478] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 15.373529] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 15.373580] Call Trace: [ 15.373611] ? cfg80211_process_wdev_events+0x10e/0x170 [ 15.373743] cfg80211_process_wdev_events+0x10e/0x170 [ 15.373783] cfg80211_process_rdev_events+0x21/0x40 [ 15.373846] cfg80211_event_work+0x20/0x30 [ 15.373892] process_one_work+0x1e9/0x340 [ 15.373956] worker_thread+0x4b/0x3f0 [ 15.374017] ? process_one_work+0x340/0x340 [ 15.374053] kthread+0x11f/0x140 [ 15.374089] ? set_kthread_struct+0x30/0x30 [ 15.374153] ret_from_fork+0x1f/0x30 [ 15.374187] ---[ end trace 321ef0cb7e9c0be1 ]--- wlan0 (phy #0): connected to 00:00:00:00:00:00 Add the fake bss just before the connect so that cfg80211_get_bss() finds the virtual network. As some code was duplicated, move it in a common function. Signed-off-by: Matteo Croce Link: https://lore.kernel.org/r/20210706154423.11065-1-mcroce@linux.microsoft.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virt_wifi.c | 52 ++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 1df959532c7d..514f2c1124b6 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -136,6 +136,29 @@ static struct ieee80211_supported_band band_5ghz = { /* Assigned at module init. Guaranteed locally-administered and unicast. */ static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {}; +static void virt_wifi_inform_bss(struct wiphy *wiphy) +{ + u64 tsf = div_u64(ktime_get_boottime_ns(), 1000); + struct cfg80211_bss *informed_bss; + static const struct { + u8 tag; + u8 len; + u8 ssid[8]; + } __packed ssid = { + .tag = WLAN_EID_SSID, + .len = 8, + .ssid = "VirtWifi", + }; + + informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, + CFG80211_BSS_FTYPE_PRESP, + fake_router_bssid, tsf, + WLAN_CAPABILITY_ESS, 0, + (void *)&ssid, sizeof(ssid), + DBM_TO_MBM(-50), GFP_KERNEL); + cfg80211_put_bss(wiphy, informed_bss); +} + /* Called with the rtnl lock held. */ static int virt_wifi_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) @@ -156,28 +179,13 @@ static int virt_wifi_scan(struct wiphy *wiphy, /* Acquires and releases the rdev BSS lock. */ static void virt_wifi_scan_result(struct work_struct *work) { - struct { - u8 tag; - u8 len; - u8 ssid[8]; - } __packed ssid = { - .tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi", - }; - struct cfg80211_bss *informed_bss; struct virt_wifi_wiphy_priv *priv = container_of(work, struct virt_wifi_wiphy_priv, scan_result.work); struct wiphy *wiphy = priv_to_wiphy(priv); struct cfg80211_scan_info scan_info = { .aborted = false }; - u64 tsf = div_u64(ktime_get_boottime_ns(), 1000); - informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, - CFG80211_BSS_FTYPE_PRESP, - fake_router_bssid, tsf, - WLAN_CAPABILITY_ESS, 0, - (void *)&ssid, sizeof(ssid), - DBM_TO_MBM(-50), GFP_KERNEL); - cfg80211_put_bss(wiphy, informed_bss); + virt_wifi_inform_bss(wiphy); /* Schedules work which acquires and releases the rtnl lock. */ cfg80211_scan_done(priv->scan_request, &scan_info); @@ -225,10 +233,12 @@ static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev, if (!could_schedule) return -EBUSY; - if (sme->bssid) + if (sme->bssid) { ether_addr_copy(priv->connect_requested_bss, sme->bssid); - else + } else { + virt_wifi_inform_bss(wiphy); eth_zero_addr(priv->connect_requested_bss); + } wiphy_debug(wiphy, "connect\n"); @@ -241,11 +251,13 @@ static void virt_wifi_connect_complete(struct work_struct *work) struct virt_wifi_netdev_priv *priv = container_of(work, struct virt_wifi_netdev_priv, connect.work); u8 *requested_bss = priv->connect_requested_bss; - bool has_addr = !is_zero_ether_addr(requested_bss); bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid); u16 status = WLAN_STATUS_SUCCESS; - if (!priv->is_up || (has_addr && !right_addr)) + if (is_zero_ether_addr(requested_bss)) + requested_bss = NULL; + + if (!priv->is_up || (requested_bss && !right_addr)) status = WLAN_STATUS_UNSPECIFIED_FAILURE; else priv->is_connected = true; From 0d059964504a1605d84938c0b5b38f6573121c4a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 12 Jul 2021 21:53:30 +0200 Subject: [PATCH 096/502] nl80211: limit band information in non-split data In non-split data, we shouldn't be adding S1G and 6 GHz data (or future bands) since we're really close to the 4k message size limit. Remove those bands, any modern userspace that can use S1G or 6 GHz should already be using split dumps, and if not then it needs to update. Link: https://lore.kernel.org/r/20210712215329.31444162a2c2.I5555312e4a074c84f8b4e7ad79dc4d1fbfc5126c@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 50eb405b0690..16c88beea48b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2351,7 +2351,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, goto nla_put_failure; for (band = state->band_start; - band < NUM_NL80211_BANDS; band++) { + band < (state->split ? + NUM_NL80211_BANDS : + NL80211_BAND_60GHZ + 1); + band++) { struct ieee80211_supported_band *sband; /* omit higher bands for ancient software */ From f9a5c358c8d26fed0cc45f2afc64633d4ba21dff Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Mon, 28 Jun 2021 21:23:34 +0800 Subject: [PATCH 097/502] cfg80211: Fix possible memory leak in function cfg80211_bss_update When we exceed the limit of BSS entries, this function will free the new entry, however, at this time, it is the last door to access the inputed ies, so these ies will be unreferenced objects and cause memory leak. Therefore we should free its ies before deallocating the new entry, beside of dropping it from hidden_list. Signed-off-by: Nguyen Dinh Phi Link: https://lore.kernel.org/r/20210628132334.851095-1-phind.uet@gmail.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index f03c7ac8e184..7897b1478c3c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1754,16 +1754,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, * be grouped with this beacon for updates ... */ if (!cfg80211_combine_bsses(rdev, new)) { - kfree(new); + bss_ref_put(rdev, new); goto drop; } } if (rdev->bss_entries >= bss_entries_limit && !cfg80211_bss_expire_oldest(rdev)) { - if (!list_empty(&new->hidden_list)) - list_del(&new->hidden_list); - kfree(new); + bss_ref_put(rdev, new); goto drop; } From 923f98929182dfd04e9149be839160b63a3db145 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 13:11:34 +0300 Subject: [PATCH 098/502] arm64: dts: armada-3720-turris-mox: fixed indices for the SDHC controllers Since drivers/mmc/host/sdhci-xenon.c declares the PROBE_PREFER_ASYNCHRONOUS probe type, it is not guaranteed whether /dev/mmcblk0 will belong to sdhci0 or sdhci1. In turn, this will break booting by: root=/dev/mmcblk0p1 Fix the issue by adding aliases so that the old MMC controller indices are preserved. Fixes: 7320915c8861 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.14") Signed-off-by: Vladimir Oltean Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index ce2bcddf396f..f2d7d6f071bc 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -19,6 +19,8 @@ aliases { spi0 = &spi0; ethernet1 = ð1; + mmc0 = &sdhci0; + mmc1 = &sdhci1; }; chosen { From b66541422824cf6cf20e9a35112e9cb5d82cdf62 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 13 Jul 2021 10:27:28 +0800 Subject: [PATCH 099/502] ext4: fix potential uninitialized access to retval in kmmpd if (!ext4_has_feature_mmp(sb)) then retval can be unitialized before we jump to the wait_to_exit label. Fixes: 61bb4a1c417e ("ext4: fix possible UAF when remounting r/o a mmp-protected file system") Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20210713022728.2533770-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index bc364c119af6..cebea4270817 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -138,7 +138,7 @@ static int kmmpd(void *data) unsigned mmp_check_interval; unsigned long last_update_time; unsigned long diff; - int retval; + int retval = 0; mmp_block = le64_to_cpu(es->s_mmp_block); mmp = (struct mmp_struct *)(bh->b_data); From 73dc707161a83c24a9e6804b2d60e6f4a4d6be74 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 14 Jul 2021 13:59:40 +0800 Subject: [PATCH 100/502] ext4: remove conflicting comment from __ext4_forget We do a bforget and return for no journal case, so let's remove this conflict comment. Reviewed-by: Jan Kara Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20210714055940.1553705-1-guoqing.jiang@linux.dev Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b96ecba91899..b60f0152ea57 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -244,9 +244,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line, * "bh" may be NULL: a metadata block may have been freed from memory * but there may still be a record of it in the journal, and that record * still needs to be revoked. - * - * If the handle isn't valid we're not journaling, but we still need to - * call into ext4_journal_revoke() to put the buffer head. */ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, int is_metadata, struct inode *inode, From 32c3973d808301e7a980f80fee8818fdf7c82b09 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 17 Jul 2021 10:10:29 +0200 Subject: [PATCH 101/502] netfilter: flowtable: avoid possible false sharing The flowtable follows the same timeout approach as conntrack, use the same idiom as in cc16921351d8 ("netfilter: conntrack: avoid same-timeout update") but also include the fix provided by e37542ba111f ("netfilter: conntrack: avoid possible false sharing"). Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 1e50908b1b7e..551976e4284c 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -331,7 +331,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add); void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload *flow) { - flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); + u32 timeout; + + timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); + if (READ_ONCE(flow->timeout) != timeout) + WRITE_ONCE(flow->timeout, timeout); if (likely(!nf_flowtable_hw_offload(flow_table))) return; From 32953df7a6eb56bd9b8f18a13034d55f9fc96cfa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 17 Jul 2021 10:20:08 +0200 Subject: [PATCH 102/502] netfilter: nft_last: avoid possible false sharing Use the idiom described in: https://github.com/google/ktsan/wiki/READ_ONCE-and-WRITE_ONCE#it-may-improve-performance Moreover, prevent a compiler optimization. Fixes: 836382dc2471 ("netfilter: nf_tables: add last expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_last.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 8088b99f2ee3..304e33cbed9b 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -48,24 +48,30 @@ static void nft_last_eval(const struct nft_expr *expr, { struct nft_last_priv *priv = nft_expr_priv(expr); - priv->last_jiffies = jiffies; - priv->last_set = 1; + if (READ_ONCE(priv->last_jiffies) != jiffies) + WRITE_ONCE(priv->last_jiffies, jiffies); + if (READ_ONCE(priv->last_set) == 0) + WRITE_ONCE(priv->last_set, 1); } static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_last_priv *priv = nft_expr_priv(expr); + unsigned long last_jiffies = READ_ONCE(priv->last_jiffies); + u32 last_set = READ_ONCE(priv->last_set); __be64 msecs; - if (time_before(jiffies, priv->last_jiffies)) - priv->last_set = 0; + if (time_before(jiffies, last_jiffies)) { + WRITE_ONCE(priv->last_set, 0); + last_set = 0; + } - if (priv->last_set) - msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies); + if (last_set) + msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies); else msecs = 0; - if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) || + if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) || nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD)) goto nla_put_failure; From 30a56a2b881821625f79837d4d968c679852444e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Jul 2021 18:36:00 +0200 Subject: [PATCH 103/502] netfilter: conntrack: adjust stop timestamp to real expiry value In case the entry is evicted via garbage collection there is delay between the timeout value and the eviction event. This adjusts the stop value based on how much time has passed. Fixes: b87a2f9199ea82 ("netfilter: conntrack: add gc worker to remove timed-out entries") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 83c52df85870..5c03e5106751 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -670,8 +670,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) return false; tstamp = nf_conn_tstamp_find(ct); - if (tstamp && tstamp->stop == 0) + if (tstamp) { + s32 timeout = ct->timeout - nfct_time_stamp; + tstamp->stop = ktime_get_real_ns(); + if (timeout < 0) + tstamp->stop -= jiffies_to_nsecs(-timeout); + } if (nf_conntrack_event_report(IPCT_DESTROY, ct, portid, report) < 0) { From a33f387ecd5aafae514095c2c4a8c24f7aea7e8b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Jul 2021 18:22:50 +0200 Subject: [PATCH 104/502] netfilter: nft_nat: allow to specify layer 4 protocol NAT only nft_nat reports a bogus EAFNOSUPPORT if no layer 3 information is specified. Fixes: d07db9884a5f ("netfilter: nf_tables: introduce nft_validate_register_load()") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 0840c635b752..be1595d6979d 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -201,7 +201,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, alen = sizeof_field(struct nf_nat_range, min_addr.ip6); break; default: - return -EAFNOSUPPORT; + if (tb[NFTA_NAT_REG_ADDR_MIN]) + return -EAFNOSUPPORT; + break; } priv->family = family; From ee7ab3f263f8131722cff3871b9618b1e7478f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 28 Jun 2021 17:12:29 +0200 Subject: [PATCH 105/502] arm64: dts: armada-3720-turris-mox: remove mrvl,i2c-fast-mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some SFP modules are not detected when i2c-fast-mode is enabled even when clock-frequency is already set to 100000. The I2C bus violates the timing specifications when run in fast mode. So disable fast mode on Turris Mox. Same change was already applied for uDPU (also Armada 3720 board with SFP) in commit fe3ec631a77d ("arm64: dts: uDPU: remove i2c-fast-mode"). Fixes: 7109d817db2e ("arm64: dts: marvell: add DTS for Turris Mox") Signed-off-by: Pali Rohár Reviewed-by: Marek Behún Acked-by: Russell King (Oracle) Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index f2d7d6f071bc..a05b1ab2dd12 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -121,6 +121,7 @@ pinctrl-names = "default"; pinctrl-0 = <&i2c1_pins>; clock-frequency = <100000>; + /delete-property/ mrvl,i2c-fast-mode; status = "okay"; rtc@6f { From 217e26bd87b2930856726b48a4e71c768b8c9bf5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:22:32 +0200 Subject: [PATCH 106/502] netfilter: nfnl_hook: fix unused variable warning The only user of this variable is in an #ifdef: net/netfilter/nfnetlink_hook.c: In function 'nfnl_hook_entries_head': net/netfilter/nfnetlink_hook.c:177:28: error: unused variable 'netdev' [-Werror=unused-variable] Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_hook.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 50b4e3c9347a..202f57d17bab 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -174,7 +174,9 @@ static const struct nf_hook_entries * nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev) { const struct nf_hook_entries *hook_head = NULL; +#ifdef CONFIG_NETFILTER_INGRESS struct net_device *netdev; +#endif switch (pf) { case NFPROTO_IPV4: From 65662a8dcdd01342b71ee44234bcfd0162e195af Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Thu, 29 Apr 2021 19:49:47 +0200 Subject: [PATCH 107/502] i40e: Fix logic of disabling queues Correct the message flow between driver and firmware when disabling queues. Previously in case of PF reset (due to required reinit after reconfig), the error like: "VSI seid 397 Tx ring 60 disable timeout" could show up occasionally. The error was not a real issue of hardware or firmware, it was caused by wrong sequence of messages invoked by the driver. Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 58 ++++++++++++--------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 861e59a350bd..5297e6c59083 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4454,11 +4454,10 @@ int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, } /** - * i40e_vsi_control_tx - Start or stop a VSI's rings + * i40e_vsi_enable_tx - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_enable_tx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; @@ -4467,7 +4466,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, - false /*is xdp*/, enable); + false /*is xdp*/, true); if (ret) break; @@ -4476,7 +4475,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q + vsi->alloc_queue_pairs, - true /*is xdp*/, enable); + true /*is xdp*/, true); if (ret) break; } @@ -4574,32 +4573,25 @@ int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable) } /** - * i40e_vsi_control_rx - Start or stop a VSI's rings + * i40e_vsi_enable_rx - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_enable_rx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { - ret = i40e_control_wait_rx_q(pf, pf_q, enable); + ret = i40e_control_wait_rx_q(pf, pf_q, true); if (ret) { dev_info(&pf->pdev->dev, - "VSI seid %d Rx ring %d %sable timeout\n", - vsi->seid, pf_q, (enable ? "en" : "dis")); + "VSI seid %d Rx ring %d enable timeout\n", + vsi->seid, pf_q); break; } } - /* Due to HW errata, on Rx disable only, the register can indicate done - * before it really is. Needs 50ms to be sure - */ - if (!enable) - mdelay(50); - return ret; } @@ -4612,29 +4604,47 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi) int ret = 0; /* do rx first for enable and last for disable */ - ret = i40e_vsi_control_rx(vsi, true); + ret = i40e_vsi_enable_rx(vsi); if (ret) return ret; - ret = i40e_vsi_control_tx(vsi, true); + ret = i40e_vsi_enable_tx(vsi); return ret; } +#define I40E_DISABLE_TX_GAP_MSEC 50 + /** * i40e_vsi_stop_rings - Stop a VSI's rings * @vsi: the VSI being configured **/ void i40e_vsi_stop_rings(struct i40e_vsi *vsi) { + struct i40e_pf *pf = vsi->back; + int pf_q, err, q_end; + /* When port TX is suspended, don't wait */ if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) return i40e_vsi_stop_rings_no_wait(vsi); - /* do rx first for enable and last for disable - * Ignore return value, we need to shutdown whatever we can - */ - i40e_vsi_control_tx(vsi, false); - i40e_vsi_control_rx(vsi, false); + q_end = vsi->base_queue + vsi->num_queue_pairs; + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false); + + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) { + err = i40e_control_wait_rx_q(pf, pf_q, false); + if (err) + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d dissable timeout\n", + vsi->seid, pf_q); + } + + msleep(I40E_DISABLE_TX_GAP_MSEC); + pf_q = vsi->base_queue; + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0); + + i40e_vsi_wait_queues_disabled(vsi); } /** From 71d6fdba4b2d82fdd883fec31dee77fbcf59773a Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 21 May 2021 18:41:26 +0200 Subject: [PATCH 108/502] i40e: Fix firmware LLDP agent related warning Make warning meaningful for the user. Previously the trace: "Starting FW LLDP agent failed: error: I40E_ERR_ADMIN_QUEUE_ERROR, I40E_AQ_RC_EAGAIN" was produced when user tried to start Firmware LLDP agent, just after it was stopped with sequence: ethtool --set-priv-flags disable-fw-lldp on ethtool --set-priv-flags disable-fw-lldp off (without any delay between the commands) At that point the firmware is still processing stop command, the behavior is expected. Fixes: c1041d070437 ("i40e: Missing response checks in driver when starting/stopping FW LLDP") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 3e822bad4851..d9e26f9713a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -5294,6 +5294,10 @@ flags_complete: dev_warn(&pf->pdev->dev, "Device configuration forbids SW from starting the LLDP agent.\n"); return -EINVAL; + case I40E_AQ_RC_EAGAIN: + dev_warn(&pf->pdev->dev, + "Stop FW LLDP agent command is still being processed, please try again in a second.\n"); + return -EBUSY; default: dev_warn(&pf->pdev->dev, "Starting FW LLDP agent failed: error: %s, %s\n", From dc614c46178b0b89bde86ac54fc687a28580d2b7 Mon Sep 17 00:00:00 2001 From: Lukasz Cieplicki Date: Mon, 31 May 2021 16:55:49 +0000 Subject: [PATCH 109/502] i40e: Add additional info to PHY type error In case of PHY type error occurs, the message was too generic. Add additional info to PHY type error indicating that it can be wrong cable connected. Fixes: 124ed15bf126 ("i40e: Add dual speed module support") Signed-off-by: Lukasz Cieplicki Signed-off-by: Michal Maloszewski Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index d9e26f9713a5..2c9e4eeb7270 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -980,7 +980,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, default: /* if we got here and link is up something bad is afoot */ netdev_info(netdev, - "WARNING: Link is up but PHY type 0x%x is not recognized.\n", + "WARNING: Link is up but PHY type 0x%x is not recognized, or incorrect cable is in use\n", hw_link_info->phy_type); } From 89ec1f0886c127c7e41ac61a6b6d539f4fb2510b Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Wed, 2 Jun 2021 00:47:03 +0000 Subject: [PATCH 110/502] i40e: Fix queue-to-TC mapping on Tx In SW DCB mode the packets sent receive incorrect UP tags. They are constructed correctly and put into tx_ring, but UP is later remapped by HW on the basis of TCTUPR register contents according to Tx queue selected, and BW used is consistent with the new UP values. This is caused by Tx queue selection in kernel not taking into account DCB configuration. This patch fixes the issue by implementing the ndo_select_queue NDO callback. Fixes: fd0a05ce74ef ("i40e: transmit, receive, and NAPI") Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Jedrzej Jagielski Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 50 +++++++++++++++++++++ drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 + 3 files changed, 53 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5297e6c59083..278077208f37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13271,6 +13271,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_poll_controller = i40e_netpoll, #endif .ndo_setup_tc = __i40e_setup_tc, + .ndo_select_queue = i40e_lan_select_queue, .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 38eb8151ee9a..3f25bd8c4924 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3631,6 +3631,56 @@ dma_error: return -1; } +static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev, + const struct sk_buff *skb, + u16 num_tx_queues) +{ + u32 jhash_initval_salt = 0xd631614b; + u32 hash; + + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16)skb->protocol ^ skb->hash; + + hash = jhash_1word(hash, jhash_initval_salt); + + return (u16)(((u64)hash * num_tx_queues) >> 32); +} + +u16 i40e_lan_select_queue(struct net_device *netdev, + struct sk_buff *skb, + struct net_device __always_unused *sb_dev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_hw *hw; + u16 qoffset; + u16 qcount; + u8 tclass; + u16 hash; + u8 prio; + + /* is DCB enabled at all? */ + if (vsi->tc_config.numtc == 1) + return i40e_swdcb_skb_tx_hash(netdev, skb, + netdev->real_num_tx_queues); + + prio = skb->priority; + hw = &vsi->back->hw; + tclass = hw->local_dcbx_config.etscfg.prioritytable[prio]; + /* sanity check */ + if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass)))) + tclass = 0; + + /* select a queue assigned for the given TC */ + qcount = vsi->tc_config.tc_info[tclass].qcount; + hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount); + + qoffset = vsi->tc_config.tc_info[tclass].qoffset; + return qoffset + hash; +} + /** * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring * @xdpf: data to transmit diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 86fed05b4f19..bfc2845c99d1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -451,6 +451,8 @@ static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring) bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev); +u16 i40e_lan_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev); void i40e_clean_tx_ring(struct i40e_ring *tx_ring); void i40e_clean_rx_ring(struct i40e_ring *rx_ring); int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring); From ea52faae1d17cd3048681d86d2e8641f44de484d Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Fri, 18 Jun 2021 08:49:49 +0000 Subject: [PATCH 111/502] i40e: Fix log TC creation failure when max num of queues is exceeded Fix missing failed message if driver does not have enough queues to complete TC command. Without this fix no message is displayed in dmesg. Fixes: a9ce82f744dc ("i40e: Enable 'channel' mode in mqprio for TC configs") Signed-off-by: Grzegorz Szczurek Signed-off-by: Jedrzej Jagielski Tested-by: Imam Hassan Reza Biswas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 278077208f37..1d1f52756a93 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7290,6 +7290,8 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, } if (vsi->num_queue_pairs < (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) { + dev_err(&vsi->back->pdev->dev, + "Failed to create traffic channel, insufficient number of queues.\n"); return -EINVAL; } if (sum_max_rate > i40e_get_link_speed(vsi)) { From d72e91efcae12f2f24ced984d00d60517c677857 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Thu, 22 Jul 2021 18:15:51 +0530 Subject: [PATCH 112/502] octeontx2-af: Remove unnecessary devm_kfree Remove devm_kfree of memory where VLAN entry to RVU PF mapping info is saved. This will be freed anyway at driver exit. Having this could result in warning from devm_kfree() if the memory is not allocated due to errors in rvu_nix_block_init() before nix_setup_txvlan(). Fixes: 9a946def264d ("octeontx2-af: Modify nix_vtag_cfg mailbox to support TX VTAG entries") Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 0933699a0d2d..0d2cd5169018 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3842,7 +3842,6 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr, vlan = &nix_hw->txvlan; kfree(vlan->rsrc.bmap); mutex_destroy(&vlan->rsrc_lock); - devm_kfree(rvu->dev, vlan->entry2pfvf_map); mcast = &nix_hw->mcast; qmem_free(rvu->dev, mcast->mce_ctx); From f8dd60de194817c86bf812700980762bb5a8d9a4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 22 Jul 2021 12:05:41 -0400 Subject: [PATCH 113/502] tipc: fix implicit-connect for SYN+ For implicit-connect, when it's either SYN- or SYN+, an ACK should be sent back to the client immediately. It's not appropriate for the client to enter established state only after receiving data from the server. On client side, after the SYN is sent out, tipc_wait_for_connect() should be called to wait for the ACK if timeout is set. This patch also restricts __tipc_sendstream() to call __sendmsg() only when it's in TIPC_OPEN state, so that the client can program in a single loop doing both connecting and data sending like: for (...) sendmsg(dest, buf); This makes the implicit-connect more implicit. Fixes: b97bf3fd8f6a ("[TIPC] Initial merge") Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 34a97ea36cc8..ebd300c26a44 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -158,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk); static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz); static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack); +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -1515,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) rc = 0; } - if (unlikely(syn && !rc)) + if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); + if (timeout) { + timeout = msecs_to_jiffies(timeout); + tipc_wait_for_connect(sock, &timeout); + } + } return rc ? rc : dlen; } @@ -1564,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) return -EMSGSIZE; /* Handle implicit connection setup */ - if (unlikely(dest)) { + if (unlikely(dest && sk->sk_state == TIPC_OPEN)) { rc = __tipc_sendmsg(sock, m, dlen); if (dlen && dlen == rc) { tsk->peer_caps = tipc_node_get_capabilities(net, dnode); @@ -2689,9 +2695,10 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern) { struct sock *new_sk, *sk = sock->sk; - struct sk_buff *buf; struct tipc_sock *new_tsock; + struct msghdr m = {NULL,}; struct tipc_msg *msg; + struct sk_buff *buf; long timeo; int res; @@ -2737,19 +2744,17 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, } /* - * Respond to 'SYN-' by discarding it & returning 'ACK'-. - * Respond to 'SYN+' by queuing it on new socket. + * Respond to 'SYN-' by discarding it & returning 'ACK'. + * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'. */ if (!msg_data_sz(msg)) { - struct msghdr m = {NULL,}; - tsk_advance_rx_queue(sk); - __tipc_sendstream(new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } + __tipc_sendstream(new_sock, &m, 0); release_sock(new_sk); exit: release_sock(sk); From d237a7f11719ff9320721be5818352e48071aab6 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 23 Jul 2021 09:25:34 +0700 Subject: [PATCH 114/502] tipc: fix sleeping in tipc accept routine The release_sock() is blocking function, it would change the state after sleeping. In order to evaluate the stated condition outside the socket lock context, switch to use wait_woken() instead. Fixes: 6398e23cdb1d8 ("tipc: standardize accept routine") Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/socket.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ebd300c26a44..75b99b7eda22 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2652,7 +2652,7 @@ static int tipc_listen(struct socket *sock, int len) static int tipc_wait_for_accept(struct socket *sock, long timeo) { struct sock *sk = sock->sk; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; /* True wake-one mechanism for incoming connections: only @@ -2661,12 +2661,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * anymore, the common case will execute the loop only once. */ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -2678,7 +2678,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) if (signal_pending(current)) break; } - finish_wait(sk_sleep(sk), &wait); return err; } From 227adfb2b1dfbc53dfc53b9dd7a93a6298ff7c56 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 22 Jul 2021 20:01:28 +0300 Subject: [PATCH 115/502] net: Set true network header for ECN decapsulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In cases where the header straight after the tunnel header was another ethernet header (TEB), instead of the network header, the ECN decapsulation code would treat the ethernet header as if it was an IP header, resulting in mishandling and possible wrong drops or corruption of the IP header. In this case, ECT(1) is sent, so IP_ECN_decapsulate tries to copy it to the inner IPv4 header, and correct its checksum. The offset of the ECT bits in an IPv4 header corresponds to the lower 2 bits of the second octet of the destination MAC address in the ethernet header. The IPv4 checksum corresponds to end of the source address. In order to reproduce: $ ip netns add A $ ip netns add B $ ip -n A link add _v0 type veth peer name _v1 netns B $ ip -n A link set _v0 up $ ip -n A addr add dev _v0 10.254.3.1/24 $ ip -n A route add default dev _v0 scope global $ ip -n B link set _v1 up $ ip -n B addr add dev _v1 10.254.1.6/24 $ ip -n B route add default dev _v1 scope global $ ip -n B link add gre1 type gretap local 10.254.1.6 remote 10.254.3.1 key 0x49000000 $ ip -n B link set gre1 up # Now send an IPv4/GRE/Eth/IPv4 frame where the outer header has ECT(1), # and the inner header has no ECT bits set: $ cat send_pkt.py #!/usr/bin/env python3 from scapy.all import * pkt = IP(b'E\x01\x00\xa7\x00\x00\x00\x00@/`%\n\xfe\x03\x01\n\xfe\x01\x06 \x00eXI\x00' b'\x00\x00\x18\xbe\x92\xa0\xee&\x18\xb0\x92\xa0l&\x08\x00E\x00\x00}\x8b\x85' b'@\x00\x01\x01\xe4\xf2\x82\x82\x82\x01\x82\x82\x82\x02\x08\x00d\x11\xa6\xeb' b'3\x1e\x1e\\xf3\\xf7`\x00\x00\x00\x00ZN\x00\x00\x00\x00\x00\x00\x10\x11\x12' b'\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./01234' b'56789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ') send(pkt) $ sudo ip netns exec B tcpdump -neqlllvi gre1 icmp & ; sleep 1 $ sudo ip netns exec A python3 send_pkt.py In the original packet, the source/destinatio MAC addresses are dst=18:be:92:a0:ee:26 src=18:b0:92:a0:6c:26 In the received packet, they are dst=18:bd:92:a0:ee:26 src=18:b0:92:a0:6c:27 Thanks to Lahav Schlesinger and Isaac Garzon for helping me pinpoint the origin. Fixes: b723748750ec ("tunnel: Propagate ECT(1) when decapsulating as recommended by RFC6040") Cc: David S. Miller Cc: Hideaki YOSHIFUJI Cc: David Ahern Cc: Jakub Kicinski Cc: Toke Høiland-Jørgensen Signed-off-by: Gilad Naaman Acked-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0dca00745ac3..be75b409445c 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -390,7 +390,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } - skb_reset_network_header(skb); + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { From 46c7655f0b56b1ac864115441064cde9ed124f4a Mon Sep 17 00:00:00 2001 From: Kangmin Park Date: Fri, 23 Jul 2021 02:44:43 +0900 Subject: [PATCH 116/502] ipv6: decrease hop limit counter in ip6_forward() Decrease hop limit counter when deliver skb to ndp proxy. Signed-off-by: Kangmin Park Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e1b9f7ac8bad..8e6ca9ad6812 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -549,9 +549,10 @@ int ip6_forward(struct sk_buff *skb) if (net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); - if (proxied > 0) + if (proxied > 0) { + hdr->hop_limit--; return ip6_input(skb); - else if (proxied < 0) { + } else if (proxied < 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } From c92c74131a84b508aa8f079a25d7bbe10748449e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 16:05:51 +0300 Subject: [PATCH 117/502] net: dsa: mv88e6xxx: silently accept the deletion of VID 0 too The blamed commit modified the driver to accept the addition of VID 0 without doing anything, but deleting that VID still fails: [ 32.080780] mv88e6085 d0032004.mdio-mii:10 lan8: failed to kill vid 0081/0 Modify mv88e6xxx_port_vlan_leave() to do the same thing as the addition. Fixes: b8b79c414eca ("net: dsa: mv88e6xxx: Fix adding vlan 0") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index beb41572d04e..272b0535d946 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2155,7 +2155,7 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip, int i, err; if (!vid) - return -EOPNOTSUPP; + return 0; err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) From 68d1f1d4af188c290087958c75c7b89a816e1137 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 22 Jul 2021 20:21:05 +0200 Subject: [PATCH 118/502] wwan: core: Fix missing RTM_NEWLINK event for default link A wwan link created via the wwan_create_default_link procedure is never notified to the user (RTM_NEWLINK), causing issues with user tools relying on such event to track network links (NetworkManager). This is because the procedure misses a call to rtnl_configure_link(), which sets the link as initialized and notifies the new link (cf proper usage in __rtnl_newlink()). Cc: stable@vger.kernel.org Fixes: ca374290aaad ("wwan: core: support default netdev creation") Suggested-by: Sergey Ryazanov Signed-off-by: Loic Poulain Acked-by: Sergey Ryazanov Signed-off-by: David S. Miller --- drivers/net/wwan/wwan_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 3e16c318e705..674a81d79db3 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -984,6 +984,8 @@ static void wwan_create_default_link(struct wwan_device *wwandev, goto unlock; } + rtnl_configure_link(dev, NULL); /* Link initialized, notify new link */ + unlock: rtnl_unlock(); From 3ce6e1f662a910970880188ea7bfd00542bd3934 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 6 Jul 2021 23:40:34 +0900 Subject: [PATCH 119/502] loop: reintroduce global lock for safe loop_validate_file() traversal Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock") re-opened a race window for NULL pointer dereference at loop_validate_file() where commit 310ca162d779efee ("block/loop: Use global lock for ioctl() operation.") has closed. Although we need to guarantee that other loop devices will not change during traversal, we can't take remote "struct loop_device"->lo_mutex inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore, introduce a global lock dedicated for loop_validate_file() which is conditionally taken before local "struct loop_device"->lo_mutex is taken. Signed-off-by: Tetsuo Handa Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock") Signed-off-by: Jens Axboe --- drivers/block/loop.c | 128 ++++++++++++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 31 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f37b9e3d833c..f0cdff0c5fbf 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -88,6 +88,47 @@ static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); +static DEFINE_MUTEX(loop_validate_mutex); + +/** + * loop_global_lock_killable() - take locks for safe loop_validate_file() test + * + * @lo: struct loop_device + * @global: true if @lo is about to bind another "struct loop_device", false otherwise + * + * Returns 0 on success, -EINTR otherwise. + * + * Since loop_validate_file() traverses on other "struct loop_device" if + * is_loop_device() is true, we need a global lock for serializing concurrent + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. + */ +static int loop_global_lock_killable(struct loop_device *lo, bool global) +{ + int err; + + if (global) { + err = mutex_lock_killable(&loop_validate_mutex); + if (err) + return err; + } + err = mutex_lock_killable(&lo->lo_mutex); + if (err && global) + mutex_unlock(&loop_validate_mutex); + return err; +} + +/** + * loop_global_unlock() - release locks taken by loop_global_lock_killable() + * + * @lo: struct loop_device + * @global: true if @lo was about to bind another "struct loop_device", false otherwise + */ +static void loop_global_unlock(struct loop_device *lo, bool global) +{ + mutex_unlock(&lo->lo_mutex); + if (global) + mutex_unlock(&loop_validate_mutex); +} static int max_part; static int part_shift; @@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; + lockdep_assert_held(&loop_validate_mutex); if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; - if (l->lo_state != Lo_bound) { + if (l->lo_state != Lo_bound) return -EINVAL; - } + /* Order wrt setting lo->lo_backing_file in loop_configure(). */ + rmb(); f = l->lo_backing_file; } if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) @@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file = NULL, *old_file; - int error; - bool partscan; + struct file *file = fget(arg); + struct file *old_file; + int error; + bool partscan; + bool is_loop; - error = mutex_lock_killable(&lo->lo_mutex); + if (!file) + return -EBADF; + is_loop = is_loop_device(file); + error = loop_global_lock_killable(lo, is_loop); if (error) - return error; + goto out_putf; error = -ENXIO; if (lo->lo_state != Lo_bound) goto out_err; @@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; - error = -EBADF; - file = fget(arg); - if (!file) - goto out_err; - error = loop_validate_file(file, bdev); if (error) goto out_err; @@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); + + /* + * Flush loop_validate_file() before fput(), for l->lo_backing_file + * might be pointing at old_file which might be the last reference. + */ + if (!is_loop) { + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + } /* * We must drop file reference outside of lo_mutex as dropping * the file ref can take open_mutex which creates circular locking @@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, return 0; out_err: - mutex_unlock(&lo->lo_mutex); - if (file) - fput(file); + loop_global_unlock(lo, is_loop); +out_putf: + fput(file); return error; } @@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct block_device *bdev, const struct loop_config *config) { - struct file *file; - struct inode *inode; + struct file *file = fget(config->fd); + struct inode *inode; struct address_space *mapping; - int error; - loff_t size; - bool partscan; - unsigned short bsize; + int error; + loff_t size; + bool partscan; + unsigned short bsize; + bool is_loop; + + if (!file) + return -EBADF; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - error = -EBADF; - file = fget(config->fd); - if (!file) - goto out; - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_putf; } - error = mutex_lock_killable(&lo->lo_mutex); + error = loop_global_lock_killable(lo, is_loop); if (error) goto out_bdev; @@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); + /* Order wrt reading lo_state in loop_validate_file(). */ + wmb(); + lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); if (partscan) loop_reread_partitions(lo); if (!(mode & FMODE_EXCL)) @@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, return 0; out_unlock: - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); out_bdev: if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); -out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) int lo_number; struct loop_worker *pos, *worker; + /* + * Flush loop_configure() and loop_change_fd(). It is acceptable for + * loop_validate_file() to succeed, for actual clear operation has not + * started yet. + */ + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + /* + * loop_validate_file() now fails because l->lo_state != Lo_bound + * became visible. + */ + mutex_lock(&lo->lo_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { err = -ENXIO; From 9986066d94c971edf19464ed7bf5b26a91520e97 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Fri, 23 Jul 2021 13:36:18 +0530 Subject: [PATCH 120/502] octeontx2-af: Fix uninitialized variables in rvu_switch Get the number of VFs of a PF correctly by calling rvu_get_pf_numvfs in rvu_switch_disable function. Also hwvf is not required hence remove it. Fixes: 23109f8dd06d ("octeontx2-af: Introduce internal packet switching") Reported-by: kernel test robot Reported-by: Colin Ian King Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 6 ++++-- .../net/ethernet/marvell/octeontx2/af/rvu_switch.c | 11 ++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 017163fb3cd5..5fe277e354f7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -391,8 +391,10 @@ void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf) /* Get numVFs attached to this PF and first HWVF */ cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf)); - *numvfs = (cfg >> 12) & 0xFF; - *hwvf = cfg & 0xFFF; + if (numvfs) + *numvfs = (cfg >> 12) & 0xFF; + if (hwvf) + *hwvf = cfg & 0xFFF; } static int rvu_get_hwvf(struct rvu *rvu, int pcifunc) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c index 2e5379710aa5..820adf390b8e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c @@ -71,8 +71,8 @@ static int rvu_switch_install_rules(struct rvu *rvu) struct rvu_switch *rswitch = &rvu->rswitch; u16 start = rswitch->start_entry; struct rvu_hwinfo *hw = rvu->hw; - int pf, vf, numvfs, hwvf; u16 pcifunc, entry = 0; + int pf, vf, numvfs; int err; for (pf = 1; pf < hw->total_pfs; pf++) { @@ -110,8 +110,8 @@ static int rvu_switch_install_rules(struct rvu *rvu) rswitch->entry2pcifunc[entry++] = pcifunc; - rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); - for (vf = 0; vf < numvfs; vf++, hwvf++) { + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); + for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); rvu_get_nix_blkaddr(rvu, pcifunc); @@ -198,7 +198,7 @@ void rvu_switch_disable(struct rvu *rvu) struct npc_mcam_free_entry_req free_req = { 0 }; struct rvu_switch *rswitch = &rvu->rswitch; struct rvu_hwinfo *hw = rvu->hw; - int pf, vf, numvfs, hwvf; + int pf, vf, numvfs; struct msg_rsp rsp; u16 pcifunc; int err; @@ -217,7 +217,8 @@ void rvu_switch_disable(struct rvu *rvu) "Reverting RX rule for PF%d failed(%d)\n", pf, err); - for (vf = 0; vf < numvfs; vf++, hwvf++) { + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); + for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); if (err) From 52f3456a96c06760b9bfae460e39596fec7af22e Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 23 Jul 2021 18:31:32 +0300 Subject: [PATCH 121/502] net: qrtr: fix memory leaks Syzbot reported memory leak in qrtr. The problem was in unputted struct sock. qrtr_local_enqueue() function calls qrtr_port_lookup() which takes sock reference if port was found. Then there is the following check: if (!ipc || &ipc->sk == skb->sk) { ... return -ENODEV; } Since we should drop the reference before returning from this function and ipc can be non-NULL inside this if, we should add qrtr_port_put() inside this if. The similar corner case is in qrtr_endpoint_post() as Manivannan reported. In case of sock_queue_rcv_skb() failure we need to put port reference to avoid leaking struct sock pointer. Fixes: e04df98adf7d ("net: qrtr: Remove receive worker") Fixes: bdabad3e363d ("net: Add Qualcomm IPC router") Reported-and-tested-by: syzbot+35a511c72ea7356cdcf3@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Reviewed-by: Manivannan Sadhasivam Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index e6f4a6202f82..171b7f3be6ef 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -518,8 +518,10 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (!ipc) goto err; - if (sock_queue_rcv_skb(&ipc->sk, skb)) + if (sock_queue_rcv_skb(&ipc->sk, skb)) { + qrtr_port_put(ipc); goto err; + } qrtr_port_put(ipc); } @@ -839,6 +841,8 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, ipc = qrtr_port_lookup(to->sq_port); if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ + if (ipc) + qrtr_port_put(ipc); kfree_skb(skb); return -ENODEV; } From 15bbf8bb4d4ab87108ecf5f4155ec8ffa3c141d6 Mon Sep 17 00:00:00 2001 From: Paul Jakma Date: Fri, 23 Jul 2021 16:13:04 +0100 Subject: [PATCH 122/502] NIU: fix incorrect error return, missed in previous revert Commit 7930742d6, reverting 26fd962, missed out on reverting an incorrect change to a return value. The niu_pci_vpd_scan_props(..) == 1 case appears to be a normal path - treating it as an error and return -EINVAL was breaking VPD_SCAN and causing the driver to fail to load. Fix, so my Neptune card works again. Cc: Kangjie Lu Cc: Shannon Nelson Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: stable Fixes: 7930742d ('Revert "niu: fix missing checks of niu_pci_eeprom_read"') Signed-off-by: Paul Jakma Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 74e748662ec0..860644d182ab 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -8191,8 +8191,9 @@ static int niu_pci_vpd_fetch(struct niu *np, u32 start) err = niu_pci_vpd_scan_props(np, here, end); if (err < 0) return err; + /* ret == 1 is not an error */ if (err == 1) - return -EINVAL; + return 0; } return 0; } From 5ba03936c05584b6f6f79be5ebe7e5036c1dd252 Mon Sep 17 00:00:00 2001 From: Wei Shuyu Date: Mon, 28 Jun 2021 15:15:08 +0800 Subject: [PATCH 123/502] md/raid10: properly indicate failure when ending a failed write request Similar to [1], this patch fixes the same bug in raid10. Also cleanup the comments. [1] commit 2417b9869b81 ("md/raid1: properly indicate failure when ending a failed write request") Cc: stable@vger.kernel.org Fixes: 7cee6d4e6035 ("md/raid10: end bio when the device faulty") Signed-off-by: Wei Shuyu Acked-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/raid1.c | 2 -- drivers/md/raid10.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ced076ba560e..753822ca9613 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -472,8 +472,6 @@ static void raid1_end_write_request(struct bio *bio) /* * When the device is faulty, it is not necessary to * handle write error. - * For failfast, this is the only remaining device, - * We need to retry the write without FailFast. */ if (!test_bit(Faulty, &rdev->flags)) set_bit(R1BIO_WriteError, &r1_bio->state); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 13f5e6b2a73d..40e845fb9717 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -469,12 +469,12 @@ static void raid10_end_write_request(struct bio *bio) /* * When the device is faulty, it is not necessary to * handle write error. - * For failfast, this is the only remaining device, - * We need to retry the write without FailFast. */ if (!test_bit(Faulty, &rdev->flags)) set_bit(R10BIO_WriteError, &r10_bio->state); else { + /* Fail the request */ + set_bit(R10BIO_Degraded, &r10_bio->state); r10_bio->devs[slot].bio = NULL; to_put = bio; dec_rdev = 1; From 6840e17b8ea992453e2d6f460d403cb05d194e76 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:45 -0700 Subject: [PATCH 124/502] ionic: make all rx_mode work threadsafe Move the bulk of the code from ionic_set_rx_mode(), which can be called from atomic context, into ionic_lif_rx_mode() which is a safe context. A call from the stack will get pushed off into a work thread, but it is also possible to simultaneously have a call driven by a queue reconfig request from an ethtool command or fw recovery event. We add a mutex around the rx_mode work to be sure they don't collide. Fixes: 81dbc24147f9 ("ionic: change set_rx_mode from_ndo to can_sleep") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../net/ethernet/pensando/ionic/ionic_lif.c | 205 ++++++++---------- .../net/ethernet/pensando/ionic/ionic_lif.h | 4 +- 2 files changed, 96 insertions(+), 113 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index af3a5368529c..7815e9034fb8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -29,7 +29,7 @@ static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = { */ }; -static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode); +static void ionic_lif_rx_mode(struct ionic_lif *lif); static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr); static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr); static void ionic_link_status_check(struct ionic_lif *lif); @@ -77,7 +77,7 @@ static void ionic_lif_deferred_work(struct work_struct *work) switch (w->type) { case IONIC_DW_TYPE_RX_MODE: - ionic_lif_rx_mode(lif, w->rx_mode); + ionic_lif_rx_mode(lif); break; case IONIC_DW_TYPE_RX_ADDR_ADD: ionic_lif_addr_add(lif, w->addr); @@ -1301,10 +1301,8 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) return 0; } -static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add, - bool can_sleep) +static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add) { - struct ionic_deferred_work *work; unsigned int nmfilters; unsigned int nufilters; @@ -1330,63 +1328,77 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add, lif->nucast--; } - if (!can_sleep) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; - work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD : - IONIC_DW_TYPE_RX_ADDR_DEL; - memcpy(work->addr, addr, ETH_ALEN); - netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n", - add ? "add" : "del", addr); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - netdev_dbg(lif->netdev, "rx_filter %s %pM\n", - add ? "add" : "del", addr); - if (add) - return ionic_lif_addr_add(lif, addr); - else - return ionic_lif_addr_del(lif, addr); - } + netdev_dbg(lif->netdev, "rx_filter %s %pM\n", + add ? "add" : "del", addr); + if (add) + return ionic_lif_addr_add(lif, addr); + else + return ionic_lif_addr_del(lif, addr); return 0; } static int ionic_addr_add(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_SLEEP); -} - -static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr) -{ - return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_NOT_SLEEP); + return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR); } static int ionic_addr_del(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_SLEEP); + return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR); } -static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr) +static void ionic_lif_rx_mode(struct ionic_lif *lif) { - return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_NOT_SLEEP); -} - -static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) -{ - struct ionic_admin_ctx ctx = { - .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_mode_set = { - .opcode = IONIC_CMD_RX_MODE_SET, - .lif_index = cpu_to_le16(lif->index), - .rx_mode = cpu_to_le16(rx_mode), - }, - }; + struct net_device *netdev = lif->netdev; + unsigned int nfilters; + unsigned int nd_flags; char buf[128]; - int err; + u16 rx_mode; int i; #define REMAIN(__x) (sizeof(buf) - (__x)) + mutex_lock(&lif->config_lock); + + /* grab the flags once for local use */ + nd_flags = netdev->flags; + + rx_mode = IONIC_RX_MODE_F_UNICAST; + rx_mode |= (nd_flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; + rx_mode |= (nd_flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0; + rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; + rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0; + + /* sync unicast addresses + * next check to see if we're in an overflow state + * if so, we track that we overflowed and enable NIC PROMISC + * else if the overflow is set and not needed + * we remove our overflow flag and check the netdev flags + * to see if we can disable NIC PROMISC + */ + __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); + nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); + if (netdev_uc_count(netdev) + 1 > nfilters) { + rx_mode |= IONIC_RX_MODE_F_PROMISC; + lif->uc_overflow = true; + } else if (lif->uc_overflow) { + lif->uc_overflow = false; + if (!(nd_flags & IFF_PROMISC)) + rx_mode &= ~IONIC_RX_MODE_F_PROMISC; + } + + /* same for multicast */ + __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); + nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); + if (netdev_mc_count(netdev) > nfilters) { + rx_mode |= IONIC_RX_MODE_F_ALLMULTI; + lif->mc_overflow = true; + } else if (lif->mc_overflow) { + lif->mc_overflow = false; + if (!(nd_flags & IFF_ALLMULTI)) + rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI; + } + i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:", lif->rx_mode, rx_mode); if (rx_mode & IONIC_RX_MODE_F_UNICAST) @@ -1399,79 +1411,48 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC"); if (rx_mode & IONIC_RX_MODE_F_ALLMULTI) i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI"); - netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf); + if (rx_mode & IONIC_RX_MODE_F_RDMA_SNIFFER) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_RDMA_SNIFFER"); + netdev_dbg(netdev, "lif%d %s\n", lif->index, buf); - err = ionic_adminq_post_wait(lif, &ctx); - if (err) - netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n", - rx_mode, err); - else - lif->rx_mode = rx_mode; + if (lif->rx_mode != rx_mode) { + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_mode_set = { + .opcode = IONIC_CMD_RX_MODE_SET, + .lif_index = cpu_to_le16(lif->index), + }, + }; + int err; + + ctx.cmd.rx_mode_set.rx_mode = cpu_to_le16(rx_mode); + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + netdev_warn(netdev, "set rx_mode 0x%04x failed: %d\n", + rx_mode, err); + else + lif->rx_mode = rx_mode; + } + + mutex_unlock(&lif->config_lock); } static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) { struct ionic_lif *lif = netdev_priv(netdev); struct ionic_deferred_work *work; - unsigned int nfilters; - unsigned int rx_mode; - rx_mode = IONIC_RX_MODE_F_UNICAST; - rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; - rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0; - rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; - rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0; - - /* sync unicast addresses - * next check to see if we're in an overflow state - * if so, we track that we overflowed and enable NIC PROMISC - * else if the overflow is set and not needed - * we remove our overflow flag and check the netdev flags - * to see if we can disable NIC PROMISC - */ - if (can_sleep) - __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); - else - __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); - nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); - if (netdev_uc_count(netdev) + 1 > nfilters) { - rx_mode |= IONIC_RX_MODE_F_PROMISC; - lif->uc_overflow = true; - } else if (lif->uc_overflow) { - lif->uc_overflow = false; - if (!(netdev->flags & IFF_PROMISC)) - rx_mode &= ~IONIC_RX_MODE_F_PROMISC; - } - - /* same for multicast */ - if (can_sleep) - __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); - else - __dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); - nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); - if (netdev_mc_count(netdev) > nfilters) { - rx_mode |= IONIC_RX_MODE_F_ALLMULTI; - lif->mc_overflow = true; - } else if (lif->mc_overflow) { - lif->mc_overflow = false; - if (!(netdev->flags & IFF_ALLMULTI)) - rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI; - } - - if (lif->rx_mode != rx_mode) { - if (!can_sleep) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - netdev_err(lif->netdev, "rxmode change dropped\n"); - return; - } - work->type = IONIC_DW_TYPE_RX_MODE; - work->rx_mode = rx_mode; - netdev_dbg(lif->netdev, "deferred: rx_mode\n"); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - ionic_lif_rx_mode(lif, rx_mode); + if (!can_sleep) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "rxmode change dropped\n"); + return; } + work->type = IONIC_DW_TYPE_RX_MODE; + netdev_dbg(lif->netdev, "deferred: rx_mode\n"); + ionic_lif_deferred_enqueue(&lif->deferred, work); + } else { + ionic_lif_rx_mode(lif); } } @@ -3058,6 +3039,7 @@ void ionic_lif_deinit(struct ionic_lif *lif) ionic_lif_qcq_deinit(lif, lif->notifyqcq); ionic_lif_qcq_deinit(lif, lif->adminqcq); + mutex_destroy(&lif->config_lock); mutex_destroy(&lif->queue_lock); ionic_lif_reset(lif); } @@ -3185,7 +3167,7 @@ static int ionic_station_set(struct ionic_lif *lif) */ if (!ether_addr_equal(ctx.comp.lif_getattr.mac, netdev->dev_addr)) - ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR); } else { /* Update the netdev mac with the device's mac */ memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); @@ -3202,7 +3184,7 @@ static int ionic_station_set(struct ionic_lif *lif) netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR); return 0; } @@ -3225,6 +3207,7 @@ int ionic_lif_init(struct ionic_lif *lif) lif->hw_index = le16_to_cpu(comp.hw_index); mutex_init(&lif->queue_lock); + mutex_init(&lif->config_lock); /* now that we have the hw_index we can figure out our doorbell page */ lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 346506f01715..af291303bd7a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -108,7 +108,6 @@ struct ionic_deferred_work { struct list_head list; enum ionic_deferred_work_type type; union { - unsigned int rx_mode; u8 addr[ETH_ALEN]; u8 fw_status; }; @@ -179,6 +178,7 @@ struct ionic_lif { unsigned int index; unsigned int hw_index; struct mutex queue_lock; /* lock for queue structures */ + struct mutex config_lock; /* lock for config actions */ spinlock_t adminq_lock; /* lock for AdminQ operations */ struct ionic_qcq *adminqcq; struct ionic_qcq *notifyqcq; @@ -199,7 +199,7 @@ struct ionic_lif { unsigned int nrxq_descs; u32 rx_copybreak; u64 rxq_features; - unsigned int rx_mode; + u16 rx_mode; u64 hw_features; bool registered; bool mc_overflow; From f79eef711eb57d56874b08ea11db69221de54a6d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:46 -0700 Subject: [PATCH 125/502] ionic: catch no ptp support earlier If PTP configuration is attempted on ports that don't support it, such as VF ports, the driver will return an error status -95, or EOPNOSUPP and print an error message enp98s0: hwstamp set failed: -95 Because some daemons can retry every few seconds, this can end up filling the dmesg log and pushing out other more useful messages. We can catch this issue earlier in our handling and return the error without a log message. Fixes: 829600ce5e4e ("ionic: add ts_config replay") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.h | 7 ++----- drivers/net/ethernet/pensando/ionic/ionic_phc.c | 10 +++++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index af291303bd7a..69ab59fedb6c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -302,7 +302,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type, int ionic_lif_size(struct ionic *ionic); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) -int ionic_lif_hwstamp_replay(struct ionic_lif *lif); +void ionic_lif_hwstamp_replay(struct ionic_lif *lif); int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr); int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr); ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter); @@ -311,10 +311,7 @@ void ionic_lif_unregister_phc(struct ionic_lif *lif); void ionic_lif_alloc_phc(struct ionic_lif *lif); void ionic_lif_free_phc(struct ionic_lif *lif); #else -static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif) -{ - return -EOPNOTSUPP; -} +static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {} static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c index a87c87e86aef..6e2403c71608 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c @@ -188,6 +188,9 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) struct hwtstamp_config config; int err; + if (!lif->phc || !lif->phc->ptp) + return -EOPNOTSUPP; + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; @@ -203,15 +206,16 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) return 0; } -int ionic_lif_hwstamp_replay(struct ionic_lif *lif) +void ionic_lif_hwstamp_replay(struct ionic_lif *lif) { int err; + if (!lif->phc || !lif->phc->ptp) + return; + err = ionic_lif_hwstamp_set_ts_config(lif, NULL); if (err) netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err); - - return err; } int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) From a6ff85e0a2d9d074a4b4c291ba9ec1e5b0aba22b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:47 -0700 Subject: [PATCH 126/502] ionic: remove intr coalesce update from napi Move the interrupt coalesce value update out of the napi thread and into the dim_work thread and set it only when it has actually changed. Fixes: 04a834592bf5 ("ionic: dynamic interrupt moderation") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 14 +++++++++++++- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 4 ---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 7815e9034fb8..e795fa63ca12 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -53,7 +53,19 @@ static void ionic_dim_work(struct work_struct *work) cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); qcq = container_of(dim, struct ionic_qcq, dim); new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec); - qcq->intr.dim_coal_hw = new_coal ? new_coal : 1; + new_coal = new_coal ? new_coal : 1; + + if (qcq->intr.dim_coal_hw != new_coal) { + unsigned int qi = qcq->cq.bound_q->index; + struct ionic_lif *lif = qcq->q.lif; + + qcq->intr.dim_coal_hw = new_coal; + + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + lif->rxqcqs[qi]->intr.index, + qcq->intr.dim_coal_hw); + } + dim->state = DIM_START_MEASURE; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 08934888575c..9d3a04110685 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -463,10 +463,6 @@ static void ionic_dim_update(struct ionic_qcq *qcq) lif = qcq->q.lif; qi = qcq->cq.bound_q->index; - ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->rxqcqs[qi]->intr.index, - qcq->intr.dim_coal_hw); - dim_update_sample(qcq->cq.bound_intr->rearm_count, lif->txqstats[qi].pkts, lif->txqstats[qi].bytes, From 76ed8a4a00b484dcccef819ef2618bcf8e46f560 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:48 -0700 Subject: [PATCH 127/502] ionic: fix up dim accounting for tx and rx We need to count the correct Tx and/or Rx packets for dynamic interrupt moderation, depending on which we're processing on the queue interrupt. Fixes: 04a834592bf5 ("ionic: dynamic interrupt moderation") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- .../net/ethernet/pensando/ionic/ionic_txrx.c | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 9d3a04110685..1c6e2b9fc96b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -451,11 +451,12 @@ void ionic_rx_empty(struct ionic_queue *q) q->tail_idx = 0; } -static void ionic_dim_update(struct ionic_qcq *qcq) +static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode) { struct dim_sample dim_sample; struct ionic_lif *lif; unsigned int qi; + u64 pkts, bytes; if (!qcq->intr.dim_coal_hw) return; @@ -463,10 +464,23 @@ static void ionic_dim_update(struct ionic_qcq *qcq) lif = qcq->q.lif; qi = qcq->cq.bound_q->index; + switch (napi_mode) { + case IONIC_LIF_F_TX_DIM_INTR: + pkts = lif->txqstats[qi].pkts; + bytes = lif->txqstats[qi].bytes; + break; + case IONIC_LIF_F_RX_DIM_INTR: + pkts = lif->rxqstats[qi].pkts; + bytes = lif->rxqstats[qi].bytes; + break; + default: + pkts = lif->txqstats[qi].pkts + lif->rxqstats[qi].pkts; + bytes = lif->txqstats[qi].bytes + lif->rxqstats[qi].bytes; + break; + } + dim_update_sample(qcq->cq.bound_intr->rearm_count, - lif->txqstats[qi].pkts, - lif->txqstats[qi].bytes, - &dim_sample); + pkts, bytes, &dim_sample); net_dim(&qcq->dim, dim_sample); } @@ -487,7 +501,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) ionic_tx_service, NULL, NULL); if (work_done < budget && napi_complete_done(napi, work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; cq->bound_intr->rearm_count++; } @@ -526,7 +540,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(cq->bound_q); if (work_done < budget && napi_complete_done(napi, work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; cq->bound_intr->rearm_count++; } @@ -572,7 +586,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(rxcq->bound_q); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, 0); flags |= IONIC_INTR_CRED_UNMASK; rxcq->bound_intr->rearm_count++; } From f07f9815b7046e25cc32bf8542c9c0bbc5eb6e0e Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 23 Jul 2021 11:02:49 -0700 Subject: [PATCH 128/502] ionic: count csum_none when offload enabled Be sure to count the csum_none cases when csum offload is enabled. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 1c6e2b9fc96b..08870190e4d2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -274,12 +274,11 @@ static void ionic_rx_clean(struct ionic_queue *q, } } - if (likely(netdev->features & NETIF_F_RXCSUM)) { - if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) { - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum = (__force __wsum)le16_to_cpu(comp->csum); - stats->csum_complete++; - } + if (likely(netdev->features & NETIF_F_RXCSUM) && + (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC)) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = (__force __wsum)le16_to_cpu(comp->csum); + stats->csum_complete++; } else { stats->csum_none++; } From cdf72837cda89b2d38bd18fbe6cc591c1d5f2416 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 23 Jul 2021 05:41:53 +0930 Subject: [PATCH 129/502] ALSA: scarlett2: Fix Mute/Dim/MSD Mode control names Append "Playback Switch" to the names of "Mute" and "Dim" controls, and append "Switch" to the "MSD Mode" control as per Documentation/sound/designs/control-names.rst. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/77f1000652c37e3217fb8dad8e156bc6392abc0b.1626959758.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index f9d698a37153..347995ea39e4 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -228,7 +228,7 @@ enum { }; static const char *const scarlett2_dim_mute_names[SCARLETT2_DIM_MUTE_COUNT] = { - "Mute", "Dim" + "Mute Playback Switch", "Dim Playback Switch" }; /* Description of each hardware port type: @@ -3455,7 +3455,7 @@ static int scarlett2_add_msd_ctl(struct usb_mixer_interface *mixer) /* Add MSD control */ return scarlett2_add_new_ctl(mixer, &scarlett2_msd_ctl, - 0, 1, "MSD Mode", NULL); + 0, 1, "MSD Mode Switch", NULL); } /*** Cleanup/Suspend Callbacks ***/ From d3a4f784d20c696b134b916f57956f12a37ecd47 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 23 Jul 2021 05:42:08 +0930 Subject: [PATCH 130/502] ALSA: scarlett2: Fix Direct Monitor control name for 2i2 The Direct Monitor control for the 2i2 is an enumerated value, not a boolean. Fix the control name to say "Playback Enum" instead of "Playback Switch" in this case. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/faf5de1d2100038e7d07520d770fda4a1adc276a.1626959758.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 347995ea39e4..fa604b61066f 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -2530,14 +2530,18 @@ static int scarlett2_add_direct_monitor_ctl(struct usb_mixer_interface *mixer) { struct scarlett2_data *private = mixer->private_data; const struct scarlett2_device_info *info = private->info; + const char *s; if (!info->direct_monitor) return 0; + s = info->direct_monitor == 1 + ? "Direct Monitor Playback Switch" + : "Direct Monitor Playback Enum"; + return scarlett2_add_new_ctl( mixer, &scarlett2_direct_monitor_ctl[info->direct_monitor - 1], - 0, 1, "Direct Monitor Playback Switch", - &private->direct_monitor_ctl); + 0, 1, s, &private->direct_monitor_ctl); } /*** Speaker Switching Control ***/ From 9ee0fc8366ddce380547878640708f1bd7dd2ead Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 23 Jul 2021 05:42:48 +0930 Subject: [PATCH 131/502] ALSA: scarlett2: Correct channel mute status after mute button pressed After the hardware mute button is pressed, private->vol_updated is set so that the mute status is invalidated. As the channel mute values may be affected by the global mute value, update scarlett2_mute_ctl_get() to call scarlett2_update_volumes() if private->vol_updated is set. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/aa18ddbf8d8bd7f31832ab1b6b6057c00b931202.1626959758.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index fa604b61066f..3457fbc8108f 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -1856,9 +1856,15 @@ static int scarlett2_mute_ctl_get(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *ucontrol) { struct usb_mixer_elem_info *elem = kctl->private_data; - struct scarlett2_data *private = elem->head.mixer->private_data; + struct usb_mixer_interface *mixer = elem->head.mixer; + struct scarlett2_data *private = mixer->private_data; int index = line_out_remap(private, elem->control); + mutex_lock(&private->data_mutex); + if (private->vol_updated) + scarlett2_update_volumes(mixer); + mutex_unlock(&private->data_mutex); + ucontrol->value.integer.value[0] = private->mute_switch[index]; return 0; } From 2b8b12be9b9752c36efda38b7dd5d83d790d01d8 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Fri, 23 Jul 2021 05:43:26 +0930 Subject: [PATCH 132/502] ALSA: scarlett2: Fix line out/speaker switching notifications The values of the line output controls can change when the SW/HW switches are set to HW, and also when speaker switching is enabled. These notifications were sent with a mask of only SNDRV_CTL_EVENT_MASK_INFO. Change the notifications to set the SNDRV_CTL_EVENT_MASK_VALUE mask bit as well. When the mute control is updated, the notification was sent with a mask of SNDRV_CTL_EVENT_MASK_INFO. Change the mask to the correct value of SNDRV_CTL_EVENT_MASK_VALUE. Signed-off-by: Geoffrey D. Bennett Link: https://lore.kernel.org/r/8192e15ba62fa4bc90425c005f265c0de530be20.1626959758.git.g@b4.vu Signed-off-by: Takashi Iwai --- sound/usb/mixer_scarlett_gen2.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 3457fbc8108f..3d5848d5481b 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -1961,10 +1961,12 @@ static void scarlett2_vol_ctl_set_writable(struct usb_mixer_interface *mixer, ~SNDRV_CTL_ELEM_ACCESS_WRITE; } - /* Notify of write bit change */ - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO, + /* Notify of write bit and possible value change */ + snd_ctl_notify(card, + SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO, &private->vol_ctls[index]->id); - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO, + snd_ctl_notify(card, + SNDRV_CTL_EVENT_MASK_VALUE | SNDRV_CTL_EVENT_MASK_INFO, &private->mute_ctls[index]->id); } @@ -2599,7 +2601,9 @@ static int scarlett2_speaker_switch_enable(struct usb_mixer_interface *mixer) /* disable the line out SW/HW switch */ scarlett2_sw_hw_ctl_ro(private, i); - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_INFO, + snd_ctl_notify(card, + SNDRV_CTL_EVENT_MASK_VALUE | + SNDRV_CTL_EVENT_MASK_INFO, &private->sw_hw_ctls[i]->id); } @@ -2923,7 +2927,7 @@ static int scarlett2_dim_mute_ctl_put(struct snd_kcontrol *kctl, if (private->vol_sw_hw_switch[line_index]) { private->mute_switch[line_index] = val; snd_ctl_notify(mixer->chip->card, - SNDRV_CTL_EVENT_MASK_INFO, + SNDRV_CTL_EVENT_MASK_VALUE, &private->mute_ctls[i]->id); } } From 4511781f95da0a3b2bad34f3f5e3967e80cd2d18 Mon Sep 17 00:00:00 2001 From: "chihhao.chen" Date: Sat, 24 Jul 2021 12:23:41 +0800 Subject: [PATCH 133/502] ALSA: usb-audio: fix incorrect clock source setting The following scenario describes an echo test for Samsung USBC Headset (AKG) with VID/PID (0x04e8/0xa051). We first start a capture stream(USB IN transfer) in 96Khz/24bit/1ch mode. In clock find source function, we get value 0x2 for clock selector and 0x1 for clock source. Kernel-4.14 behavior Since clock source is valid so clock selector was not set again. We pass through this function and start a playback stream(USB OUT transfer) in 48Khz/32bit/2ch mode. This time we get value 0x1 for clock selector and 0x1 for clock source. Finally clock id with this setting is 0x9. Kernel-5.10 behavior Clock selector was always set one more time even it is valid. When we start a playback stream, we will get 0x2 for clock selector and 0x1 for clock source. In this case clock id becomes 0xA. This is an incorrect clock source setting and results in severe noises. We see wrong data rate in USB IN transfer. (From 288 bytes/ms becomes 144 bytes/ms) It should keep in 288 bytes/ms. This earphone works fine on older kernel version load because this is a newly-added behavior. Fixes: d2e8f641257d ("ALSA: usb-audio: Explicitly set up the clock selector") Signed-off-by: chihhao.chen Link: https://lore.kernel.org/r/1627100621-19225-1-git-send-email-chihhao.chen@mediatek.com Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 52de52288e10..14456f61539e 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -324,6 +324,12 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, sources[ret - 1], visited, validate); if (ret > 0) { + /* + * For Samsung USBC Headset (AKG), setting clock selector again + * will result in incorrect default clock setting problems + */ + if (chip->usb_id == USB_ID(0x04e8, 0xa051)) + return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); if (err < 0) return err; From f5d156c7bfab7d728b2fd35bc63eab12eda18125 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 19 Jul 2021 15:34:37 +0800 Subject: [PATCH 134/502] arm64: dts: imx8mp: remove fallback compatible string for FlexCAN FlexCAN on i.MX8MP is not derived from i.MX6Q, instead reuses from i.MX8QM with extra ECC added and default is enabled, so that the FlexCAN would be put into freeze mode without FLEXCAN_QUIRK_DISABLE_MECR quirk. This patch removes "fsl,imx6q-flexcan" fallback compatible string since it's not compatible with the i.MX6Q. Link: https://lore.kernel.org/r/20210719073437.32078-1-qiangqing.zhang@nxp.com Signed-off-by: Joakim Zhang Reviewed-by: Fabio Estevam Signed-off-by: Marc Kleine-Budde --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ca38d0d6c3c4..f4eaab3ecf03 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -579,7 +579,7 @@ }; flexcan1: can@308c0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308c0000 0x10000>; interrupts = ; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, @@ -594,7 +594,7 @@ }; flexcan2: can@308d0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308d0000 0x10000>; interrupts = ; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, From 54f93336d000229f72c26d8a3f69dd256b744528 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 22 Jul 2021 15:08:19 +0800 Subject: [PATCH 135/502] can: raw: raw_setsockopt(): fix raw_rcv panic for sock UAF We get a bug during ltp can_filter test as following. =========================================== [60919.264984] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [60919.265223] PGD 8000003dda726067 P4D 8000003dda726067 PUD 3dda727067 PMD 0 [60919.265443] Oops: 0000 [#1] SMP PTI [60919.265550] CPU: 30 PID: 3638365 Comm: can_filter Kdump: loaded Tainted: G W 4.19.90+ #1 [60919.266068] RIP: 0010:selinux_socket_sock_rcv_skb+0x3e/0x200 [60919.293289] RSP: 0018:ffff8d53bfc03cf8 EFLAGS: 00010246 [60919.307140] RAX: 0000000000000000 RBX: 000000000000001d RCX: 0000000000000007 [60919.320756] RDX: 0000000000000001 RSI: ffff8d5104a8ed00 RDI: ffff8d53bfc03d30 [60919.334319] RBP: ffff8d9338056800 R08: ffff8d53bfc29d80 R09: 0000000000000001 [60919.347969] R10: ffff8d53bfc03ec0 R11: ffffb8526ef47c98 R12: ffff8d53bfc03d30 [60919.350320] perf: interrupt took too long (3063 > 2500), lowering kernel.perf_event_max_sample_rate to 65000 [60919.361148] R13: 0000000000000001 R14: ffff8d53bcf90000 R15: 0000000000000000 [60919.361151] FS: 00007fb78b6b3600(0000) GS:ffff8d53bfc00000(0000) knlGS:0000000000000000 [60919.400812] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [60919.413730] CR2: 0000000000000010 CR3: 0000003e3f784006 CR4: 00000000007606e0 [60919.426479] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [60919.439339] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [60919.451608] PKRU: 55555554 [60919.463622] Call Trace: [60919.475617] [60919.487122] ? update_load_avg+0x89/0x5d0 [60919.498478] ? update_load_avg+0x89/0x5d0 [60919.509822] ? account_entity_enqueue+0xc5/0xf0 [60919.520709] security_sock_rcv_skb+0x2a/0x40 [60919.531413] sk_filter_trim_cap+0x47/0x1b0 [60919.542178] ? kmem_cache_alloc+0x38/0x1b0 [60919.552444] sock_queue_rcv_skb+0x17/0x30 [60919.562477] raw_rcv+0x110/0x190 [can_raw] [60919.572539] can_rcv_filter+0xbc/0x1b0 [can] [60919.582173] can_receive+0x6b/0xb0 [can] [60919.591595] can_rcv+0x31/0x70 [can] [60919.600783] __netif_receive_skb_one_core+0x5a/0x80 [60919.609864] process_backlog+0x9b/0x150 [60919.618691] net_rx_action+0x156/0x400 [60919.627310] ? sched_clock_cpu+0xc/0xa0 [60919.635714] __do_softirq+0xe8/0x2e9 [60919.644161] do_softirq_own_stack+0x2a/0x40 [60919.652154] [60919.659899] do_softirq.part.17+0x4f/0x60 [60919.667475] __local_bh_enable_ip+0x60/0x70 [60919.675089] __dev_queue_xmit+0x539/0x920 [60919.682267] ? finish_wait+0x80/0x80 [60919.689218] ? finish_wait+0x80/0x80 [60919.695886] ? sock_alloc_send_pskb+0x211/0x230 [60919.702395] ? can_send+0xe5/0x1f0 [can] [60919.708882] can_send+0xe5/0x1f0 [can] [60919.715037] raw_sendmsg+0x16d/0x268 [can_raw] It's because raw_setsockopt() concurrently with unregister_netdevice_many(). Concurrent scenario as following. cpu0 cpu1 raw_bind raw_setsockopt unregister_netdevice_many unlist_netdevice dev_get_by_index raw_notifier raw_enable_filters ...... can_rx_register can_rcv_list_find(..., net->can.rx_alldev_list) ...... sock_close raw_release(sock_a) ...... can_receive can_rcv_filter(net->can.rx_alldev_list, ...) raw_rcv(skb, sock_a) BUG After unlist_netdevice(), dev_get_by_index() return NULL in raw_setsockopt(). Function raw_enable_filters() will add sock and can_filter to net->can.rx_alldev_list. Then the sock is closed. Followed by, we sock_sendmsg() to a new vcan device use the same can_filter. Protocol stack match the old receiver whose sock has been released on net->can.rx_alldev_list in can_rcv_filter(). Function raw_rcv() uses the freed sock. UAF BUG is triggered. We can find that the key issue is that net_device has not been protected in raw_setsockopt(). Use rtnl_lock to protect net_device in raw_setsockopt(). Fixes: c18ce101f2e4 ("[CAN]: Add raw protocol") Link: https://lore.kernel.org/r/20210722070819.1048263-1-william.xuanziyang@huawei.com Cc: linux-stable Signed-off-by: Ziyang Xuan Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/raw.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/net/can/raw.c b/net/can/raw.c index ed4fcb7ab0c3..cd5a49380116 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -546,10 +546,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; } + rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) + if (ro->bound && ro->ifindex) { dev = dev_get_by_index(sock_net(sk), ro->ifindex); + if (!dev) { + if (count > 1) + kfree(filter); + err = -ENODEV; + goto out_fil; + } + } if (ro->bound) { /* (try to) register the new filters */ @@ -588,6 +596,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, dev_put(dev); release_sock(sk); + rtnl_unlock(); break; @@ -600,10 +609,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, err_mask &= CAN_ERR_MASK; + rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) + if (ro->bound && ro->ifindex) { dev = dev_get_by_index(sock_net(sk), ro->ifindex); + if (!dev) { + err = -ENODEV; + goto out_err; + } + } /* remove current error mask */ if (ro->bound) { @@ -627,6 +642,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, dev_put(dev); release_sock(sk); + rtnl_unlock(); break; From 0c71437dd50dd687c15d8ca80b3b68f10bb21d63 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 14 Jul 2021 13:16:02 +0200 Subject: [PATCH 136/502] can: j1939: j1939_session_deactivate(): clarify lifetime of session object The j1939_session_deactivate() is decrementing the session ref-count and potentially can free() the session. This would cause use-after-free situation. However, the code calling j1939_session_deactivate() does always hold another reference to the session, so that it would not be free()ed in this code path. This patch adds a comment to make this clear and a WARN_ON, to ensure that future changes will not violate this requirement. Further this patch avoids dereferencing the session pointer as a precaution to avoid use-after-free if the session is actually free()ed. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/r/20210714111602.24021-1-o.rempel@pengutronix.de Reported-by: Xiaochen Zou Signed-off-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index c3946c355882..bb1092c3e7e3 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1075,11 +1075,16 @@ static bool j1939_session_deactivate_locked(struct j1939_session *session) static bool j1939_session_deactivate(struct j1939_session *session) { + struct j1939_priv *priv = session->priv; bool active; - j1939_session_list_lock(session->priv); + j1939_session_list_lock(priv); + /* This function should be called with a session ref-count of at + * least 2. + */ + WARN_ON_ONCE(kref_read(&session->kref) < 2); active = j1939_session_deactivate_locked(session); - j1939_session_list_unlock(session->priv); + j1939_session_list_unlock(priv); return active; } From c6eea1c8bda56737752465a298dc6ce07d6b8ce3 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 6 Jul 2021 19:00:08 +0800 Subject: [PATCH 137/502] can: j1939: j1939_xtp_rx_dat_one(): fix rxtimer value between consecutive TP.DT to 750ms For receive side, the max time interval between two consecutive TP.DT should be 750ms. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Link: https://lore.kernel.org/r/1625569210-47506-1-git-send-email-zhangchangzhong@huawei.com Cc: linux-stable Signed-off-by: Zhang Changzhong Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index bb1092c3e7e3..bdc95bd7a851 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1874,7 +1874,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, if (!session->transmission) j1939_tp_schedule_txtimer(session, 0); } else { - j1939_tp_set_rxtimeout(session, 250); + j1939_tp_set_rxtimeout(session, 750); } session->last_cmd = 0xff; consume_skb(se_skb); From 590eb2b7d8cfafb27e8108d52d4bf4850626d31d Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Fri, 25 Jun 2021 15:09:29 +0200 Subject: [PATCH 138/502] can: peak_usb: pcan_usb_handle_bus_evt(): fix reading rxerr/txerr values This patch fixes an incorrect way of reading error counters in messages received for this purpose from the PCAN-USB interface. These messages inform about the increase or decrease of the error counters, whose values are placed in bytes 1 and 2 of the message data (not 0 and 1). Fixes: ea8b33bde76c ("can: pcan_usb: add support of rxerr/txerr counters") Link: https://lore.kernel.org/r/20210625130931.27438-4-s.grosjean@peak-system.com Cc: linux-stable Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 1d6f77252f01..899a3d21b77f 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -117,7 +117,8 @@ #define PCAN_USB_BERR_MASK (PCAN_USB_ERR_RXERR | PCAN_USB_ERR_TXERR) /* identify bus event packets with rx/tx error counters */ -#define PCAN_USB_ERR_CNT 0x80 +#define PCAN_USB_ERR_CNT_DEC 0x00 /* counters are decreasing */ +#define PCAN_USB_ERR_CNT_INC 0x80 /* counters are increasing */ /* private to PCAN-USB adapter */ struct pcan_usb { @@ -608,11 +609,12 @@ static int pcan_usb_handle_bus_evt(struct pcan_usb_msg_context *mc, u8 ir) /* acccording to the content of the packet */ switch (ir) { - case PCAN_USB_ERR_CNT: + case PCAN_USB_ERR_CNT_DEC: + case PCAN_USB_ERR_CNT_INC: /* save rx/tx error counters from in the device context */ - pdev->bec.rxerr = mc->ptr[0]; - pdev->bec.txerr = mc->ptr[1]; + pdev->bec.rxerr = mc->ptr[1]; + pdev->bec.txerr = mc->ptr[2]; break; default: From ef68a717960658e6a1e5f08adb0574326e9a12c2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 24 Apr 2021 16:20:39 +0200 Subject: [PATCH 139/502] can: mcp251xfd: mcp251xfd_irq(): stop timestamping worker in case error in IRQ In case an error occurred in the IRQ handler, the chip status is dumped via devcoredump and all IRQs are disabled, but the chip stays powered for further analysis. The chip is in an undefined state and will not receive any CAN frames, so shut down the timestamping worker, which reads the TBC register regularly, too. This avoids any CRC read error messages if there is a communication problem with the chip. Fixes: efd8d98dfb90 ("can: mcp251xfd: add HW timestamp infrastructure") Link: https://lore.kernel.org/r/20210724155131.471303-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 47c3f408a799..9ae48072b6c6 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2300,6 +2300,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) err, priv->regs_status.intf); mcp251xfd_dump(priv); mcp251xfd_chip_interrupts_disable(priv); + mcp251xfd_timestamp_stop(priv); return handled; } From 3cf4375a090473d240281a0d2b04a3a5aaeac34b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 23 Jul 2021 18:46:01 -0400 Subject: [PATCH 140/502] tipc: do not write skb_shinfo frags when doing decrytion One skb's skb_shinfo frags are not writable, and they can be shared with other skbs' like by pskb_copy(). To write the frags may cause other skb's data crash. So before doing en/decryption, skb_cow_data() should always be called for a cloned or nonlinear skb if req dst is using the same sg as req src. While at it, the likely branch can be removed, as it will be covered by skb_cow_data(). Note that esp_input() has the same issue, and I will fix it in another patch. tipc_aead_encrypt() doesn't have this issue, as it only processes linear data in the unlikely branch. Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/crypto.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index e5c43d4d5a75..c9391d38de85 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -898,16 +898,10 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, if (unlikely(!aead)) return -ENOKEY; - /* Cow skb data if needed */ - if (likely(!skb_cloned(skb) && - (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) { - nsg = 1 + skb_shinfo(skb)->nr_frags; - } else { - nsg = skb_cow_data(skb, 0, &unused); - if (unlikely(nsg < 0)) { - pr_err("RX: skb_cow_data() returned %d\n", nsg); - return nsg; - } + nsg = skb_cow_data(skb, 0, &unused); + if (unlikely(nsg < 0)) { + pr_err("RX: skb_cow_data() returned %d\n", nsg); + return nsg; } /* Allocate memory for the AEAD operation */ From 89bc7f456cd40e0be7b94f4fdae9186f22b76a05 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 23 Jul 2021 17:53:48 -0400 Subject: [PATCH 141/502] bnxt_en: Add missing periodic PHC overflow check We use the timecounter APIs for the 48-bit PHC and packet timestamps. We must periodically update the timecounter at roughly half the overflow interval. The overflow interval is about 78 hours, so update it every 19 hours (1/4 interval) for some extra margins. Fixes: 390862f45c85 ("bnxt_en: Get the full 48-bit hardware timestamp periodically") Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 7 +++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 9089e7f3fbd4..ec381c2423b8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -353,6 +353,12 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) bnxt_ptp_get_current_time(bp); ptp->next_period = now + HZ; + if (time_after_eq(now, ptp->next_overflow_check)) { + spin_lock_bh(&ptp->ptp_lock); + timecounter_read(&ptp->tc); + spin_unlock_bh(&ptp->ptp_lock); + ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD; + } return HZ; } @@ -423,6 +429,7 @@ int bnxt_ptp_init(struct bnxt *bp) ptp->cc.shift = 0; ptp->cc.mult = 1; + ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD; timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); ptp->ptp_info = bnxt_ptp_caps; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 4135ea3ec788..254ba7bc0f99 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -32,6 +32,10 @@ struct bnxt_ptp_cfg { u64 current_time; u64 old_time; unsigned long next_period; + unsigned long next_overflow_check; + /* 48-bit PHC overflows in 78 hours. Check overflow every 19 hours. */ + #define BNXT_PHC_OVERFLOW_PERIOD (19 * 3600 * HZ) + u16 tx_seqid; struct bnxt *bp; atomic_t tx_avail; From 78d9d8005e4556448f398d876f29d0ca7ab8e398 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 16 Jul 2021 21:40:51 +0800 Subject: [PATCH 142/502] riscv: stacktrace: Fix NULL pointer dereference When CONFIG_FRAME_POINTER=y, calling dump_stack() can always trigger NULL pointer dereference panic similar as below: [ 0.396060] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5+ #47 [ 0.396692] Hardware name: riscv-virtio,qemu (DT) [ 0.397176] Call Trace: [ 0.398191] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000960 [ 0.399487] Oops [#1] [ 0.399739] Modules linked in: [ 0.400135] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5+ #47 [ 0.400570] Hardware name: riscv-virtio,qemu (DT) [ 0.400926] epc : walk_stackframe+0xc4/0xdc [ 0.401291] ra : dump_backtrace+0x30/0x38 [ 0.401630] epc : ffffffff80004922 ra : ffffffff8000496a sp : ffffffe000f3bd00 [ 0.402115] gp : ffffffff80cfdcb8 tp : ffffffe000f30000 t0 : ffffffff80d0b0cf [ 0.402602] t1 : ffffffff80d0b0c0 t2 : 0000000000000000 s0 : ffffffe000f3bd60 [ 0.403071] s1 : ffffffff808bc2e8 a0 : 0000000000001000 a1 : 0000000000000000 [ 0.403448] a2 : ffffffff803d7088 a3 : ffffffff808bc2e8 a4 : 6131725dbc24d400 [ 0.403820] a5 : 0000000000001000 a6 : 0000000000000002 a7 : ffffffffffffffff [ 0.404226] s2 : 0000000000000000 s3 : 0000000000000000 s4 : 0000000000000000 [ 0.404634] s5 : ffffffff803d7088 s6 : ffffffff808bc2e8 s7 : ffffffff80630650 [ 0.405085] s8 : ffffffff80912a80 s9 : 0000000000000008 s10: ffffffff804000fc [ 0.405388] s11: 0000000000000000 t3 : 0000000000000043 t4 : ffffffffffffffff [ 0.405616] t5 : 000000000000003d t6 : ffffffe000f3baa8 [ 0.405793] status: 0000000000000100 badaddr: 0000000000000960 cause: 000000000000000d [ 0.406135] [] walk_stackframe+0xc4/0xdc [ 0.407032] [] dump_backtrace+0x30/0x38 [ 0.407797] [] show_stack+0x40/0x4c [ 0.408234] [] dump_stack+0x90/0xb6 [ 0.409019] [] ptdump_init+0x20/0xc4 [ 0.409681] [] do_one_initcall+0x4c/0x226 [ 0.410110] [] kernel_init_freeable+0x1f4/0x258 [ 0.410562] [] kernel_init+0x22/0x148 [ 0.410959] [] ret_from_exception+0x0/0x14 [ 0.412241] ---[ end trace b2ab92c901b96251 ]--- [ 0.413099] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b The reason is the task is NULL when we finally call walk_stackframe() the NULL is passed from __dump_stack(): |static void __dump_stack(void) |{ | dump_stack_print_info(KERN_DEFAULT); | show_stack(NULL, NULL, KERN_DEFAULT); |} Fix this issue by checking "task == NULL" case in walk_stackframe(). Fixes: eac2f3059e02 ("riscv: stacktrace: fix the riscv stacktrace when CONFIG_FRAME_POINTER enabled") Signed-off-by: Jisheng Zhang Reviewed-by: Atish Patra Tested-by: Wende Tan Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index ac7593607fa6..315db3d0229b 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -27,7 +27,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); - } else if (task == current) { + } else if (task == NULL || task == current) { fp = (unsigned long)__builtin_frame_address(1); sp = (unsigned long)__builtin_frame_address(0); pc = (unsigned long)__builtin_return_address(0); From ac059d16442f30e6a9a95d41655153e01247e710 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sun, 25 Jul 2021 13:28:24 +0530 Subject: [PATCH 143/502] octeontx2-af: Fix PKIND overlap between LBK and LMAC interfaces Currently PKINDs are not assigned to LBK channels. The default value of LBK_CHX_PKIND (channel to PKIND mapping) register is zero, which is resulting in a overlap of pkind between LBK and CGX LMACs. When KPU1 parser config is modified when PTP timestamping is enabled on the CGX LMAC interface it is impacting traffic on LBK interfaces as well. This patch fixes the issue by reserving the PKIND#0 for LBK devices. CGX mapped PF pkind starts from 1 and also fixes the max pkind available. Fixes: 421572175ba5 ("octeontx2-af: Support to enable/disable HW timestamping") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/npc.h | 3 +++ drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 1 + drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 11 +++++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index 19bad9a59c8f..243cf8070e77 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -151,7 +151,10 @@ enum npc_kpu_lh_ltype { * Software assigns pkind for each incoming port such as CGX * Ethernet interfaces, LBK interfaces, etc. */ +#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_VLAN_EXDSA_PKIND + enum npc_pkind_type { + NPC_RX_LBK_PKIND = 0ULL, NPC_RX_VLAN_EXDSA_PKIND = 56ULL, NPC_RX_CHLEN24B_PKIND = 57ULL, NPC_RX_CPT_HDR_PKIND, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 0d2cd5169018..30067668eda7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -298,6 +298,7 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) rvu_nix_chan_lbk(rvu, lbkid, vf + 1); pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; + rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf); rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, pfvf->rx_chan_cnt); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 1097291aaa45..52b255426c22 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1721,7 +1721,6 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) { struct rvu_hwinfo *hw = rvu->hw; int num_pkinds, num_kpus, idx; - struct npc_pkind *pkind; /* Disable all KPUs and their entries */ for (idx = 0; idx < hw->npc_kpus; idx++) { @@ -1739,9 +1738,8 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) * Check HW max count to avoid configuring junk or * writing to unsupported CSR addresses. */ - pkind = &hw->pkind; num_pkinds = rvu->kpu.pkinds; - num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds); + num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds); for (idx = 0; idx < num_pkinds; idx++) npc_config_kpuaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], 0, idx, true); @@ -1891,7 +1889,8 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr) if (npc_const1 & BIT_ULL(63)) npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2); - pkind->rsrc.max = (npc_const1 >> 12) & 0xFFULL; + pkind->rsrc.max = NPC_UNRESERVED_PKIND_COUNT; + hw->npc_pkinds = (npc_const1 >> 12) & 0xFFULL; hw->npc_kpu_entries = npc_const1 & 0xFFFULL; hw->npc_kpus = (npc_const >> 8) & 0x1FULL; hw->npc_intfs = npc_const & 0xFULL; @@ -2002,6 +2001,10 @@ int rvu_npc_init(struct rvu *rvu) err = rvu_alloc_bitmap(&pkind->rsrc); if (err) return err; + /* Reserve PKIND#0 for LBKs. Power reset value of LBK_CH_PKIND is '0', + * no need to configure PKIND for all LBKs separately. + */ + rvu_alloc_rsrc(&pkind->rsrc); /* Allocate mem for pkind to PF and channel mapping info */ pkind->pfchan_map = devm_kcalloc(rvu->dev, pkind->rsrc.max, From 69f0aeb13bb548e2d5710a350116e03f0273302e Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sun, 25 Jul 2021 13:29:03 +0530 Subject: [PATCH 144/502] octeontx2-pf: Fix interface down flag on error In the existing code while changing the number of TX/RX queues using ethtool the PF/VF interface resources are freed and reallocated (otx2_stop and otx2_open is called) if the device is in running state. If any resource allocation fails in otx2_open, driver free already allocated resources and return. But again, when the number of queues changes as the device state still running oxt2_stop is called. In which we try to free already freed resources leading to driver crash. This patch fixes the issue by setting the INTF_DOWN flag on error and free the resources in otx2_stop only if the flag is not set. Fixes: 50fe6c02e5ad ("octeontx2-pf: Register and handle link notifications") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 7 +++---- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 +++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 8df748e0677b..b906a0eb6e0d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -298,15 +298,14 @@ static int otx2_set_channels(struct net_device *dev, err = otx2_set_real_num_queues(dev, channel->tx_count, channel->rx_count); if (err) - goto fail; + return err; pfvf->hw.rx_queues = channel->rx_count; pfvf->hw.tx_queues = channel->tx_count; pfvf->qset.cq_cnt = pfvf->hw.tx_queues + pfvf->hw.rx_queues; -fail: if (if_up) - dev->netdev_ops->ndo_open(dev); + err = dev->netdev_ops->ndo_open(dev); netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n", pfvf->hw.tx_queues, pfvf->hw.rx_queues); @@ -410,7 +409,7 @@ static int otx2_set_ringparam(struct net_device *netdev, qs->rqe_cnt = rx_count; if (if_up) - netdev->netdev_ops->ndo_open(netdev); + return netdev->netdev_ops->ndo_open(netdev); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index f300b807a85b..2c24944a4dba 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1662,6 +1662,7 @@ int otx2_open(struct net_device *netdev) err_tx_stop_queues: netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); + pf->flags |= OTX2_FLAG_INTF_DOWN; err_free_cints: otx2_free_cints(pf, qidx); vec = pci_irq_vector(pf->pdev, @@ -1689,6 +1690,10 @@ int otx2_stop(struct net_device *netdev) struct otx2_rss_info *rss; int qidx, vec, wrk; + /* If the DOWN flag is set resources are already freed */ + if (pf->flags & OTX2_FLAG_INTF_DOWN) + return 0; + netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); From 4c85e57575fb9e6405d02d55aef8025c60abb824 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sun, 25 Jul 2021 13:29:37 +0530 Subject: [PATCH 145/502] octeontx2-pf: Dont enable backpressure on LBK links Avoid configure backpressure for LBK links as they don't support it and enable lmacs before configuration pause frames. Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool") Signed-off-by: Geetha sowjanya Signed-off-by: Hariprasad Kelam Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 9169849881bf..544c96c8fe1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1504,8 +1504,8 @@ static int cgx_lmac_init(struct cgx *cgx) /* Add reference */ cgx->lmac_idmap[lmac->lmac_id] = lmac; - cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); set_bit(lmac->lmac_id, &cgx->lmac_bmap); + cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); } return cgx_lmac_verify_fwi_version(cgx); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 7cccd802c4ed..70fcc1fd962f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -924,12 +924,14 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt); aq->cq.drop_ena = 1; - /* Enable receive CQ backpressure */ - aq->cq.bp_ena = 1; - aq->cq.bpid = pfvf->bpid[0]; + if (!is_otx2_lbkvf(pfvf->pdev)) { + /* Enable receive CQ backpressure */ + aq->cq.bp_ena = 1; + aq->cq.bpid = pfvf->bpid[0]; - /* Set backpressure level is same as cq pass level */ - aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + /* Set backpressure level is same as cq pass level */ + aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + } } /* Fill AQ info */ @@ -1186,7 +1188,7 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, aq->aura.fc_hyst_bits = 0; /* Store count on all updates */ /* Enable backpressure for RQ aura */ - if (aura_id < pfvf->hw.rqpool_cnt) { + if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) { aq->aura.bp_ena = 0; aq->aura.nix0_bpid = pfvf->bpid[0]; /* Set backpressure level for RQ's Aura */ From 149ea30fdd5c28b89a3bfdecfc75cdab1deddb14 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 23 Jul 2021 17:56:00 +0300 Subject: [PATCH 146/502] devlink: Fix phys_port_name of virtual port and merge error Merge commit cited in fixes tag was incorrect. Due to it phys_port_name of the virtual port resulted in incorrect name. Also the phys_port_name of the physical port was written twice due to the merge error. Fix it by removing the old code and inserting back the misplaced code. Related commits of interest in net and net-next branches that resulted in merge conflict are: in net-next branch: commit f285f37cb1e6 ("devlink: append split port number to the port name") in net branch: commit b28d8f0c25a9 ("devlink: Correct VIRTUAL port to not have phys_port attributes") Fixes: 126285651b7 ("Merge ra.kernel.org:/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Parav Pandit Reported-by: Niklas Schnelle Tested-by: Niklas Schnelle Signed-off-by: David S. Miller --- net/core/devlink.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 8fdd04f00fd7..85032626de24 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9328,18 +9328,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: - case DEVLINK_PORT_FLAVOUR_VIRTUAL: n = snprintf(name, len, "p%u", attrs->phys.port_number); if (n < len && attrs->split) n += snprintf(name + n, len - n, "s%u", attrs->phys.split_subport_number); - if (!attrs->split) - n = snprintf(name, len, "p%u", attrs->phys.port_number); - else - n = snprintf(name, len, "p%us%u", - attrs->phys.port_number, - attrs->phys.split_subport_number); - break; case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: @@ -9381,6 +9373,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf, attrs->pci_sf.sf); break; + case DEVLINK_PORT_FLAVOUR_VIRTUAL: + return -EOPNOTSUPP; } if (n >= len) From ad4e1e48a6291f7fb53fbef38ca264966ffd65c9 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Fri, 23 Jul 2021 21:59:27 +0800 Subject: [PATCH 147/502] net: phy: broadcom: re-add check for PHY_BRCM_DIS_TXCRXC_NOENRGY on the BCM54811 PHY Restore PHY_ID_BCM54811 accidently removed by commit 5d4358ede8eb. Fixes: 5d4358ede8eb ("net: phy: broadcom: Allow BCM54210E to configure APD") Signed-off-by: Kevin Lo Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 7bf3011b8e77..83aea5c5cd03 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -288,7 +288,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) { if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E || BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810 || - BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E) + BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811) val |= BCM54XX_SHD_SCR3_RXCTXC_DIS; else val |= BCM54XX_SHD_SCR3_TRDDAPD; From 7e4960b3d66d7248b23de3251118147812b42da2 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 23 Jul 2021 18:36:09 +0800 Subject: [PATCH 148/502] mlx4: Fix missing error code in mlx4_load_one() The error code is missing in this code scenario, add the error code '-EINVAL' to the return value 'err'. Eliminate the follow smatch warning: drivers/net/ethernet/mellanox/mlx4/main.c:3538 mlx4_load_one() warn: missing error code 'err'. Reported-by: Abaci Robot Fixes: 7ae0e400cd93 ("net/mlx4_core: Flexible (asymmetric) allocation of EQs and MSI-X vectors for PF/VFs") Signed-off-by: Jiapeng Chong Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 00c84656b2e7..28ac4693da3c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3535,6 +3535,7 @@ slave_start: if (!SRIOV_VALID_STATE(dev->flags)) { mlx4_err(dev, "Invalid SRIOV state\n"); + err = -EINVAL; goto err_close; } } From 44379b986424b02acfa6e8c85ec5d68d89d3ccc4 Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Sun, 25 Jul 2021 23:17:37 +0530 Subject: [PATCH 149/502] drm/panel: panel-simple: Fix proper bpc for ytc700tlag_05_201c ytc700tlag_05_201c panel support 8 bpc not 6 bpc as per recent testing in i.MX8MM platform. Fix it. Fixes: 7a1f4fa4a629 ("drm/panel: simple: Add YTC700TLAG-05-201C") Signed-off-by: Jagan Teki Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210725174737.891106-1-jagan@amarulasolutions.com --- drivers/gpu/drm/panel/panel-simple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 21939d4352cf..1b80290c2b53 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -4166,7 +4166,7 @@ static const struct drm_display_mode yes_optoelectronics_ytc700tlag_05_201c_mode static const struct panel_desc yes_optoelectronics_ytc700tlag_05_201c = { .modes = &yes_optoelectronics_ytc700tlag_05_201c_mode, .num_modes = 1, - .bpc = 6, + .bpc = 8, .size = { .width = 154, .height = 90, From 795e3d2ea68e489ee7039ac29e98bfea0e34a96c Mon Sep 17 00:00:00 2001 From: Harshvardhan Jha Date: Sun, 25 Jul 2021 23:28:04 +0530 Subject: [PATCH 150/502] net: qede: Fix end of loop tests for list_for_each_entry The list_for_each_entry() iterator, "vlan" in this code, can never be NULL so the warning will never be printed. Signed-off-by: Harshvardhan Jha Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index c59b72c90293..a2e4dfb5cb44 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -831,7 +831,7 @@ int qede_configure_vlan_filters(struct qede_dev *edev) int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct qede_dev *edev = netdev_priv(dev); - struct qede_vlan *vlan = NULL; + struct qede_vlan *vlan; int rc = 0; DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid); @@ -842,7 +842,7 @@ int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) if (vlan->vid == vid) break; - if (!vlan || (vlan->vid != vid)) { + if (list_entry_is_head(vlan, &edev->vlan_list, list)) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "Vlan isn't configured\n"); goto out; From 058e6e0ed0eace43401c945082dec1d669b5b231 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 25 Jul 2021 13:42:50 -0400 Subject: [PATCH 151/502] sctp: improve the code for pmtu probe send and recv update This patch does 3 things: - make sctp_transport_pl_send() and sctp_transport_pl_recv() return bool type to decide if more probe is needed to send. - pr_debug() only when probe is really needed to send. - count pl.raise_count in sctp_transport_pl_send() instead of sctp_transport_pl_recv(), and it's only incremented for the 1st probe for the same size. These are preparations for the next patch to make probes happen only when there's packet loss in Search Complete state. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- net/sctp/sm_statefuns.c | 15 +++++++------- net/sctp/transport.c | 41 +++++++++++++++++++++----------------- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 32fc4a309df5..f3d414ed208e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1024,8 +1024,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); -void sctp_transport_pl_send(struct sctp_transport *t); -void sctp_transport_pl_recv(struct sctp_transport *t); +bool sctp_transport_pl_send(struct sctp_transport *t); +bool sctp_transport_pl_recv(struct sctp_transport *t); /* This is the structure we use to queue packets as they come into diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 09a8f23ec709..32df65f68c12 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1109,12 +1109,12 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net, if (!sctp_transport_pl_enabled(transport)) return SCTP_DISPOSITION_CONSUME; - sctp_transport_pl_send(transport); - - reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); - if (!reply) - return SCTP_DISPOSITION_NOMEM; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + if (sctp_transport_pl_send(transport)) { + reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); + if (!reply) + return SCTP_DISPOSITION_NOMEM; + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + } sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE, SCTP_TRANSPORT(transport)); @@ -1274,8 +1274,7 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net, !sctp_transport_pl_enabled(link)) return SCTP_DISPOSITION_DISCARD; - sctp_transport_pl_recv(link); - if (link->pl.state == SCTP_PL_COMPLETE) + if (sctp_transport_pl_recv(link)) return SCTP_DISPOSITION_CONSUME; return sctp_sf_send_probe(net, ep, asoc, type, link, commands); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 397a6244dd97..23e7bd3e3bd4 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -258,16 +258,12 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) sctp_transport_pl_update(transport); } -void sctp_transport_pl_send(struct sctp_transport *t) +bool sctp_transport_pl_send(struct sctp_transport *t) { - pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", - __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); - - if (t->pl.probe_count < SCTP_MAX_PROBES) { - t->pl.probe_count++; - return; - } + if (t->pl.probe_count < SCTP_MAX_PROBES) + goto out; + t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ @@ -299,10 +295,20 @@ void sctp_transport_pl_send(struct sctp_transport *t) sctp_assoc_sync_pmtu(t->asoc); } } - t->pl.probe_count = 1; + +out: + if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 && + !t->pl.probe_count) + t->pl.raise_count++; + + pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", + __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + + t->pl.probe_count++; + return true; } -void sctp_transport_pl_recv(struct sctp_transport *t) +bool sctp_transport_pl_recv(struct sctp_transport *t) { pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); @@ -323,7 +329,7 @@ void sctp_transport_pl_recv(struct sctp_transport *t) if (!t->pl.probe_high) { t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, SCTP_MAX_PLPMTU); - return; + return false; } t->pl.probe_size += SCTP_PL_MIN_STEP; if (t->pl.probe_size >= t->pl.probe_high) { @@ -335,14 +341,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t) t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } - } else if (t->pl.state == SCTP_PL_COMPLETE) { - t->pl.raise_count++; - if (t->pl.raise_count == 30) { - /* Raise probe_size again after 30 * interval in Search Complete */ - t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ - t->pl.probe_size += SCTP_PL_MIN_STEP; - } + } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) { + /* Raise probe_size again after 30 * interval in Search Complete */ + t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ + t->pl.probe_size += SCTP_PL_MIN_STEP; } + + return t->pl.state == SCTP_PL_COMPLETE; } static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu) From eacf078cf4c7aa23e9591738511f142cc39b5186 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 25 Jul 2021 13:42:51 -0400 Subject: [PATCH 152/502] sctp: send pmtu probe only if packet loss in Search Complete state This patch is to introduce last_rtx_chunks into sctp_transport to detect if there's any packet retransmission/loss happened by checking against asoc's rtx_data_chunks in sctp_transport_pl_send(). If there is, namely, transport->last_rtx_chunks != asoc->rtx_data_chunks, the pmtu probe will be sent out. Otherwise, increment the pl.raise_count and return when it's in Search Complete state. With this patch, if in Search Complete state, which is a long period, it doesn't need to keep probing the current pmtu unless there's data packet loss. This will save quite some traffic. v1->v2: - add the missing Fixes tag. Fixes: 0dac127c0557 ("sctp: do black hole detection in search complete state") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/transport.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f3d414ed208e..651bba654d77 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -984,6 +984,7 @@ struct sctp_transport { } cacc; struct { + __u32 last_rtx_chunks; __u16 pmtu; __u16 probe_size; __u16 probe_high; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 23e7bd3e3bd4..a3d3ca6dd63d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -263,6 +263,7 @@ bool sctp_transport_pl_send(struct sctp_transport *t) if (t->pl.probe_count < SCTP_MAX_PROBES) goto out; + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ @@ -298,8 +299,10 @@ bool sctp_transport_pl_send(struct sctp_transport *t) out: if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 && - !t->pl.probe_count) + !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) { t->pl.raise_count++; + return false; + } pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); @@ -313,6 +316,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t) pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; t->pl.pmtu = t->pl.probe_size; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { From 9f66861181e64dc192bea136da6c91528910002e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 4 Jul 2021 16:01:37 -0700 Subject: [PATCH 153/502] m68k/coldfire: change pll var. to clk_pll DEFINE_CLK() makes the variable name be clk_xyz, so variable 'pll' should instead be 'clk_pll'. In file included from ../arch/m68k/coldfire/m525x.c:12: ../arch/m68k/coldfire/m525x.c:29:30: error: 'pll' undeclared here (not in a function) 29 | CLKDEV_INIT(NULL, "pll.0", &pll), | ^~~ ../include/linux/clkdev.h:30:10: note: in definition of macro 'CLKDEV_INIT' 30 | .clk = c, \ | ^ In file included from ../arch/m68k/coldfire/m525x.c:21: ../arch/m68k/include/asm/mcfclk.h:43:27: warning: 'clk_pll' defined but not used [-Wunused-variable] 43 | static struct clk clk_##clk_ref = { \ | ^~~~ ../arch/m68k/coldfire/m525x.c:25:1: note: in expansion of macro 'DEFINE_CLK' 25 | DEFINE_CLK(pll, "pll.0", MCF_CLK); | ^~~~~~~~~~ Fixes: 63aadb77669a ("m68k: coldfire: use clkdev_lookup on most coldfire") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Greg Ungerer Cc: linux-m68k@lists.linux-m68k.org Cc: uclinux-dev@uclinux.org Cc: Arnd Bergmann Signed-off-by: Greg Ungerer --- arch/m68k/coldfire/m525x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/coldfire/m525x.c b/arch/m68k/coldfire/m525x.c index 2c4d2ca2f20d..485375112e28 100644 --- a/arch/m68k/coldfire/m525x.c +++ b/arch/m68k/coldfire/m525x.c @@ -26,7 +26,7 @@ DEFINE_CLK(pll, "pll.0", MCF_CLK); DEFINE_CLK(sys, "sys.0", MCF_BUSCLK); static struct clk_lookup m525x_clk_lookup[] = { - CLKDEV_INIT(NULL, "pll.0", &pll), + CLKDEV_INIT(NULL, "pll.0", &clk_pll), CLKDEV_INIT(NULL, "sys.0", &clk_sys), CLKDEV_INIT("mcftmr.0", NULL, &clk_sys), CLKDEV_INIT("mcftmr.1", NULL, &clk_sys), From e4b016f4b44176807e545fd437cd519b6380e86f Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Fri, 8 Nov 2019 15:36:08 +0000 Subject: [PATCH 154/502] alpha: __udiv_qrnnd should be exported When building an alpha kernel with mpi set as module, I hit this build error: ERROR: "__udiv_qrnnd" [lib/mpi/mpi.ko] undefined! make[2]: *** [scripts/Makefile.modpost:92: __modpost] Error 1 make[1]: *** [Makefile:1266: modules] Error 2 This is due to __udiv_qrnnd not exported. Signed-off-by: Corentin Labbe Signed-off-by: Matt Turner --- arch/alpha/math-emu/math.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index d568cd9a3e43..5617ac0889b8 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -401,3 +401,5 @@ alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) egress: return si_code; } + +EXPORT_SYMBOL(__udiv_qrnnd); From a09c33cbf3db545d44eab16eb528acf834310690 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 11:43:41 +0200 Subject: [PATCH 155/502] alpha: Kconfig: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Matt Turner --- arch/alpha/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 77d3280dc678..f4ffad76e8b1 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -532,7 +532,7 @@ config SMP will run faster if you say N here. See also the SMP-HOWTO available at - . + . If you don't know what to do here, say N. From 5e3c3a0ae5d194f0a464aaaa71d764d96f2e7245 Mon Sep 17 00:00:00 2001 From: Chen Li Date: Tue, 13 Oct 2020 14:31:52 +0800 Subject: [PATCH 156/502] alpha: remove undef inline in compiler.h since 889b3c1245de48ed0cacf7aebb25c489d3e4a3e9, CONFIG_OPTIMIZE_INLINING is removed entirely and inline is always defined to `inline __gnu_inline __inline_maybe_unused notrace` in compiler_types.h Besides, undef inline here also means it never use __attribute__((__gnu_inline__)), so `extern inline` function can never be defined header files, otherwise multiple definition errors will happen, e.g. if multiple translation units use alpha/include/asm/pal.h will report multiple definitions, because there are many extern inline function definitions in this header. ``` c extern inline TYPE NAME(void) \ { \ register TYPE __r0 __asm__("$0"); \ __asm__ __volatile__( \ ... ``` Ofc, it is also ok to remove `extern` in `extern inline` here, then all of iso c99 and gnuc99/89 are ok, but there are also other alpha headers have such function definitions. Signed-off-by: chenli Signed-off-by: Matt Turner --- arch/alpha/include/asm/compiler.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/alpha/include/asm/compiler.h b/arch/alpha/include/asm/compiler.h index 5159ba259d65..ae645959018a 100644 --- a/arch/alpha/include/asm/compiler.h +++ b/arch/alpha/include/asm/compiler.h @@ -4,15 +4,4 @@ #include -/* Some idiots over in thought inline should imply - always_inline. This breaks stuff. We'll include this file whenever - we run into such problems. */ - -#include -#undef inline -#undef __inline__ -#undef __inline -#undef __always_inline -#define __always_inline inline __attribute__((always_inline)) - #endif /* __ALPHA_COMPILER_H */ From f0443da1d8560f4c664ab0f9a900ed69e9aaeb14 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 16 Dec 2020 21:12:41 +0800 Subject: [PATCH 157/502] alpha: convert comma to semicolon Replace a comma between expression statements by a semicolon. Fixes: cba1ec7e88a0 ("alpha: switch to generic kernel_thread()") Signed-off-by: Zheng Yongjun Signed-off-by: Matt Turner --- arch/alpha/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index ef0c08ed0481..a5123ea426ce 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -256,7 +256,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childstack->r26 = (unsigned long) ret_from_kernel_thread; childstack->r9 = usp; /* function */ childstack->r10 = kthread_arg; - childregs->hae = alpha_mv.hae_cache, + childregs->hae = alpha_mv.hae_cache; childti->pcb.usp = 0; return 0; } From caace6ca4e06f09413fb8f8a63319594cfb7d47d Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 5 Jan 2021 10:16:27 -0500 Subject: [PATCH 158/502] alpha: Send stop IPI to send to online CPUs This issue was noticed while debugging a shutdown issue where some secondary CPUs are not being shutdown correctly. A fix for that [1] requires that secondary cpus be offlined using the cpu_online_mask so that the stop operation is a no-op if CPU HOTPLUG is disabled. I, like the author in [1] looked at the architectures and found that alpha is one of two architectures that executes smp_send_stop() on all possible CPUs. On alpha, smp_send_stop() sends an IPI to all possible CPUs but only needs to send them to online CPUs. Send the stop IPI to only the online CPUs. [1] https://lkml.org/lkml/2020/1/10/250 Signed-off-by: Prarit Bhargava Cc: Richard Henderson Cc: Ivan Kokshaysky Signed-off-by: Matt Turner --- arch/alpha/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 4b2575f936d4..cb64e4797d2a 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -582,7 +582,7 @@ void smp_send_stop(void) { cpumask_t to_whom; - cpumask_copy(&to_whom, cpu_possible_mask); + cpumask_copy(&to_whom, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &to_whom); #ifdef DEBUG_IPI_MSG if (hard_smp_processor_id() != boot_cpu_id) From bfd736e3ffcc9dfc23c0a619fcc131eefd91d7ca Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 18 Jan 2021 11:20:29 +0000 Subject: [PATCH 159/502] alpha: defconfig: add necessary configs for boot testing Gentoo's KernelCI will soon boot test alpha kernel and we need CONFIG_DEVTMPFS=y to be set for that. Note that CONFIG_DEVTMPFS=y is already necessary for lot of other distribution/tools like recent udev/systemd. Signed-off-by: Corentin Labbe Signed-off-by: Matt Turner --- arch/alpha/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index dd2dd9f0861f..7f1ca30b115b 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -70,3 +70,4 @@ CONFIG_DEBUG_INFO=y CONFIG_ALPHA_LEGACY_START_ADDRESS=y CONFIG_MATHEMU=y CONFIG_CRYPTO_HMAC=y +CONFIG_DEVTMPFS=y From 8f34ed9d959786e2f2a643a1237f69f0171911cf Mon Sep 17 00:00:00 2001 From: tangchunyou Date: Wed, 20 Jan 2021 21:34:10 +0800 Subject: [PATCH 160/502] alpha: fix typos in a comment "kerne" -> "kernel" Signed-off-by: tangchunyou Signed-off-by: Matt Turner --- arch/alpha/boot/bootpz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c index 43af71835adf..90a2b341e9c0 100644 --- a/arch/alpha/boot/bootpz.c +++ b/arch/alpha/boot/bootpz.c @@ -200,7 +200,7 @@ extern char _end; START_ADDR KSEG address of the entry point of kernel code. ZERO_PGE KSEG address of page full of zeroes, but - upon entry to kerne cvan be expected + upon entry to kernel, it can be expected to hold the parameter list and possible INTRD information. From 6208721f1399912a0a53c77ed86dcc25d3e20efb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 20 Apr 2021 19:56:31 +0200 Subject: [PATCH 161/502] binfmt: remove support for em86 (alpha only) We have a fairly specific alpha binary loader in Linux: running x86 (i386, i486) binaries via the em86 [1] emulator. As noted in the Kconfig option, the same behavior can be achieved via binfmt_misc, for example, more nowadays used for running qemu-user. An example on how to get binfmt_misc running with em86 can be found in Documentation/admin-guide/binfmt-misc.rst The defconfig does not have CONFIG_BINFMT_EM86=y set. And doing a make defconfig && make olddefconfig results in # CONFIG_BINFMT_EM86 is not set ... as we don't seem to have any supported Linux distirbution for alpha anymore, there isn't really any "default" user of that feature anymore. Searching for "CONFIG_BINFMT_EM86=y" reveals mostly discussions from around 20 years ago, like [2] describing how to get netscape via em86 running via em86, or [3] discussing that running wine or installing Win 3.11 through em86 would be a nice feature. The latest binaries available for em86 are from 2000, version 2.2.1 [4] -- which translates to "unsupported"; further, em86 doesn't even work with glibc-2.x but only with glibc-2.0 [4, 5]. These are clear signs that there might not be too many em86 users out there, especially users relying on modern Linux kernels. Even though the code footprint is relatively small, let's just get rid of this blast from the past that's effectively unused. [1] http://ftp.dreamtime.org/pub/linux/Linux-Alpha/em86/v0.4/docs/em86.html [2] https://static.lwn.net/1998/1119/a/alpha-netscape.html [3] https://groups.google.com/g/linux.debian.alpha/c/AkGuQHeCe0Y [4] http://zeniv.linux.org.uk/pub/linux/alpha/em86/v2.2-1/relnotes.2.2.1.html [5] https://forum.teamspeak.com/archive/index.php/t-1477.html Cc: Alexander Viro Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: linux-fsdevel@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: linux-alpha@vger.kernel.org Signed-off-by: David Hildenbrand Signed-off-by: Matt Turner --- fs/Kconfig.binfmt | 15 ------- fs/Makefile | 1 - fs/binfmt_em86.c | 110 ---------------------------------------------- 3 files changed, 126 deletions(-) delete mode 100644 fs/binfmt_em86.c diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 06fb7a93a1bd..4d5ae61580aa 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -168,21 +168,6 @@ config OSF4_COMPAT with v4 shared libraries freely available from Compaq. If you're going to use shared libraries from Tru64 version 5.0 or later, say N. -config BINFMT_EM86 - tristate "Kernel support for Linux/Intel ELF binaries" - depends on ALPHA - help - Say Y here if you want to be able to execute Linux/Intel ELF - binaries just like native Alpha binaries on your Alpha machine. For - this to work, you need to have the emulator /usr/bin/em86 in place. - - You can get the same functionality by saying N here and saying Y to - "Kernel support for MISC binaries". - - You may answer M to compile the emulation support as a module and - later load the module when you want to use a Linux/Intel binary. The - module will be called binfmt_em86. If unsure, say Y. - config BINFMT_MISC tristate "Kernel support for MISC binaries" help diff --git a/fs/Makefile b/fs/Makefile index 9c708e1fbe8f..f98f3e691c37 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -39,7 +39,6 @@ obj-$(CONFIG_FS_ENCRYPTION) += crypto/ obj-$(CONFIG_FS_VERITY) += verity/ obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o -obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c deleted file mode 100644 index 06b9b9fddf70..000000000000 --- a/fs/binfmt_em86.c +++ /dev/null @@ -1,110 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/fs/binfmt_em86.c - * - * Based on linux/fs/binfmt_script.c - * Copyright (C) 1996 Martin von Löwis - * original #!-checking implemented by tytso. - * - * em86 changes Copyright (C) 1997 Jim Paradis - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#define EM86_INTERP "/usr/bin/em86" -#define EM86_I_NAME "em86" - -static int load_em86(struct linux_binprm *bprm) -{ - const char *i_name, *i_arg; - char *interp; - struct file * file; - int retval; - struct elfhdr elf_ex; - - /* Make sure this is a Linux/Intel ELF executable... */ - elf_ex = *((struct elfhdr *)bprm->buf); - - if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - return -ENOEXEC; - - /* First of all, some simple consistency checks */ - if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || - (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) || - !bprm->file->f_op->mmap) { - return -ENOEXEC; - } - - /* Need to be able to load the file after exec */ - if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) - return -ENOENT; - - /* Unlike in the script case, we don't have to do any hairy - * parsing to find our interpreter... it's hardcoded! - */ - interp = EM86_INTERP; - i_name = EM86_I_NAME; - i_arg = NULL; /* We reserve the right to add an arg later */ - - /* - * Splice in (1) the interpreter's name for argv[0] - * (2) (optional) argument to interpreter - * (3) filename of emulated file (replace argv[0]) - * - * This is done in reverse order, because of how the - * user environment and arguments are stored. - */ - remove_arg_zero(bprm); - retval = copy_string_kernel(bprm->filename, bprm); - if (retval < 0) return retval; - bprm->argc++; - if (i_arg) { - retval = copy_string_kernel(i_arg, bprm); - if (retval < 0) return retval; - bprm->argc++; - } - retval = copy_string_kernel(i_name, bprm); - if (retval < 0) return retval; - bprm->argc++; - - /* - * OK, now restart the process with the interpreter's inode. - * Note that we use open_exec() as the name is now in kernel - * space, and we don't need to copy it. - */ - file = open_exec(interp); - if (IS_ERR(file)) - return PTR_ERR(file); - - bprm->interpreter = file; - return 0; -} - -static struct linux_binfmt em86_format = { - .module = THIS_MODULE, - .load_binary = load_em86, -}; - -static int __init init_em86_binfmt(void) -{ - register_binfmt(&em86_format); - return 0; -} - -static void __exit exit_em86_binfmt(void) -{ - unregister_binfmt(&em86_format); -} - -core_initcall(init_em86_binfmt); -module_exit(exit_em86_binfmt); -MODULE_LICENSE("GPL"); From 15b9e384030cf34de33deed70d670a8dc0fc784a Mon Sep 17 00:00:00 2001 From: He Zhe Date: Mon, 26 Apr 2021 17:16:29 +0800 Subject: [PATCH 162/502] alpha: Add syscall_get_return_value() audit now requires syscall_get_return_value instead of regs_return_value to retrieve syscall return code . Other architectures that support audit have already define this function. Signed-off-by: He Zhe Signed-off-by: Matt Turner --- arch/alpha/include/asm/syscall.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h index 11c688c1d7ec..f21babaeed85 100644 --- a/arch/alpha/include/asm/syscall.h +++ b/arch/alpha/include/asm/syscall.h @@ -9,4 +9,10 @@ static inline int syscall_get_arch(struct task_struct *task) return AUDIT_ARCH_ALPHA; } +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r0; +} + #endif /* _ASM_ALPHA_SYSCALL_H */ From ee3e9fa29e8b2553097009dac270cbed0f03f6d2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 May 2021 23:37:20 +0200 Subject: [PATCH 163/502] alpha: fp_emul: avoid init/cleanup_module names This is one of the last modules using the old calling conventions for module init/exit functions. Change it over to the style used everywhere else. Signed-off-by: Arnd Bergmann Signed-off-by: Matt Turner --- arch/alpha/math-emu/math.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index 5617ac0889b8..f7cef66af88d 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -65,7 +65,7 @@ static long (*save_emul) (unsigned long pc); long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long); long do_alpha_fp_emul(unsigned long); -int init_module(void) +static int alpha_fp_emul_init_module(void) { save_emul_imprecise = alpha_fp_emul_imprecise; save_emul = alpha_fp_emul; @@ -73,12 +73,14 @@ int init_module(void) alpha_fp_emul = do_alpha_fp_emul; return 0; } +module_init(alpha_fp_emul_init_module); -void cleanup_module(void) +static void alpha_fp_emul_cleanup_module(void) { alpha_fp_emul_imprecise = save_emul_imprecise; alpha_fp_emul = save_emul; } +module_exit(alpha_fp_emul_cleanup_module); #undef alpha_fp_emul_imprecise #define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise From 3e0c6d15adeafa2afcb4c95c892bb5980c1430e6 Mon Sep 17 00:00:00 2001 From: gushengxian Date: Tue, 25 May 2021 20:16:10 -0700 Subject: [PATCH 164/502] alpha: Remove space between * and parameter name 'struct pcb_struct * pcb_va' should be 'struct pcb_struct *pcb_va'. Signed-off-by: gushengxian Signed-off-by: Matt Turner --- arch/alpha/boot/bootp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c index 00266e6e1b71..b4faba2432d5 100644 --- a/arch/alpha/boot/bootp.c +++ b/arch/alpha/boot/bootp.c @@ -23,7 +23,7 @@ #include "ksize.h" extern unsigned long switch_to_osf_pal(unsigned long nr, - struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, + struct pcb_struct *pcb_va, struct pcb_struct *pcb_pa, unsigned long *vptb); extern void move_stack(unsigned long new_stack); From fc520525c18ac2207792eb2067c6b626326a87ad Mon Sep 17 00:00:00 2001 From: gushengxian Date: Fri, 2 Jul 2021 05:48:12 -0700 Subject: [PATCH 165/502] alpha: fix spelling mistakes Fix some spelling mistakes in comments: delarations ==> declarations softare ==> software suffiently ==> sufficiently requred ==> required unaliged ==> unaligned Signed-off-by: gushengxian Signed-off-by: Matt Turner --- arch/alpha/boot/misc.c | 2 +- arch/alpha/kernel/osf_sys.c | 4 ++-- arch/alpha/kernel/perf_event.c | 2 +- arch/alpha/kernel/sys_nautilus.c | 2 +- arch/alpha/kernel/traps.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c index d65192202703..325d4dd4f904 100644 --- a/arch/alpha/boot/misc.c +++ b/arch/alpha/boot/misc.c @@ -30,7 +30,7 @@ extern long srm_printk(const char *, ...) __attribute__ ((format (printf, 1, 2))); /* - * gzip delarations + * gzip declarations */ #define OF(args) args #define STATIC static diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index d5367a1c6300..d31167e3269c 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -834,7 +834,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, return -EFAULT; state = ¤t_thread_info()->ieee_state; - /* Update softare trap enable bits. */ + /* Update software trap enable bits. */ *state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK); /* Update the real fpcr. */ @@ -854,7 +854,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, state = ¤t_thread_info()->ieee_state; exc &= IEEE_STATUS_MASK; - /* Update softare trap enable bits. */ + /* Update software trap enable bits. */ swcr = (*state & IEEE_SW_MASK) | exc; *state |= exc; diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index e7a59d927d78..efcf7321701b 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -574,7 +574,7 @@ static void alpha_pmu_start(struct perf_event *event, int flags) * Check that CPU performance counters are supported. * - currently support EV67 and later CPUs. * - actually some later revisions of the EV6 have the same PMC model as the - * EV67 but we don't do suffiently deep CPU detection to detect them. + * EV67 but we don't do sufficiently deep CPU detection to detect them. * Bad luck to the very few people who might have one, I guess. */ static int supported_cpu(void) diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 53adf43dcd44..96fd6ff3fe81 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -212,7 +212,7 @@ nautilus_init_pci(void) /* Use default IO. */ pci_add_resource(&bridge->windows, &ioport_resource); - /* Irongate PCI memory aperture, calculate requred size before + /* Irongate PCI memory aperture, calculate required size before setting it up. */ pci_add_resource(&bridge->windows, &irongate_mem); diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 921d4b6e4d95..5398f982bdd1 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -730,7 +730,7 @@ do_entUnaUser(void __user * va, unsigned long opcode, long error; /* Check the UAC bits to decide what the user wants us to do - with the unaliged access. */ + with the unaligned access. */ if (!(current_thread_info()->status & TS_UAC_NOPRINT)) { if (__ratelimit(&ratelimit)) { From d7f237df53457cf0cbdb9943b9b7c93a05e2fdb6 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 23 Jul 2021 05:52:25 -0400 Subject: [PATCH 166/502] drm/i915/bios: Fix ports mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PORT_A to PORT_F are regular integers defined in the enum port, while for_each_port_masked requires a bit mask for the ports. Current given mask: 0b111 Desired mask: 0b111111 I noticed this while Christoph was reporting a bug found on headless GVT configuration which bisect blamed commit 3ae04c0c7e63 ("drm/i915/bios: limit default outputs to ports A through F") v2: Avoid unnecessary line continuations as pointed by CI and Christoph Cc: Christoph Hellwig Fixes: 3ae04c0c7e63 ("drm/i915/bios: limit default outputs to ports A through F") Cc: Lucas De Marchi Cc: Ville Syrjälä Cc: Jani Nikula Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Reviewed-by: Lucas De Marchi Tested-by: Christoph Hellwig Link: https://patchwork.freedesktop.org/patch/msgid/20210723095225.562913-1-rodrigo.vivi@intel.com (cherry picked from commit 9b52aa720168859526bf90d77fa210fc0336f170) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bios.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 5b6922e28ef2..aa667fa71158 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -2166,7 +2166,8 @@ static void init_vbt_missing_defaults(struct drm_i915_private *i915) { enum port port; - int ports = PORT_A | PORT_B | PORT_C | PORT_D | PORT_E | PORT_F; + int ports = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | + BIT(PORT_D) | BIT(PORT_E) | BIT(PORT_F); if (!HAS_DDI(i915) && !IS_CHERRYVIEW(i915)) return; From 5d3a618f356595f132ee85c63a1b5f007a71f23c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 23 Jul 2021 16:43:52 -0700 Subject: [PATCH 167/502] drm/i915: fix not reading DSC disable fuse in GLK We were using GRAPHICS_VER() to handle SKL_DFSM register, which means we were not handling GLK correctly since that has GRAPHICS_VER == 9, but DISPLAY_VER == 10. Switch the entire branch to check DISPLAY_VER which makes it more in line with Bspec. Even though the Bspec has an exception for RKL in TGL_DFSM_PIPE_D_DISABLE, we don't have to do anything as the bit has disable semantic and RKL doesn't have pipe D. Bspec: 50075, 7548 Fixes: 2b5a4562edd0 ("drm/i915/display: Simplify GLK display version tests") Cc: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210723234352.214459-1-lucas.demarchi@intel.com (cherry picked from commit 4fd177288a4ee046bd8590355a64de855dcf77e2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_device_info.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7eaa92fee421..e0a10f36acc1 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -325,7 +325,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->pipe_mask &= ~BIT(PIPE_C); info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C); } - } else if (HAS_DISPLAY(dev_priv) && GRAPHICS_VER(dev_priv) >= 9) { + } else if (HAS_DISPLAY(dev_priv) && DISPLAY_VER(dev_priv) >= 9) { u32 dfsm = intel_de_read(dev_priv, SKL_DFSM); if (dfsm & SKL_DFSM_PIPE_A_DISABLE) { @@ -340,7 +340,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->pipe_mask &= ~BIT(PIPE_C); info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C); } - if (GRAPHICS_VER(dev_priv) >= 12 && + + if (DISPLAY_VER(dev_priv) >= 12 && (dfsm & TGL_DFSM_PIPE_D_DISABLE)) { info->pipe_mask &= ~BIT(PIPE_D); info->cpu_transcoder_mask &= ~BIT(TRANSCODER_D); @@ -352,10 +353,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE) info->display.has_fbc = 0; - if (GRAPHICS_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) + if (DISPLAY_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) info->display.has_dmc = 0; - if (GRAPHICS_VER(dev_priv) >= 10 && + if (DISPLAY_VER(dev_priv) >= 10 && (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE)) info->display.has_dsc = 0; } From b4bde5554f70fb04ff07989fdc1356ab84d6f482 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Jul 2021 16:29:22 -0700 Subject: [PATCH 168/502] drm/i915/display: split DISPLAY_VER 9 and 10 in intel_setup_outputs() Commit 5a9d38b20a5a ("drm/i915/display: hide workaround for broken vbt in intel_bios.c") moved the workaround for broken or missing VBT to intel_bios.c. However is_port_valid() only protects the handling of different skus of the same display version. Since in intel_setup_outputs() we share the code path with version 9, this would also create port F for SKL/KBL, which does not exist. Missing VBT can be reproduced when starting a headless QEMU with no opregion available. Avoid the issue by splitting versions 9 and 10 in intel_setup_outputs(), which also makes it more clear what code path it's taking for each version. v2: move generic display version after Geminilake since that one has a different set of outputs Fixes: 5a9d38b20a5a ("drm/i915/display: hide workaround for broken vbt in intel_bios.c") Cc: Jani Nikula Cc: Rodrigo Vivi Reported-by: Christoph Hellwig Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Reviewed-by: Matt Roper Tested-by: Christoph Hellwig Link: https://patchwork.freedesktop.org/patch/msgid/20210722232922.3796835-1-lucas.demarchi@intel.com (cherry picked from commit ec387b8ff8d757561369be9a280cf63f23bbb926) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3bad4e00f7be..2d5d21740c25 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11361,13 +11361,19 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); vlv_dsi_init(dev_priv); - } else if (DISPLAY_VER(dev_priv) >= 9) { + } else if (DISPLAY_VER(dev_priv) == 10) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); intel_ddi_init(dev_priv, PORT_D); intel_ddi_init(dev_priv, PORT_E); intel_ddi_init(dev_priv, PORT_F); + } else if (DISPLAY_VER(dev_priv) >= 9) { + intel_ddi_init(dev_priv, PORT_A); + intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_C); + intel_ddi_init(dev_priv, PORT_D); + intel_ddi_init(dev_priv, PORT_E); } else if (HAS_DDI(dev_priv)) { u32 found; From 480e93e12aa04d857f7cc2e6fcec181c0d690404 Mon Sep 17 00:00:00 2001 From: Harshvardhan Jha Date: Sun, 25 Jul 2021 23:23:55 +0530 Subject: [PATCH 169/502] net: xfrm: Fix end of loop tests for list_for_each_entry The list_for_each_entry() iterator, "pos" in this code, can never be NULL so the warning will never be printed. Signed-off-by: Harshvardhan Jha Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_ipcomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 2e8afe078d61..cb40ff0ff28d 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -241,7 +241,7 @@ static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms) break; } - WARN_ON(!pos); + WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list)); if (--pos->users) return; From f0c6225531e4a9e43e51c5f7b02089bdd725c734 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 17 Jul 2021 23:11:38 -0500 Subject: [PATCH 170/502] ACPI: PM: Add support for upcoming AMD uPEP HID AMDI007 AMD systems with uPEP HID AMDI007 should be using revision 2 and the AMD method. Fixes: 8fbd6c15ea0a ("ACPI: PM: Adjust behavior for field problems on AMD systems") Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/s2idle.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index 1c507804fb10..fbdbef0ab552 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -378,19 +378,25 @@ static int lps0_device_attach(struct acpi_device *adev, * AMDI0006: * - should use rev_id 0x0 * - function mask = 0x3: Should use Microsoft method + * AMDI0007: + * - Should use rev_id 0x2 + * - Should only use AMD method */ const char *hid = acpi_device_hid(adev); - rev_id = 0; + rev_id = strcmp(hid, "AMDI0007") ? 0 : 2; lps0_dsm_func_mask = validate_dsm(adev->handle, ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid); lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle, - ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id, + ACPI_LPS0_DSM_UUID_MICROSOFT, 0, &lps0_dsm_guid_microsoft); if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") || !strcmp(hid, "AMDI0005"))) { lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1; acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n", ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask); + } else if (lps0_dsm_func_mask_microsoft > 0 && !strcmp(hid, "AMDI0007")) { + lps0_dsm_func_mask_microsoft = -EINVAL; + acpi_handle_debug(adev->handle, "_DSM Using AMD method\n"); } } else { rev_id = 1; From 94cbe7db7d757c2d481c3617ab5579a28cfc2175 Mon Sep 17 00:00:00 2001 From: Mohammad Athari Bin Ismail Date: Mon, 26 Jul 2021 10:20:20 +0800 Subject: [PATCH 171/502] net: stmmac: add est_irq_status callback function for GMAC 4.10 and 5.10 Assign dwmac5_est_irq_status to est_irq_status callback function for GMAC 4.10 and 5.10. With this, EST related interrupts could be handled properly. Fixes: e49aa315cb01 ("net: stmmac: EST interrupts handling and error reporting") Cc: # 5.13.x Signed-off-by: Mohammad Athari Bin Ismail Acked-by: Wong Vee Khee Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 67ba083eb90c..b21745368983 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -1249,6 +1249,7 @@ const struct stmmac_ops dwmac410_ops = { .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, + .est_irq_status = dwmac5_est_irq_status, .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, @@ -1300,6 +1301,7 @@ const struct stmmac_ops dwmac510_ops = { .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, + .est_irq_status = dwmac5_est_irq_status, .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, From 2ebda027148315581b89a2ed2fef84ad53b2aedd Mon Sep 17 00:00:00 2001 From: Chen Shen Date: Mon, 26 Jul 2021 13:47:34 +0800 Subject: [PATCH 172/502] sctp: delete addr based on sin6_scope_id sctp_inet6addr_event deletes 'addr' from 'local_addr_list' when setting netdev down, but it is possible to delete the incorrect entry (match the first one with the same ipaddr, but the different 'ifindex'), if there are some netdevs with the same 'local-link' ipaddr added already. It should delete the entry depending on 'sin6_addr' and 'sin6_scope_id' both. otherwise, the endpoint will call 'sctp_sf_ootb' if it can't find the according association when receives 'heartbeat', and finally will reply 'abort'. For example: 1.when linux startup the entries in local_addr_list: ifindex:35 addr:fe80::40:43ff:fe80:0 (eths0.201) ifindex:36 addr:fe80::40:43ff:fe80:0 (eths0.209) ifindex:37 addr:fe80::40:43ff:fe80:0 (eths0.210) the route table: local fe80::40:43ff:fe80:0 dev eths0.201 local fe80::40:43ff:fe80:0 dev eths0.209 local fe80::40:43ff:fe80:0 dev eths0.210 2.after 'ifconfig eths0.209 down' the entries in local_addr_list: ifindex:36 addr:fe80::40:43ff:fe80:0 (eths0.209) ifindex:37 addr:fe80::40:43ff:fe80:0 (eths0.210) the route table: local fe80::40:43ff:fe80:0 dev eths0.201 local fe80::40:43ff:fe80:0 dev eths0.210 3.asoc not found for src:[fe80::40:43ff:fe80:0]:37381 dst:[:1]:53335 ::1->fe80::40:43ff:fe80:0 HEARTBEAT fe80::40:43ff:fe80:0->::1 ABORT Signed-off-by: Chen Shen Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e48dd909dee5..470dbdc27d58 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -100,8 +100,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, list_for_each_entry_safe(addr, temp, &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && - ipv6_addr_equal(&addr->a.v6.sin6_addr, - &ifa->addr)) { + ipv6_addr_equal(&addr->a.v6.sin6_addr, + &ifa->addr) && + addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) { sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; From 9a9e74819bb0e4694279fb437e136fe485878d25 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 16 Jul 2021 16:41:04 +0200 Subject: [PATCH 173/502] KVM: nSVM: Rename nested_svm_vmloadsave() to svm_copy_vmloadsave_state() To match svm_copy_vmrun_state(), rename nested_svm_vmloadsave() to svm_copy_vmloadsave_state(). Opportunistically add missing braces to 'else' branch in vmload_vmsave_interception(). No functional change intended. Suggested-by: Paolo Bonzini Signed-off-by: Vitaly Kuznetsov Message-Id: <20210716144104.465269-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 +- arch/x86/kvm/svm/svm.c | 7 ++++--- arch/x86/kvm/svm/svm.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3bd09c50c98b..8493592b63b4 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save, to_save->cpl = 0; } -void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) +void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; to_vmcb->save.gs = from_vmcb->save.gs; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 664d20f0689c..cfe165d74093 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2147,11 +2147,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload) ret = kvm_skip_emulated_instruction(vcpu); if (vmload) { - nested_svm_vmloadsave(vmcb12, svm->vmcb); + svm_copy_vmloadsave_state(vmcb12, svm->vmcb); svm->sysenter_eip_hi = 0; svm->sysenter_esp_hi = 0; - } else - nested_svm_vmloadsave(svm->vmcb, vmcb12); + } else { + svm_copy_vmloadsave_state(svm->vmcb, vmcb12); + } kvm_vcpu_unmap(vcpu, &map, true); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7e2090752d8f..1b65ee3a9569 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -466,7 +466,7 @@ int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); void svm_copy_vmrun_state(struct vmcb_save_area *from_save, struct vmcb_save_area *to_save); -void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb); +void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) From 2bb16bea5feaa582fbbdbfd84ecaa1ab61bbb34c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 19 Jul 2021 11:03:22 +0200 Subject: [PATCH 174/502] KVM: nSVM: Swap the parameter order for svm_copy_vmrun_state()/svm_copy_vmloadsave_state() Make svm_copy_vmrun_state()/svm_copy_vmloadsave_state() interface match 'memcpy(dest, src)' to avoid any confusion. No functional change intended. Suggested-by: Sean Christopherson Signed-off-by: Vitaly Kuznetsov Message-Id: <20210719090322.625277-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 8 ++++---- arch/x86/kvm/svm/svm.c | 12 ++++++------ arch/x86/kvm/svm/svm.h | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 8493592b63b4..1c2a0414a88d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -702,8 +702,8 @@ out: } /* Copy state save area fields which are handled by VMRUN */ -void svm_copy_vmrun_state(struct vmcb_save_area *from_save, - struct vmcb_save_area *to_save) +void svm_copy_vmrun_state(struct vmcb_save_area *to_save, + struct vmcb_save_area *from_save) { to_save->es = from_save->es; to_save->cs = from_save->cs; @@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save, to_save->cpl = 0; } -void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb) +void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; to_vmcb->save.gs = from_vmcb->save.gs; @@ -1385,7 +1385,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; - svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save); + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); nested_load_control_from_vmcb12(svm, ctl); svm_switch_vmcb(svm, &svm->nested.vmcb02); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index cfe165d74093..9a6987549e1b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2147,11 +2147,11 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload) ret = kvm_skip_emulated_instruction(vcpu); if (vmload) { - svm_copy_vmloadsave_state(vmcb12, svm->vmcb); + svm_copy_vmloadsave_state(svm->vmcb, vmcb12); svm->sysenter_eip_hi = 0; svm->sysenter_esp_hi = 0; } else { - svm_copy_vmloadsave_state(svm->vmcb, vmcb12); + svm_copy_vmloadsave_state(vmcb12, svm->vmcb); } kvm_vcpu_unmap(vcpu, &map, true); @@ -4345,8 +4345,8 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); - svm_copy_vmrun_state(&svm->vmcb01.ptr->save, - map_save.hva + 0x400); + svm_copy_vmrun_state(map_save.hva + 0x400, + &svm->vmcb01.ptr->save); kvm_vcpu_unmap(vcpu, &map_save, true); } @@ -4394,8 +4394,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) &map_save) == -EINVAL) return 1; - svm_copy_vmrun_state(map_save.hva + 0x400, - &svm->vmcb01.ptr->save); + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, + map_save.hva + 0x400); kvm_vcpu_unmap(vcpu, &map_save, true); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 1b65ee3a9569..bd0fe94c2920 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -464,9 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); -void svm_copy_vmrun_state(struct vmcb_save_area *from_save, - struct vmcb_save_area *to_save); -void svm_copy_vmloadsave_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb); +void svm_copy_vmrun_state(struct vmcb_save_area *to_save, + struct vmcb_save_area *from_save); +void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) From 0e691ee7b5034c91a31b565d3ff9a50e01dde445 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 22 Jul 2021 11:26:28 +0200 Subject: [PATCH 175/502] KVM: Documentation: Fix KVM_CAP_ENFORCE_PV_FEATURE_CPUID name 'KVM_CAP_ENFORCE_PV_CPUID' doesn't match the define in include/uapi/linux/kvm.h. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210722092628.236474-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index c7b165ca70b6..1a1d2061227b 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7049,7 +7049,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to trap and emulate MSRs that are outside of the scope of KVM as well as limit the attack surface on KVM's MSR emulation code. -8.28 KVM_CAP_ENFORCE_PV_CPUID +8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID ----------------------------- Architectures: x86 From 3b1c8c5682672d73c1e977944af8c3ebed4a0ce1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 22 Jul 2021 11:50:03 +0200 Subject: [PATCH 176/502] docs: virt: kvm: api.rst: replace some characters The conversion tools used during DocBook/LaTeX/html/Markdown->ReST conversion and some cut-and-pasted text contain some characters that aren't easily reachable on standard keyboards and/or could cause troubles when parsed by the documentation build system. Replace the occurences of the following characters: - U+00a0 (' '): NO-BREAK SPACE as it can cause lines being truncated on PDF output Signed-off-by: Mauro Carvalho Chehab Message-Id: Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1a1d2061227b..dae68e68ca23 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -855,7 +855,7 @@ in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for specific cpus. The irq field is interpreted like this:: -  bits: | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 | + bits: | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 | field: | vcpu2_index | irq_type | vcpu_index | irq_id | The irq_type field has the following values: @@ -2149,10 +2149,10 @@ prior to calling the KVM_RUN ioctl. Errors: ====== ============================================================ -  ENOENT   no such register -  EINVAL   invalid register ID, or no such register or used with VMs in + ENOENT no such register + EINVAL invalid register ID, or no such register or used with VMs in protected virtualization mode on s390 -  EPERM    (arm64) register access not allowed before vcpu finalization + EPERM (arm64) register access not allowed before vcpu finalization ====== ============================================================ (These error codes are indicative only: do not rely on a specific error @@ -2590,10 +2590,10 @@ following id bit patterns:: Errors include: ======== ============================================================ -  ENOENT   no such register -  EINVAL   invalid register ID, or no such register or used with VMs in + ENOENT no such register + EINVAL invalid register ID, or no such register or used with VMs in protected virtualization mode on s390 -  EPERM    (arm64) register access not allowed before vcpu finalization + EPERM (arm64) register access not allowed before vcpu finalization ======== ============================================================ (These error codes are indicative only: do not rely on a specific error @@ -3112,13 +3112,13 @@ current state. "addr" is ignored. Errors: ====== ================================================================= -  EINVAL    the target is unknown, or the combination of features is invalid. -  ENOENT    a features bit specified is unknown. + EINVAL the target is unknown, or the combination of features is invalid. + ENOENT a features bit specified is unknown. ====== ================================================================= This tells KVM what type of CPU to present to the guest, and what -optional features it should have.  This will cause a reset of the cpu -registers to their initial values.  If this is not called, KVM_RUN will +optional features it should have. This will cause a reset of the cpu +registers to their initial values. If this is not called, KVM_RUN will return ENOEXEC for that vcpu. The initial values are defined as: @@ -3239,8 +3239,8 @@ VCPU matching underlying host. Errors: ===== ============================================================== -  E2BIG     the reg index list is too big to fit in the array specified by -             the user (the number required will be written into n). + E2BIG the reg index list is too big to fit in the array specified by + the user (the number required will be written into n). ===== ============================================================== :: @@ -3288,7 +3288,7 @@ specific device. ARM/arm64 divides the id field into two parts, a device id and an address type id specific to the individual device:: -  bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | + bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | field: | 0x00000000 | device id | addr type id | ARM/arm64 currently only require this when using the in-kernel GIC From 0a31df6823232516f61f174907e444f710941dfe Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 22 Jul 2021 14:30:18 +0200 Subject: [PATCH 177/502] KVM: x86: Check the right feature bit for MSR_KVM_ASYNC_PF_ACK access MSR_KVM_ASYNC_PF_ACK MSR is part of interrupt based asynchronous page fault interface and not the original (deprecated) KVM_FEATURE_ASYNC_PF. This is stated in Documentation/virt/kvm/msr.rst. Fixes: 66570e966dd9 ("kvm: x86: only provide PV features if enabled in guest's CPUID") Signed-off-by: Vitaly Kuznetsov Reviewed-by: Maxim Levitsky Reviewed-by: Oliver Upton Message-Id: <20210722123018.260035-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a4fd10604f72..4116567f3d44 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3407,7 +3407,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; break; case MSR_KVM_ASYNC_PF_ACK: - if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) return 1; if (data & 0x1) { vcpu->arch.apf.pageready_pending = false; @@ -3746,7 +3746,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.apf.msr_int_val; break; case MSR_KVM_ASYNC_PF_ACK: - if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) return 1; msr_info->data = 0; From 92766c4628ea349c8ddab0cd7bd0488f36e5c4ce Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Sun, 25 Jul 2021 21:45:12 +0800 Subject: [PATCH 178/502] net/qla3xxx: fix schedule while atomic in ql_wait_for_drvr_lock and ql_adapter_reset When calling the 'ql_wait_for_drvr_lock' and 'ql_adapter_reset', the driver has already acquired the spin lock, so the driver should not call 'ssleep' in atomic context. This bug can be fixed by using 'mdelay' instead of 'ssleep'. Reported-by: Letu Ren Signed-off-by: Letu Ren Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qla3xxx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2376b2729633..c00ad57575ea 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -154,7 +154,7 @@ static int ql_wait_for_drvr_lock(struct ql3_adapter *qdev) "driver lock acquired\n"); return 1; } - ssleep(1); + mdelay(1000); } while (++i < 10); netdev_err(qdev->ndev, "Timed out waiting for driver lock...\n"); @@ -3274,7 +3274,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev) if ((value & ISP_CONTROL_SR) == 0) break; - ssleep(1); + mdelay(1000); } while ((--max_wait_time)); /* @@ -3310,7 +3310,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev) ispControlStatus); if ((value & ISP_CONTROL_FSR) == 0) break; - ssleep(1); + mdelay(1000); } while ((--max_wait_time)); } if (max_wait_time == 0) From 44eff40a32e8f5228ae041006352e32638ad2368 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 26 Jul 2021 14:14:31 +0100 Subject: [PATCH 179/502] io_uring: fix io_prep_async_link locking io_prep_async_link() may be called after arming a linked timeout, automatically making it unsafe to traverse the linked list. Guard with completion_lock if there was a linked timeout. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/93f7c617e2b4f012a2a175b3dab6bc2f27cebc48.1627304436.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5a0fd6bcd318..c4d2b320cdd4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1279,8 +1279,17 @@ static void io_prep_async_link(struct io_kiocb *req) { struct io_kiocb *cur; - io_for_each_link(cur, req) - io_prep_async_work(cur); + if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->completion_lock); + io_for_each_link(cur, req) + io_prep_async_work(cur); + spin_unlock_irq(&ctx->completion_lock); + } else { + io_for_each_link(cur, req) + io_prep_async_work(cur); + } } static void io_queue_async_work(struct io_kiocb *req) From d47255d3f87338164762ac56df1f28d751e27246 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 24 Jun 2021 13:20:21 +0200 Subject: [PATCH 180/502] drm/amdgpu: Fix resource leak on probe error path This reverts commit 4192f7b5768912ceda82be2f83c87ea7181f9980. It is not true (as stated in the reverted commit changelog) that we never unmap the BAR on failure; it actually does happen properly on amdgpu_driver_load_kms() -> amdgpu_driver_unload_kms() -> amdgpu_device_fini() error path. What's worse, this commit actually completely breaks resource freeing on probe failure (like e.g. failure to load microcode), as amdgpu_driver_unload_kms() notices adev->rmmio being NULL and bails too early, leaving all the resources that'd normally be freed in amdgpu_acpi_fini() and amdgpu_device_fini() still hanging around, leading to all sorts of oopses when someone tries to, for example, access the sysfs and procfs resources which are still around while the driver is gone. Fixes: 4192f7b57689 ("drm/amdgpu: unmap register bar on device init failure") Reported-by: Vojtech Pavlik Signed-off-by: Jiri Kosina Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d303e88e3c23..f3fd5ec710b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3504,13 +3504,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - goto failed_unmap; + return r; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) - goto failed_unmap; + return r; /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); @@ -3736,10 +3736,6 @@ release_ras_con: failed: amdgpu_vf_error_trans_all(adev); -failed_unmap: - iounmap(adev->rmmio); - adev->rmmio = NULL; - return r; } From 110aa25c3ce417a44e35990cf8ed22383277933a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Jul 2021 10:42:56 -0600 Subject: [PATCH 181/502] io_uring: fix race in unified task_work running We use a bit to manage if we need to add the shared task_work, but a list + lock for the pending work. Before aborting a current run of the task_work we check if the list is empty, but we do so without grabbing the lock that protects it. This can lead to races where we think we have nothing left to run, where in practice we could be racing with a task adding new work to the list. If we do hit that race condition, we could be left with work items that need processing, but the shared task_work is not active. Ensure that we grab the lock before checking if the list is empty, so we know if it's safe to exit the run or not. Link: https://lore.kernel.org/io-uring/c6bd5987-e9ae-cd02-49d0-1b3ac1ef65b1@tnonline.net/ Cc: stable@vger.kernel.org # 5.11+ Reported-by: Forza Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c4d2b320cdd4..a4331deb0427 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1959,9 +1959,13 @@ static void tctx_task_work(struct callback_head *cb) node = next; } if (wq_list_empty(&tctx->task_list)) { + spin_lock_irq(&tctx->task_lock); clear_bit(0, &tctx->task_state); - if (wq_list_empty(&tctx->task_list)) + if (wq_list_empty(&tctx->task_list)) { + spin_unlock_irq(&tctx->task_lock); break; + } + spin_unlock_irq(&tctx->task_lock); /* another tctx_task_work() is enqueued, yield */ if (test_and_set_bit(0, &tctx->task_state)) break; From 6aade587d329ebe88319dfdb8e8c7b6aede80417 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 24 Jun 2021 13:11:36 +0200 Subject: [PATCH 182/502] drm/amdgpu: Avoid printing of stack contents on firmware load error In case when psp_init_asd_microcode() fails to load ASD microcode file, psp_v12_0_init_microcode() tries to print the firmware filename that failed to load before bailing out. This is wrong because: - the firmware filename it would want it print is an incorrect one as psp_init_asd_microcode() and psp_v12_0_init_microcode() are loading different filenames - it tries to print fw_name, but that's not yet been initialized by that time, so it prints random stack contents, e.g. amdgpu 0000:04:00.0: Direct firmware load for amdgpu/renoir_asd.bin failed with error -2 amdgpu 0000:04:00.0: amdgpu: fail to initialize asd microcode amdgpu 0000:04:00.0: amdgpu: psp v12.0: Failed to load firmware "\xfeTO\x8e\xff\xff" Fix that by bailing out immediately, instead of priting the bogus error message. Reported-by: Vojtech Pavlik Signed-off-by: Jiri Kosina Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 618e5b6b85d9..536d41f327c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -67,7 +67,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) err = psp_init_asd_microcode(psp, chip_name); if (err) - goto out; + return err; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); @@ -80,7 +80,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) } else { err = amdgpu_ucode_validate(adev->psp.ta_fw); if (err) - goto out2; + goto out; ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data; @@ -105,10 +105,9 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) return 0; -out2: +out: release_firmware(adev->psp.ta_fw); adev->psp.ta_fw = NULL; -out: if (err) { dev_err(adev->dev, "psp v12.0: Failed to load firmware \"%s\"\n", From 66291b6adb66dd3bc96b0f594d88c2ff1300d95f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 26 Jul 2021 08:26:56 +0200 Subject: [PATCH 183/502] ALSA: usb-audio: Fix superfluous autosuspend recovery The change to restore the autosuspend from the disabled state uses a wrong check: namely, it should have been the exact comparison of the quirk_type instead of the bitwise and (&). Otherwise it matches wrongly with the other quirk types. Although re-enabling the autosuspend for the already enabled device shouldn't matter much, it's better to fix the unbalanced call. Fixes: 9799110825db ("ALSA: usb-audio: Disable USB autosuspend properly in setup_disable_autosuspend()") Cc: Link: https://lore.kernel.org/r/s5hr1flh9ov.wl-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/card.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 2f6a62416c05..a1f8c3a026f5 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -907,7 +907,7 @@ static void usb_audio_disconnect(struct usb_interface *intf) } } - if (chip->quirk_type & QUIRK_SETUP_DISABLE_AUTOSUSPEND) + if (chip->quirk_type == QUIRK_SETUP_DISABLE_AUTOSUSPEND) usb_enable_autosuspend(interface_to_usbdev(intf)); chip->num_interfaces--; From 53ca18acbe645656132fb5a329833db711067e54 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 26 Jul 2021 12:01:02 +0200 Subject: [PATCH 184/502] spi: imx: mx51-ecspi: Fix low-speed CONFIGREG delay calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spi_imx->spi_bus_clk may be uninitialized and thus also zero in mx51_ecspi_prepare_message(), which would lead to division by zero in kernel. Since bitbang .setup_transfer callback which initializes the spi_imx->spi_bus_clk is called after bitbang prepare_message callback, iterate over all the transfers in spi_message, find the one with lowest bus frequency, and use that bus frequency for the delay calculation. Note that it is not possible to move this CONFIGREG delay back into the .setup_transfer callback, because that is invoked too late, after the GPIO chipselects were already configured. Fixes: 135cbd378eab ("spi: imx: mx51-ecspi: Reinstate low-speed CONFIGREG delay") Signed-off-by: Marek Vasut Cc: Uwe Kleine-König Cc: Mark Brown Link: https://lore.kernel.org/r/20210726100102.5188-1-marex@denx.de Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 4aee3db6d6df..2872993550bd 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -505,7 +505,9 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, struct spi_message *msg) { struct spi_device *spi = msg->spi; + struct spi_transfer *xfer; u32 ctrl = MX51_ECSPI_CTRL_ENABLE; + u32 min_speed_hz = ~0U; u32 testreg, delay; u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG); @@ -577,8 +579,20 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, * be asserted before the SCLK polarity changes, which would disrupt * the SPI communication as the device on the other end would consider * the change of SCLK polarity as a clock tick already. + * + * Because spi_imx->spi_bus_clk is only set in bitbang prepare_message + * callback, iterate over all the transfers in spi_message, find the + * one with lowest bus frequency, and use that bus frequency for the + * delay calculation. In case all transfers have speed_hz == 0, then + * min_speed_hz is ~0 and the resulting delay is zero. */ - delay = (2 * 1000000) / spi_imx->spi_bus_clk; + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + if (!xfer->speed_hz) + continue; + min_speed_hz = min(xfer->speed_hz, min_speed_hz); + } + + delay = (2 * 1000000) / min_speed_hz; if (likely(delay < 10)) /* SCLK is faster than 100 kHz */ udelay(delay); else /* SCLK is _very_ slow */ From 758684e49f4c7ea2a75e249e486659f0950cd63e Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 26 Jul 2021 14:52:48 -0400 Subject: [PATCH 185/502] bnxt_en: Fix static checker warning in bnxt_fw_reset_task() Now that we return when bnxt_open() fails in bnxt_fw_reset_task(), there is no need to check for 'rc' value again before invoking bnxt_reenable_sriov(). Fixes: 3958b1da725a ("bnxt_en: fix error path of FW reset") Reported-by: Dan Carpenter Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4db162cee911..89606587b156 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12131,9 +12131,8 @@ static void bnxt_fw_reset_task(struct work_struct *work) /* Make sure fw_reset_state is 0 before clearing the flag */ smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - bnxt_ulp_start(bp, rc); - if (!rc) - bnxt_reenable_sriov(bp); + bnxt_ulp_start(bp, 0); + bnxt_reenable_sriov(bp); bnxt_vf_reps_alloc(bp); bnxt_vf_reps_open(bp); bnxt_dl_health_recovery_done(bp); From 24b5b1978cd5a80db58e2a19db2f9c36fe8d4f7a Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Sun, 25 Jul 2021 18:07:25 +0200 Subject: [PATCH 186/502] clk: stm32f4: fix post divisor setup for I2S/SAI PLLs Enabling the framebuffer leads to a system hang. Running, as a debug hack, the store_pan() function in drivers/video/fbdev/core/fbsysfs.c without taking the console_lock, allows to see the crash backtrace on the serial line. ~ # echo 0 0 > /sys/class/graphics/fb0/pan [ 9.719414] Unhandled exception: IPSR = 00000005 LR = fffffff1 [ 9.726937] CPU: 0 PID: 49 Comm: sh Not tainted 5.13.0-rc5 #9 [ 9.733008] Hardware name: STM32 (Device Tree Support) [ 9.738296] PC is at clk_gate_is_enabled+0x0/0x28 [ 9.743426] LR is at stm32f4_pll_div_set_rate+0xf/0x38 [ 9.748857] pc : [<0011e4be>] lr : [<0011f9e3>] psr: 0100000b [ 9.755373] sp : 00bc7be0 ip : 00000000 fp : 001f3ac4 [ 9.760812] r10: 002610d0 r9 : 01efe920 r8 : 00540560 [ 9.766269] r7 : 02e7ddb0 r6 : 0173eed8 r5 : 00000000 r4 : 004027c0 [ 9.773081] r3 : 0011e4bf r2 : 02e7ddb0 r1 : 0173eed8 r0 : 1d3267b8 [ 9.779911] xPSR: 0100000b [ 9.782719] CPU: 0 PID: 49 Comm: sh Not tainted 5.13.0-rc5 #9 [ 9.788791] Hardware name: STM32 (Device Tree Support) [ 9.794120] [<0000afa1>] (unwind_backtrace) from [<0000a33f>] (show_stack+0xb/0xc) [ 9.802421] [<0000a33f>] (show_stack) from [<0000a8df>] (__invalid_entry+0x4b/0x4c) The `pll_num' field in the post_div_data configuration contained a wrong value which also referenced an uninitialized hardware clock when clk_register_pll_div() was called. Fixes: 517633ef630e ("clk: stm32f4: Add post divisor for I2S & SAI PLLs") Signed-off-by: Dario Binacchi Reviewed-by: Gabriel Fernandez Link: https://lore.kernel.org/r/20210725160725.10788-1-dariobin@libero.it Signed-off-by: Stephen Boyd --- drivers/clk/clk-stm32f4.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index 18117ce5ff85..5c75e3d906c2 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -526,7 +526,7 @@ struct stm32f4_pll { struct stm32f4_pll_post_div_data { int idx; - u8 pll_num; + int pll_idx; const char *name; const char *parent; u8 flag; @@ -557,13 +557,13 @@ static const struct clk_div_table post_divr_table[] = { #define MAX_POST_DIV 3 static const struct stm32f4_pll_post_div_data post_div_data[MAX_POST_DIV] = { - { CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q", + { CLK_I2SQ_PDIV, PLL_VCO_I2S, "plli2s-q-div", "plli2s-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL}, - { CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q", + { CLK_SAIQ_PDIV, PLL_VCO_SAI, "pllsai-q-div", "pllsai-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL }, - { NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, + { NO_IDX, PLL_VCO_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table }, }; @@ -1774,7 +1774,7 @@ static void __init stm32f4_rcc_init(struct device_node *np) post_div->width, post_div->flag_div, post_div->div_table, - clks[post_div->pll_num], + clks[post_div->pll_idx], &stm32f4_clk_lock); if (post_div->idx != NO_IDX) From 953a92f0e55f370ec76e7f85e332906f1e898ef4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Jul 2021 21:31:59 -0700 Subject: [PATCH 187/502] clk: hisilicon: hi3559a: select RESET_HISI The clk-hi3559a driver uses functions from reset.c so it should select RESET_HISI to avoid build errors. Fixes these build errors: aarch64-linux-ld: drivers/clk/hisilicon/clk-hi3559a.o: in function `hi3559av100_crg_remove': clk-hi3559a.c:(.text+0x158): undefined reference to `hisi_reset_exit' aarch64-linux-ld: drivers/clk/hisilicon/clk-hi3559a.o: in function `hi3559av100_crg_probe': clk-hi3559a.c:(.text+0x1f4): undefined reference to `hisi_reset_init' aarch64-linux-ld: clk-hi3559a.c:(.text+0x238): undefined reference to `hisi_reset_exit' Fixes: 6c81966107dc ("clk: hisilicon: Add clock driver for hi3559A SoC") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Dongjiu Geng Cc: Stephen Boyd Cc: stable@vger.kernel.org Cc: linux-clk@vger.kernel.org Cc: Michael Turquette Link: https://lore.kernel.org/r/20210717043159.12566-1-rdunlap@infradead.org Reviewed-by: Dongjiu Geng Signed-off-by: Stephen Boyd --- drivers/clk/hisilicon/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig index 5ecc37aaa118..c1ec75aa4ccd 100644 --- a/drivers/clk/hisilicon/Kconfig +++ b/drivers/clk/hisilicon/Kconfig @@ -18,6 +18,7 @@ config COMMON_CLK_HI3519 config COMMON_CLK_HI3559A bool "Hi3559A Clock Driver" depends on ARCH_HISI || COMPILE_TEST + select RESET_HISI default ARCH_HISI help Build the clock driver for hi3559a. From f2a26a3cff27dfa456fef386fe5df56dcb4b47b6 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 23 Jul 2021 18:35:15 -0500 Subject: [PATCH 188/502] SMB3: fix readpage for large swap cache readpage was calculating the offset of the page incorrectly for the case of large swapcaches. loff_t offset = (loff_t)page->index << PAGE_SHIFT; As pointed out by Matthew Wilcox, this needs to use page_file_offset() to calculate the offset instead. Pages coming from the swap cache have page->index set to their index within the swapcache, not within the backing file. For a sufficiently large swapcache, we could have overlapping values of page->index within the same backing file. Suggested by: Matthew Wilcox (Oracle) Cc: # v5.7+ Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cd108607a070..0a72840a88f1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4619,7 +4619,7 @@ read_complete: static int cifs_readpage(struct file *file, struct page *page) { - loff_t offset = (loff_t)page->index << PAGE_SHIFT; + loff_t offset = page_file_offset(page); int rc = -EACCES; unsigned int xid; From 5ad4df56cd2158965f73416d41fce37906724822 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 26 Jul 2021 16:22:55 -0500 Subject: [PATCH 189/502] smb3: rc uninitialized in one fallocate path Clang detected a problem with rc possibly being unitialized (when length is zero) in a recently added fallocate code path. Reported-by: kernel test robot Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 23d6f4d71649..2dfd0d8297eb 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3617,7 +3617,8 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, char *buf) { struct cifs_io_parms io_parms = {0}; - int rc, nbytes; + int nbytes; + int rc = 0; struct kvec iov[2]; io_parms.netfid = cfile->fid.netfid; From 35171fbfc0d94aa31b009bb475d156ad1941ab50 Mon Sep 17 00:00:00 2001 From: Nikos Liolios Date: Tue, 27 Jul 2021 06:05:10 +0300 Subject: [PATCH 190/502] ALSA: hda/realtek: Fix headset mic for Acer SWIFT SF314-56 (ALC256) The issue on Acer SWIFT SF314-56 is that headset microphone doesn't work. The following quirk fixed headset microphone issue. The fixup was found by trial and error. Note that the fixup of SF314-54/55 (ALC256_FIXUP_ACER_HEADSET_MIC) was not successful on my SF314-56. Signed-off-by: Nikos Liolios Cc: Link: https://lore.kernel.org/r/20210727030510.36292-1-liolios.nk@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index caaf0e8aac11..14e1ab7c7954 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8274,6 +8274,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), From b070f9ca78680486927b799cf6126b128a7c2c1b Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 20 Jul 2021 11:47:10 -0700 Subject: [PATCH 191/502] ARM: omap2+: hwmod: fix potential NULL pointer access omap_hwmod_get_pwrdm() may access a NULL clk_hw pointer in some failure cases. Add a check for the case and bail out gracely if this happens. Reported-by: Dan Murphy Signed-off-by: Tero Kristo Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 65934b2924fb..12b26e04686f 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3776,6 +3776,7 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh) struct omap_hwmod_ocp_if *oi; struct clockdomain *clkdm; struct clk_hw_omap *clk; + struct clk_hw *hw; if (!oh) return NULL; @@ -3792,7 +3793,14 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh) c = oi->_clk; } - clk = to_clk_hw_omap(__clk_get_hw(c)); + hw = __clk_get_hw(c); + if (!hw) + return NULL; + + clk = to_clk_hw_omap(hw); + if (!clk) + return NULL; + clkdm = clk->clkdm; if (!clkdm) return NULL; From a6d90e9f22328f07343e49e08a4ca483ae8e8abb Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Tue, 20 Jul 2021 11:27:16 -0700 Subject: [PATCH 192/502] bus: ti-sysc: AM3: RNG is GP only Make the RNG on AM3 GP only. Based on this patch from TI v5.4 tree which is based on hwmod data which are now removed: | ARM: AM43xx: hwmod: Move RNG to a GP only links table | | On non-GP devices the RNG is controlled by the secure-side software, | like in DRA7xx hwmod we should not control this IP when we are not | a GP device. | | Signed-off-by: Andrew F. Davis Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 38cb116ed433..2587ed43ee8a 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -2951,6 +2951,8 @@ static int sysc_init_soc(struct sysc *ddata) case SOC_3430 ... SOC_3630: sysc_add_disabled(0x48304000); /* timer12 */ break; + case SOC_AM3: + sysc_add_disabled(0x48310000); /* rng */ default: break; } From 20a6b3fd8e2e2c063b25fbf2ee74d86b898e5087 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 16 Jul 2021 09:07:30 -0700 Subject: [PATCH 193/502] ARM: dts: am43x-epos-evm: Reduce i2c0 bus speed for tps65218 Based on the latest timing specifications for the TPS65218 from the data sheet, http://www.ti.com/lit/ds/symlink/tps65218.pdf, document SLDS206 from November 2014, we must change the i2c bus speed to better fit within the minimum high SCL time required for proper i2c transfer. When running at 400khz, measurements show that SCL spends 0.8125 uS/1.666 uS high/low which violates the requirement for minimum high period of SCL provided in datasheet Table 7.6 which is 1 uS. Switching to 100khz gives us 5 uS/5 uS high/low which both fall above the minimum given values for 100 khz, 4.0 uS/4.7 uS high/low. Without this patch occasionally a voltage set operation from the kernel will appear to have worked but the actual voltage reflected on the PMIC will not have updated, causing problems especially with cpufreq that may update to a higher OPP without actually raising the voltage on DCDC2, leading to a hang. Signed-off-by: Dave Gerlach Signed-off-by: Kevin Hilman Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am43x-epos-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index aae0af10a5b1..2aa75abf85a9 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -582,7 +582,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins>; - clock-frequency = <400000>; + clock-frequency = <100000>; tps65218: tps65218@24 { reg = <0x24>; From 0162a9964365fd26e34575e121b17d021204c481 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Mon, 26 Jul 2021 15:15:25 +0200 Subject: [PATCH 194/502] ARM: dts: am437x-l4: fix typo in can@0 node Replace clock-name with clock-names. Fixes: 2a4117df9b43 ("ARM: dts: Fix dcan driver probe failed on am437x platform") Signed-off-by: Dario Binacchi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-l4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 40ef3973f2a9..ba58e6b0da1d 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1595,7 +1595,7 @@ compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; clocks = <&dcan1_fck>; - clock-name = "fck"; + clock-names = "fck"; syscon-raminit = <&scm_conf 0x644 1>; interrupts = ; status = "disabled"; From c68ef4ad180e09805fa46965d15e1dfadf09ffa5 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 1 Jul 2021 16:00:22 +0200 Subject: [PATCH 195/502] omap5-board-common: remove not physically existing vdds_1v8_main fixed-regulator This device tree include file describes a fixed-regulator connecting smps7_reg output (1.8V) to some 1.8V rail and consumers (vdds_1v8_main). This regulator does not physically exist. I assume it was introduced as a wrapper around smps7_reg to provide a speaking signal name "vdds_1v8_main" as label. This fixed-regulator without real function was not an issue in driver code until Commit 98e48cd9283d ("regulator: core: resolve supply for boot-on/always-on regulators") introduced a new check for regulator initialization which makes Palmas regulator registration fail: [ 5.407712] ldo1: supplied by vsys_cobra [ 5.412748] ldo2: supplied by vsys_cobra [ 5.417603] palmas-pmic 48070000.i2c:palmas@48:palmas_pmic: failed to register 48070000.i2c:palmas@48:palmas_pmic regulator The reason is that the supply-chain of regulators is too long and goes from ldo3 through the virtual vdds_1v8_main regulator and then back to smps7. This adds a cross-dependency of probing Palmas regulators and the fixed-regulator which leads to probe deferral by the new check and is no longer resolved. Since we do not control what device tree files including this one reference (either &vdds_1v8_main or &smps7_reg or both) we keep both labels for smps7 for compatibility. Fixes: 98e48cd9283d ("regulator: core: resolve supply for boot-on/always-on regulators") Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 45435bb88c89..373984c130e0 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -30,14 +30,6 @@ regulator-max-microvolt = <5000000>; }; - vdds_1v8_main: fixedregulator-vdds_1v8_main { - compatible = "regulator-fixed"; - regulator-name = "vdds_1v8_main"; - vin-supply = <&smps7_reg>; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - }; - vmmcsd_fixed: fixedregulator-mmcsd { compatible = "regulator-fixed"; regulator-name = "vmmcsd_fixed"; @@ -487,6 +479,7 @@ regulator-boot-on; }; + vdds_1v8_main: smps7_reg: smps7 { /* VDDS_1v8_OMAP over VDDS_1v8_MAIN */ regulator-name = "smps7"; From 9f59efcd51e332aad01e7fa2b3a97cd22d347ceb Mon Sep 17 00:00:00 2001 From: Michael Zaidman Date: Mon, 10 May 2021 19:34:28 +0300 Subject: [PATCH 196/502] HID: ft260: fix format type warning in ft260_word_show() Fixes: 6a82582d9fa4 ("HID: ft260: add usb hid to i2c host bridge driver") Fix warning reported by static analysis when built with W=1 for arm64 by clang version 13.0.0 >> drivers/hid/hid-ft260.c:794:44: warning: format specifies type 'short' but the argument has type 'int' [-Wformat] return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field)); ~~~ ^~~~~~~~~~~~~~~~~~~ %i include/linux/byteorder/generic.h:91:21: note: expanded from macro 'le16_to_cpu' #define le16_to_cpu __le16_to_cpu ^ include/uapi/linux/byteorder/big_endian.h:36:26: note: expanded from macro '__le16_to_cpu' #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x)) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/uapi/linux/swab.h:105:2: note: expanded from macro '__swab16' (__builtin_constant_p((__u16)(x)) ? \ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Any sprintf style use of %h or %hi for a sub-int sized value isn't useful since integer promotion is done on the value anyway. So, use %d instead. https://lore.kernel.org/lkml/CAHk-=wgoxnmsj8GEVFJSvTwdnWm8wVJthefNk2n6+4TC=20e0Q@mail.gmail.com/ Signed-off-by: Michael Zaidman Suggested-by: Joe Perches Reported-by: kernel test robot Signed-off-by: Jiri Kosina --- drivers/hid/hid-ft260.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c index f43a8406cb9a..6f10df2042c4 100644 --- a/drivers/hid/hid-ft260.c +++ b/drivers/hid/hid-ft260.c @@ -785,7 +785,7 @@ static int ft260_byte_show(struct hid_device *hdev, int id, u8 *cfg, int len, if (ret < 0) return ret; - return scnprintf(buf, PAGE_SIZE, "%hi\n", *field); + return scnprintf(buf, PAGE_SIZE, "%d\n", *field); } static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len, @@ -797,7 +797,7 @@ static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len, if (ret < 0) return ret; - return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field)); + return scnprintf(buf, PAGE_SIZE, "%d\n", le16_to_cpu(*field)); } #define FT260_ATTR_SHOW(name, reptype, id, type, func) \ From 4b0556b96e1fe7723629bd40e3813a30cd632faf Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Tue, 27 Jul 2021 12:33:26 +0300 Subject: [PATCH 197/502] ALSA: usb-audio: Add registration quirk for JBL Quantum 600 Apparently JBL Quantum 600 has multiple hardware revisions. Apply registration quirk to another device id as well. Signed-off-by: Alexander Tsoy Cc: Link: https://lore.kernel.org/r/20210727093326.1153366-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e7accd87e063..326d1b0ea5e6 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1899,6 +1899,7 @@ static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */ REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */ REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */ + REG_QUIRK_ENTRY(0x0ecb, 0x203c, 2), /* JBL Quantum 600 */ REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */ { 0 } /* terminator */ }; From fcef709c2c4baf758950bd7395e4b10527b81e2c Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Sun, 25 Jul 2021 18:54:52 +0530 Subject: [PATCH 198/502] octeontx2-af: Do NIX_RX_SW_SYNC twice NIX_RX_SW_SYNC ensures all existing transactions are finished and pkts are written to LLC/DRAM, queues should be teared down after successful SW_SYNC. Due to a HW errata, in some rare scenarios an existing transaction might end after SW_SYNC operation. To ensure operation is fully done, do the SW_SYNC twice. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 30067668eda7..4bfbbdf38770 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -196,11 +196,22 @@ static void nix_rx_sync(struct rvu *rvu, int blkaddr) { int err; - /*Sync all in flight RX packets to LLC/DRAM */ + /* Sync all in flight RX packets to LLC/DRAM */ rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0)); err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true); if (err) - dev_err(rvu->dev, "NIX RX software sync failed\n"); + dev_err(rvu->dev, "SYNC1: NIX RX software sync failed\n"); + + /* SW_SYNC ensures all existing transactions are finished and pkts + * are written to LLC/DRAM, queues should be teared down after + * successful SW_SYNC. Due to a HW errata, in some rare scenarios + * an existing transaction might end after SW_SYNC operation. To + * ensure operation is fully done, do the SW_SYNC twice. + */ + rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0)); + err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true); + if (err) + dev_err(rvu->dev, "SYNC2: NIX RX software sync failed\n"); } static bool is_valid_txschq(struct rvu *rvu, int blkaddr, From c7c9d2102c9c098916ab9e0ab248006107d00d6c Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 25 Jul 2021 00:11:59 +0300 Subject: [PATCH 199/502] net: llc: fix skb_over_panic Syzbot reported skb_over_panic() in llc_pdu_init_as_xid_cmd(). The problem was in wrong LCC header manipulations. Syzbot's reproducer tries to send XID packet. llc_ui_sendmsg() is doing following steps: 1. skb allocation with size = len + header size len is passed from userpace and header size is 3 since addr->sllc_xid is set. 2. skb_reserve() for header_len = 3 3. filling all other space with memcpy_from_msg() Ok, at this moment we have fully loaded skb, only headers needs to be filled. Then code comes to llc_sap_action_send_xid_c(). This function pushes 3 bytes for LLC PDU header and initializes it. Then comes llc_pdu_init_as_xid_cmd(). It initalizes next 3 bytes *AFTER* LLC PDU header and call skb_push(skb, 3). This looks wrong for 2 reasons: 1. Bytes rigth after LLC header are user data, so this function was overwriting payload. 2. skb_push(skb, 3) call can cause skb_over_panic() since all free space was filled in llc_ui_sendmsg(). (This can happen is user passed 686 len: 686 + 14 (eth header) + 3 (LLC header) = 703. SKB_DATA_ALIGN(703) = 704) So, in this patch I added 2 new private constansts: LLC_PDU_TYPE_U_XID and LLC_PDU_LEN_U_XID. LLC_PDU_LEN_U_XID is used to correctly reserve header size to handle LLC + XID case. LLC_PDU_TYPE_U_XID is used by llc_pdu_header_init() function to push 6 bytes instead of 3. And finally I removed skb_push() call from llc_pdu_init_as_xid_cmd(). This changes should not affect other parts of LLC, since after all steps we just transmit buffer. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+5e5a981ad7cc54c4b2b4@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- include/net/llc_pdu.h | 31 +++++++++++++++++++++++-------- net/llc/af_llc.c | 10 +++++++++- net/llc/llc_s_ac.c | 2 +- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index c0f0a13ed818..49aa79c7b278 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -15,9 +15,11 @@ #include /* Lengths of frame formats */ -#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ -#define LLC_PDU_LEN_S 4 -#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ +#define LLC_PDU_LEN_S 4 +#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +/* header and 1 control byte and XID info */ +#define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info)) /* Known SAP addresses */ #define LLC_GLOBAL_SAP 0xFF #define LLC_NULL_SAP 0x00 /* not network-layer visible */ @@ -50,9 +52,10 @@ #define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */ #define LLC_PDU_TYPE_MASK 0x03 -#define LLC_PDU_TYPE_I 0 /* first bit */ -#define LLC_PDU_TYPE_S 1 /* first two bits */ -#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_I 0 /* first bit */ +#define LLC_PDU_TYPE_S 1 /* first two bits */ +#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */ #define LLC_PDU_TYPE_IS_I(pdu) \ ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0) @@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, u8 ssap, u8 dsap, u8 cr) { - const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; + int hlen = 4; /* default value for I and S types */ struct llc_pdu_un *pdu; + switch (type) { + case LLC_PDU_TYPE_U: + hlen = 3; + break; + case LLC_PDU_TYPE_U_XID: + hlen = 6; + break; + } + skb_push(skb, hlen); skb_reset_network_header(skb); pdu = llc_pdu_un_hdr(skb); @@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ - skb_put(skb, sizeof(struct llc_xid_info)); + + /* no need to push/put since llc_pdu_header_init() has already + * pushed 3 + 3 bytes + */ } /** diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7180979114e4..ac5cadd02cfa 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) { u8 rc = LLC_PDU_LEN_U; - if (addr->sllc_test || addr->sllc_xid) + if (addr->sllc_test) rc = LLC_PDU_LEN_U; + else if (addr->sllc_xid) + /* We need to expand header to sizeof(struct llc_xid_info) + * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header + * as XID PDU. In llc_ui_sendmsg() we reserved header size and then + * filled all other space with user data. If we won't reserve this + * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data + */ + rc = LLC_PDU_LEN_U_XID; else if (sk->sk_type == SOCK_STREAM) rc = LLC_PDU_LEN_I; return rc; diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index b554f26c68ee..79d1cef8f15a 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) struct llc_sap_state_ev *ev = llc_sap_ev(skb); int rc; - llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, + llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); From 4d1014c1816c0395eca5d1d480f196a4c63119d0 Mon Sep 17 00:00:00 2001 From: Filip Schauer Date: Tue, 27 Jul 2021 13:23:11 +0200 Subject: [PATCH 200/502] drivers core: Fix oops when driver probe fails dma_range_map is freed to early, which might cause an oops when a driver probe fails. Call trace: is_free_buddy_page+0xe4/0x1d4 __free_pages+0x2c/0x88 dma_free_contiguous+0x64/0x80 dma_direct_free+0x38/0xb4 dma_free_attrs+0x88/0xa0 dmam_release+0x28/0x34 release_nodes+0x78/0x8c devres_release_all+0xa8/0x110 really_probe+0x118/0x2d0 __driver_probe_device+0xc8/0xe0 driver_probe_device+0x54/0xec __driver_attach+0xe0/0xf0 bus_for_each_dev+0x7c/0xc8 driver_attach+0x30/0x3c bus_add_driver+0x17c/0x1c4 driver_register+0xc0/0xf8 __platform_driver_register+0x34/0x40 ... This issue is introduced by commit d0243bbd5dd3 ("drivers core: Free dma_range_map when driver probe failed"). It frees dma_range_map before the call to devres_release_all, which is too early. The solution is to free dma_range_map only after devres_release_all. Fixes: d0243bbd5dd3 ("drivers core: Free dma_range_map when driver probe failed") Cc: stable Signed-off-by: Filip Schauer Link: https://lore.kernel.org/r/20210727112311.GA7645@DESKTOP-E8BN1B0.localdomain Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index daeb9b5763ae..437cd61343b2 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -653,8 +653,6 @@ dev_groups_failed: else if (drv->remove) drv->remove(dev); probe_failed: - kfree(dev->dma_range_map); - dev->dma_range_map = NULL; if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); @@ -662,6 +660,8 @@ pinctrl_bind_failed: device_links_no_driver(dev); devres_release_all(dev); arch_teardown_dma_ops(dev); + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; driver_sysfs_remove(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); From 55f24c27b6c1a840b62fe297616f1f9ea3576cb7 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 27 Jul 2021 14:47:32 +0900 Subject: [PATCH 201/502] dmaengine: uniphier-xdmac: Use readl_poll_timeout_atomic() in atomic state The function uniphier_xdmac_chan_stop() is only called in atomic state. Should use readl_poll_timeout_atomic() there instead of readl_poll_timeout(). Reported-by: Dan Carpenter Fixes: 667b9251440b ("dmaengine: uniphier-xdmac: Add UniPhier external DMA controller driver") Signed-off-by: Kunihiko Hayashi Link: https://lore.kernel.org/r/1627364852-28432-1-git-send-email-hayashi.kunihiko@socionext.com Signed-off-by: Vinod Koul --- drivers/dma/uniphier-xdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c index 16b19654873d..d6b8a202474f 100644 --- a/drivers/dma/uniphier-xdmac.c +++ b/drivers/dma/uniphier-xdmac.c @@ -209,8 +209,8 @@ static int uniphier_xdmac_chan_stop(struct uniphier_xdmac_chan *xc) writel(0, xc->reg_ch_base + XDMAC_TSS); /* wait until transfer is stopped */ - return readl_poll_timeout(xc->reg_ch_base + XDMAC_STAT, val, - !(val & XDMAC_STAT_TENF), 100, 1000); + return readl_poll_timeout_atomic(xc->reg_ch_base + XDMAC_STAT, val, + !(val & XDMAC_STAT_TENF), 100, 1000); } /* xc->vc.lock must be held by caller */ From 801e541c79bbc63af852ca21b713ba87cc97c6ad Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Tue, 27 Jul 2021 20:25:06 +0800 Subject: [PATCH 202/502] nfc: s3fwrn5: fix undefined parameter values in dev_err() In the function s3fwrn5_fw_download(), the 'ret' is not assigned, so the correct value should be given in dev_err function. Fixes: a0302ff5906a ("nfc: s3fwrn5: remove unnecessary label") Signed-off-by: Zhang Shengju Signed-off-by: Tang Bin Signed-off-by: David S. Miller --- drivers/nfc/s3fwrn5/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index eb5d7a5beac7..1340fab9565e 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -423,7 +423,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (IS_ERR(tfm)) { ret = PTR_ERR(tfm); dev_err(&fw_info->ndev->nfc_dev->dev, - "Cannot allocate shash (code=%d)\n", ret); + "Cannot allocate shash (code=%ld)\n", PTR_ERR(tfm)); goto out; } From 9be550ee43919b070bcd77f9228bdbbbc073245b Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 21 Jul 2021 22:34:36 +0300 Subject: [PATCH 203/502] staging: rtl8712: get rid of flush_scheduled_work This patch is preparation for following patch for error handling refactoring. flush_scheduled_work() takes (wq_completion)events lock and it can lead to deadlock when r871xu_dev_remove() is called from workqueue. To avoid deadlock sutiation we can change flush_scheduled_work() call to flush_work() call for all possibly scheduled works in this driver, since next patch adds device_release_driver() in case of fw load failure. Signed-off-by: Pavel Skripkin Cc: stable Link: https://lore.kernel.org/r/6e028b4c457eeb7156c76c6ea3cdb3cb0207c7e1.1626895918.git.paskripkin@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/rtl8712_led.c | 8 ++++++++ drivers/staging/rtl8712/rtl871x_led.h | 1 + drivers/staging/rtl8712/rtl871x_pwrctrl.c | 8 ++++++++ drivers/staging/rtl8712/rtl871x_pwrctrl.h | 1 + drivers/staging/rtl8712/usb_intf.c | 3 ++- 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtl8712/rtl8712_led.c b/drivers/staging/rtl8712/rtl8712_led.c index 5901026949f2..d5fc9026b036 100644 --- a/drivers/staging/rtl8712/rtl8712_led.c +++ b/drivers/staging/rtl8712/rtl8712_led.c @@ -1820,3 +1820,11 @@ void LedControl871x(struct _adapter *padapter, enum LED_CTL_MODE LedAction) break; } } + +void r8712_flush_led_works(struct _adapter *padapter) +{ + struct led_priv *pledpriv = &padapter->ledpriv; + + flush_work(&pledpriv->SwLed0.BlinkWorkItem); + flush_work(&pledpriv->SwLed1.BlinkWorkItem); +} diff --git a/drivers/staging/rtl8712/rtl871x_led.h b/drivers/staging/rtl8712/rtl871x_led.h index ee19c873cf01..2f0768132ad8 100644 --- a/drivers/staging/rtl8712/rtl871x_led.h +++ b/drivers/staging/rtl8712/rtl871x_led.h @@ -112,6 +112,7 @@ struct led_priv { void r8712_InitSwLeds(struct _adapter *padapter); void r8712_DeInitSwLeds(struct _adapter *padapter); void LedControl871x(struct _adapter *padapter, enum LED_CTL_MODE LedAction); +void r8712_flush_led_works(struct _adapter *padapter); #endif diff --git a/drivers/staging/rtl8712/rtl871x_pwrctrl.c b/drivers/staging/rtl8712/rtl871x_pwrctrl.c index 23cff43437e2..cd6d9ff0bebc 100644 --- a/drivers/staging/rtl8712/rtl871x_pwrctrl.c +++ b/drivers/staging/rtl8712/rtl871x_pwrctrl.c @@ -224,3 +224,11 @@ void r8712_unregister_cmd_alive(struct _adapter *padapter) } mutex_unlock(&pwrctrl->mutex_lock); } + +void r8712_flush_rwctrl_works(struct _adapter *padapter) +{ + struct pwrctrl_priv *pwrctrl = &padapter->pwrctrlpriv; + + flush_work(&pwrctrl->SetPSModeWorkItem); + flush_work(&pwrctrl->rpwm_workitem); +} diff --git a/drivers/staging/rtl8712/rtl871x_pwrctrl.h b/drivers/staging/rtl8712/rtl871x_pwrctrl.h index bf6623cfaf27..b35b9c7920eb 100644 --- a/drivers/staging/rtl8712/rtl871x_pwrctrl.h +++ b/drivers/staging/rtl8712/rtl871x_pwrctrl.h @@ -108,5 +108,6 @@ void r8712_cpwm_int_hdl(struct _adapter *padapter, void r8712_set_ps_mode(struct _adapter *padapter, uint ps_mode, uint smart_ps); void r8712_set_rpwm(struct _adapter *padapter, u8 val8); +void r8712_flush_rwctrl_works(struct _adapter *padapter); #endif /* __RTL871X_PWRCTRL_H_ */ diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index 2434b13c8b12..643f21eb1128 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -606,7 +606,8 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf) padapter->surprise_removed = true; if (pnetdev->reg_state != NETREG_UNINITIALIZED) unregister_netdev(pnetdev); /* will call netdev_close() */ - flush_scheduled_work(); + r8712_flush_rwctrl_works(padapter); + r8712_flush_led_works(padapter); udelay(1); /* Stop driver mlme relation timer */ r8712_stop_drv_timers(padapter); From e9e6aa51b2735d83a67d9fa0119cf11abef80d99 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 21 Jul 2021 22:34:47 +0300 Subject: [PATCH 204/502] staging: rtl8712: error handling refactoring There was strange error handling logic in case of fw load failure. For some reason fw loader callback was doing clean up stuff when fw is not available. I don't see any reason behind doing this. Since this driver doesn't have EEPROM firmware let's just disconnect it in case of fw load failure. Doing clean up stuff in 2 different place which can run concurently is not good idea and syzbot found 2 bugs related to this strange approach. So, in this pacth I deleted all clean up code from fw callback and made a call to device_release_driver() under device_lock(parent) in case of fw load failure. This approach is more generic and it defend driver from UAF bugs, since all clean up code is moved to one place. Fixes: e02a3b945816 ("staging: rtl8712: fix memory leak in rtl871x_load_fw_cb") Fixes: 8c213fa59199 ("staging: r8712u: Use asynchronous firmware loading") Cc: stable Reported-and-tested-by: syzbot+5872a520e0ce0a7c7230@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+cc699626e48a6ebaf295@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/d49ecc56e97c4df181d7bd4d240b031f315eacc3.1626895918.git.paskripkin@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/hal_init.c | 30 ++++++++++++------- drivers/staging/rtl8712/usb_intf.c | 48 +++++++++++++----------------- 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/drivers/staging/rtl8712/hal_init.c b/drivers/staging/rtl8712/hal_init.c index 22974277afa0..4eff3fdecdb8 100644 --- a/drivers/staging/rtl8712/hal_init.c +++ b/drivers/staging/rtl8712/hal_init.c @@ -29,21 +29,31 @@ #define FWBUFF_ALIGN_SZ 512 #define MAX_DUMP_FWSZ (48 * 1024) +static void rtl871x_load_fw_fail(struct _adapter *adapter) +{ + struct usb_device *udev = adapter->dvobjpriv.pusbdev; + struct device *dev = &udev->dev; + struct device *parent = dev->parent; + + complete(&adapter->rtl8712_fw_ready); + + dev_err(&udev->dev, "r8712u: Firmware request failed\n"); + + if (parent) + device_lock(parent); + + device_release_driver(dev); + + if (parent) + device_unlock(parent); +} + static void rtl871x_load_fw_cb(const struct firmware *firmware, void *context) { struct _adapter *adapter = context; if (!firmware) { - struct usb_device *udev = adapter->dvobjpriv.pusbdev; - struct usb_interface *usb_intf = adapter->pusb_intf; - - dev_err(&udev->dev, "r8712u: Firmware request failed\n"); - usb_put_dev(udev); - usb_set_intfdata(usb_intf, NULL); - r8712_free_drv_sw(adapter); - adapter->dvobj_deinit(adapter); - complete(&adapter->rtl8712_fw_ready); - free_netdev(adapter->pnetdev); + rtl871x_load_fw_fail(adapter); return; } adapter->fw = firmware; diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index 643f21eb1128..505ebeb643dc 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -591,36 +591,30 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf) { struct net_device *pnetdev = usb_get_intfdata(pusb_intf); struct usb_device *udev = interface_to_usbdev(pusb_intf); + struct _adapter *padapter = netdev_priv(pnetdev); - if (pnetdev) { - struct _adapter *padapter = netdev_priv(pnetdev); + /* never exit with a firmware callback pending */ + wait_for_completion(&padapter->rtl8712_fw_ready); + usb_set_intfdata(pusb_intf, NULL); + release_firmware(padapter->fw); + if (drvpriv.drv_registered) + padapter->surprise_removed = true; + if (pnetdev->reg_state != NETREG_UNINITIALIZED) + unregister_netdev(pnetdev); /* will call netdev_close() */ + r8712_flush_rwctrl_works(padapter); + r8712_flush_led_works(padapter); + udelay(1); + /* Stop driver mlme relation timer */ + r8712_stop_drv_timers(padapter); + r871x_dev_unload(padapter); + r8712_free_drv_sw(padapter); + free_netdev(pnetdev); - /* never exit with a firmware callback pending */ - wait_for_completion(&padapter->rtl8712_fw_ready); - pnetdev = usb_get_intfdata(pusb_intf); - usb_set_intfdata(pusb_intf, NULL); - if (!pnetdev) - goto firmware_load_fail; - release_firmware(padapter->fw); - if (drvpriv.drv_registered) - padapter->surprise_removed = true; - if (pnetdev->reg_state != NETREG_UNINITIALIZED) - unregister_netdev(pnetdev); /* will call netdev_close() */ - r8712_flush_rwctrl_works(padapter); - r8712_flush_led_works(padapter); - udelay(1); - /* Stop driver mlme relation timer */ - r8712_stop_drv_timers(padapter); - r871x_dev_unload(padapter); - r8712_free_drv_sw(padapter); - free_netdev(pnetdev); + /* decrease the reference count of the usb device structure + * when disconnect + */ + usb_put_dev(udev); - /* decrease the reference count of the usb device structure - * when disconnect - */ - usb_put_dev(udev); - } -firmware_load_fail: /* If we didn't unplug usb dongle and remove/insert module, driver * fails on sitesurvey for the first time when device is up. * Reset usb port for sitesurvey fail issue. From c7b65650c7f41d3946c4e2f0bb56dfdb92cfe127 Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Tue, 27 Jul 2021 07:40:58 +0200 Subject: [PATCH 205/502] staging: mt7621-pci: avoid to re-disable clock for those pcies not in use Clock driver for this SoC is using some gates to properly enabling and disabling the access to peripherals. Those gates that are not in use are properly being automatically disabled by the kernel. Pcie driver is explicitly doing a 'clk_disable_unprepare' call for gates of those pcies that are not used. Since kernel has already disabled them, the following warnings appear: WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:952 clk_core_disable+0xe4/0x100 pcie2 already disabled Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0 #0 Stack : 81661680 80082d00 807c0000 00000004 00000000 80a20000 80860000 80792380 814503d4 80862e83 00000000 1431b70 81454360 00000000 00000000 80792380 81431a08 ffffefff fffffea 00000000 81431a14 0000007b 80868820 ffffffff 80792380 1431c70 803d7a24 00000009 807f3a74 00000001 815df810 00000018 0000000 80a20000 ... Call Trace: [<80007ed8>] show_stack+0x28/0xf0 [<80381e40>] dump_stack_lvl+0x60/0x80 [<8002cf90>] __warn+0xcc/0x140 [<8002d090>] warn_slowpath_fmt+0x8c/0xac [<803d7a24>] clk_core_disable+0xe4/0x100 [<803da468>] clk_disable+0x38/0x58 [<804cb730>] mt7621_pci_probe+0x980/0xa50 [<8041e624>] platform_probe+0x50/0xbc [<8041bfe4>] really_probe.part.0+0xa8/0x340 [<8041c3dc>] driver_probe_device+0x4c/0x154 [<8041cb88>] __driver_attach+0xb4/0x1b4 [<80419a38>] bus_for_each_dev+0x68/0xa4 [<8041b1e8>] bus_add_driver+0x134/0x214 [<8041d3bc>] driver_register+0x98/0x154 [<80001648>] do_one_initcall+0x50/0x1a8 [<808ea1fc>] kernel_init_freeable+0x270/0x30c [<806dd9dc>] kernel_init+0x20/0x110 [<80002d98>] ret_from_kernel_thread+0x14/0x1c WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:810 clk_core_unprepare+0xf4/0x194 pcie2 already unprepared Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 5.14.0 #0 Stack : 81661680 80082d00 807c0000 00000004 00000000 00000000 81431bc4 80a20000 80860000 80792380 814503d4 80862e83 00000000 00000001 81431b70 81454360 00000000 00000000 80792380 81431a08 ffffefff 00000000 ffffffea 00000000 81431a14 0000009b 80868820 ffffffff 80792380 00000001 81431c70 803d7764 00000009 807f3a74 00000001 815df810 00000018 8040b36c 00000000 80a20000 ... Call Trace: [<80007ed8>] show_stack+0x28/0xf0 [<80381e40>] dump_stack_lvl+0x60/0x80 [<8002cf90>] __warn+0xcc/0x140 [<8002d090>] warn_slowpath_fmt+0x8c/0xac [<803d7764>] clk_core_unprepare+0xf4/0x194 [<803d97c4>] clk_unprepare+0x30/0x48 [<804cb738>] mt7621_pci_probe+0x988/0xa50 [<8041e624>] platform_probe+0x50/0xbc [<8041bfe4>] really_probe.part.0+0xa8/0x340 [<8041c3dc>] driver_probe_device+0x4c/0x154 [<8041cb88>] __driver_attach+0xb4/0x1b4 [<80419a38>] bus_for_each_dev+0x68/0xa4 [<8041b1e8>] bus_add_driver+0x134/0x214 [<8041d3bc>] driver_register+0x98/0x154 [<80001648>] do_one_initcall+0x50/0x1a8 [<808ea1fc>] kernel_init_freeable+0x270/0x30c [<806dd9dc>] kernel_init+0x20/0x110 [<80002d98>] ret_from_kernel_thread+0x14/0x1c Avoid to explicitly disable already disabled pcie gates fixes the problem. Fixes: cc4e864a5ce4 ("staging: mt7621-pci: make use of kernel clock apis") Reported-by: DENG Qingfang Signed-off-by: Sergio Paracuellos Link: https://lore.kernel.org/r/20210727054058.10612-1-sergio.paracuellos@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/mt7621-pci/pci-mt7621.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/mt7621-pci/pci-mt7621.c b/drivers/staging/mt7621-pci/pci-mt7621.c index 691030e1a5ed..f9bdf4e33134 100644 --- a/drivers/staging/mt7621-pci/pci-mt7621.c +++ b/drivers/staging/mt7621-pci/pci-mt7621.c @@ -422,7 +422,6 @@ static void mt7621_pcie_init_ports(struct mt7621_pcie *pcie) dev_err(dev, "pcie%d no card, disable it (RST & CLK)\n", slot); mt7621_control_assert(port); - clk_disable_unprepare(port->clk); port->enabled = false; if (slot == 0) { From 30fad76ce4e98263edfa8f885c81d5426c1bf169 Mon Sep 17 00:00:00 2001 From: "Qiang.zhang" Date: Fri, 23 Jul 2021 08:43:34 +0800 Subject: [PATCH 206/502] USB: usbtmc: Fix RCU stall warning rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 1-...!: (2 ticks this GP) idle=d92/1/0x4000000000000000 softirq=25390/25392 fqs=3 (t=12164 jiffies g=31645 q=43226) rcu: rcu_preempt kthread starved for 12162 jiffies! g31645 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0 rcu: Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior. rcu: RCU grace-period kthread stack dump: task:rcu_preempt state:R running task ........... usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: unknown status received: -71 usbtmc 3-1:0.0: usb_submit_urb failed: -19 The function usbtmc_interrupt() resubmits urbs when the error status of an urb is -EPROTO. In systems using the dummy_hcd usb controller this can result in endless interrupt loops when the usbtmc device is disconnected from the host system. Since host controller drivers already try to recover from transmission errors, there is no need to resubmit the urb or try other solutions to repair the error situation. In case of errors the INT pipe just stops to wait for further packets. Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation") Cc: stable@vger.kernel.org Reported-by: syzbot+e2eae5639e7203360018@syzkaller.appspotmail.com Signed-off-by: Qiang.zhang Acked-by: Guido Kiener Link: https://lore.kernel.org/r/20210723004334.458930-1-qiang.zhang@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 74d5a9c5238a..73f419adce61 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -2324,17 +2324,10 @@ static void usbtmc_interrupt(struct urb *urb) dev_err(dev, "overflow with length %d, actual length is %d\n", data->iin_wMaxPacketSize, urb->actual_length); fallthrough; - case -ECONNRESET: - case -ENOENT: - case -ESHUTDOWN: - case -EILSEQ: - case -ETIME: - case -EPIPE: + default: /* urb terminated, clean up */ dev_dbg(dev, "urb terminated, status: %d\n", status); return; - default: - dev_err(dev, "unknown status received: %d\n", status); } exit: rv = usb_submit_urb(urb, GFP_ATOMIC); From fa4a8dcfd51b911f101ebc461dfe22230b74dd64 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 27 Jul 2021 15:31:42 +0800 Subject: [PATCH 207/502] usb: gadget: remove leaked entry from udc driver list The usb_add_gadget_udc will add a new gadget to the udc class driver list. Not calling usb_del_gadget_udc in error branch will result in residual gadget entry in the udc driver list. We fix it by calling usb_del_gadget_udc to clean it when error return. Fixes: 48ba02b2e2b1 ("usb: gadget: add udc driver for max3420") Acked-by: Felipe Balbi Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20210727073142.84666-1-zhangqilong3@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/max3420_udc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c index 34f4db554977..d2a2b20cc1ad 100644 --- a/drivers/usb/gadget/udc/max3420_udc.c +++ b/drivers/usb/gadget/udc/max3420_udc.c @@ -1255,12 +1255,14 @@ static int max3420_probe(struct spi_device *spi) err = devm_request_irq(&spi->dev, irq, max3420_irq_handler, 0, "max3420", udc); if (err < 0) - return err; + goto del_gadget; udc->thread_task = kthread_create(max3420_thread, udc, "max3420-thread"); - if (IS_ERR(udc->thread_task)) - return PTR_ERR(udc->thread_task); + if (IS_ERR(udc->thread_task)) { + err = PTR_ERR(udc->thread_task); + goto del_gadget; + } irq = of_irq_get_byname(spi->dev.of_node, "vbus"); if (irq <= 0) { /* no vbus irq implies self-powered design */ @@ -1280,10 +1282,14 @@ static int max3420_probe(struct spi_device *spi) err = devm_request_irq(&spi->dev, irq, max3420_vbus_handler, 0, "vbus", udc); if (err < 0) - return err; + goto del_gadget; } return 0; + +del_gadget: + usb_del_gadget_udc(&udc->gadget); + return err; } static int max3420_remove(struct spi_device *spi) From 2867652e4766360adf14dfda3832455e04964f2a Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Fri, 23 Jul 2021 18:59:30 +0300 Subject: [PATCH 208/502] usb: gadget: f_hid: fixed NULL pointer dereference Disconnecting and reconnecting the USB cable can lead to crashes and a variety of kernel log spam. The problem was found and reproduced on the Raspberry Pi [1] and the original fix was created in Raspberry's own fork [2]. Link: https://github.com/raspberrypi/linux/issues/3870 [1] Link: https://github.com/raspberrypi/linux/commit/a6e47d5f4efbd2ea6a0b6565cd2f9b7bb217ded5 [2] Signed-off-by: Maxim Devaev Signed-off-by: Phil Elwell Cc: stable Link: https://lore.kernel.org/r/20210723155928.210019-1-mdevaev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 02683ac0719d..08e73e8127b1 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -338,6 +338,11 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, spin_lock_irqsave(&hidg->write_spinlock, flags); + if (!hidg->req) { + spin_unlock_irqrestore(&hidg->write_spinlock, flags); + return -ESHUTDOWN; + } + #define WRITE_COND (!hidg->write_pending) try_again: /* write queue */ @@ -358,8 +363,14 @@ try_again: count = min_t(unsigned, count, hidg->report_length); spin_unlock_irqrestore(&hidg->write_spinlock, flags); - status = copy_from_user(req->buf, buffer, count); + if (!req) { + ERROR(hidg->func.config->cdev, "hidg->req is NULL\n"); + status = -ESHUTDOWN; + goto release_write_pending; + } + + status = copy_from_user(req->buf, buffer, count); if (status != 0) { ERROR(hidg->func.config->cdev, "copy_from_user error\n"); @@ -387,15 +398,18 @@ try_again: spin_unlock_irqrestore(&hidg->write_spinlock, flags); - status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC); - if (status < 0) { - ERROR(hidg->func.config->cdev, - "usb_ep_queue error on int endpoint %zd\n", status); + if (!hidg->in_ep->enabled) { + ERROR(hidg->func.config->cdev, "in_ep is disabled\n"); + status = -ESHUTDOWN; goto release_write_pending; - } else { - status = count; } + status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC); + if (status < 0) + goto release_write_pending; + else + status = count; + return status; release_write_pending: spin_lock_irqsave(&hidg->write_spinlock, flags); From afcff6dc690e24d636a41fd4bee6057e7c70eebd Mon Sep 17 00:00:00 2001 From: Maxim Devaev Date: Wed, 21 Jul 2021 21:03:51 +0300 Subject: [PATCH 209/502] usb: gadget: f_hid: added GET_IDLE and SET_IDLE handlers The USB HID standard declares mandatory support for GET_IDLE and SET_IDLE requests for Boot Keyboard. Most hosts can handle their absence, but others like some old/strange UEFIs and BIOSes consider this a critical error and refuse to work with f_hid. This primitive implementation of saving and returning idle is sufficient to meet the requirements of the standard and these devices. Acked-by: Felipe Balbi Cc: stable Signed-off-by: Maxim Devaev Link: https://lore.kernel.org/r/20210721180351.129450-1-mdevaev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 08e73e8127b1..8d50c8b127fd 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -41,6 +41,7 @@ struct f_hidg { unsigned char bInterfaceSubClass; unsigned char bInterfaceProtocol; unsigned char protocol; + unsigned char idle; unsigned short report_desc_length; char *report_desc; unsigned short report_length; @@ -537,6 +538,14 @@ static int hidg_setup(struct usb_function *f, goto respond; break; + case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 + | HID_REQ_GET_IDLE): + VDBG(cdev, "get_idle\n"); + length = min_t(unsigned int, length, 1); + ((u8 *) req->buf)[0] = hidg->idle; + goto respond; + break; + case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 | HID_REQ_SET_REPORT): VDBG(cdev, "set_report | wLength=%d\n", ctrl->wLength); @@ -560,6 +569,14 @@ static int hidg_setup(struct usb_function *f, goto stall; break; + case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8 + | HID_REQ_SET_IDLE): + VDBG(cdev, "set_idle\n"); + length = 0; + hidg->idle = value; + goto respond; + break; + case ((USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_INTERFACE) << 8 | USB_REQ_GET_DESCRIPTOR): switch (value >> 8) { @@ -787,6 +804,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) hidg_interface_desc.bInterfaceSubClass = hidg->bInterfaceSubClass; hidg_interface_desc.bInterfaceProtocol = hidg->bInterfaceProtocol; hidg->protocol = HID_REPORT_PROTOCOL; + hidg->idle = 1; hidg_ss_in_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length); hidg_ss_in_comp_desc.wBytesPerInterval = cpu_to_le16(hidg->report_length); From 68d9f95d6fd5399d105eaf2308c243536c5d7664 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 27 Jul 2021 13:41:34 +0300 Subject: [PATCH 210/502] usb: musb: Fix suspend and resume issues for PHYs on I2C and SPI As the USB PHYs typically are on I2C or SPI bus for the 2430 glue layer, we need configure the PHYs early for suspend. The musb glue layer we need to suspend only after musb_suspend() in suspend_late. Fixes: 62d472d8ad88 ("usb: musb: Add missing PM suspend and resume functions for 2430 glue") Reported-by: Andreas Kemnade Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20210727104134.52800-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 43 ++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 640a46f0d118..f086960fe2b5 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -35,6 +35,7 @@ struct omap2430_glue { struct device *control_otghs; unsigned int is_runtime_suspended:1; unsigned int needs_resume:1; + unsigned int phy_suspended:1; }; #define glue_to_musb(g) platform_get_drvdata(g->musb) @@ -458,8 +459,10 @@ static int omap2430_runtime_suspend(struct device *dev) omap2430_low_level_exit(musb); - phy_power_off(musb->phy); - phy_exit(musb->phy); + if (!glue->phy_suspended) { + phy_power_off(musb->phy); + phy_exit(musb->phy); + } glue->is_runtime_suspended = 1; @@ -474,8 +477,10 @@ static int omap2430_runtime_resume(struct device *dev) if (!musb) return 0; - phy_init(musb->phy); - phy_power_on(musb->phy); + if (!glue->phy_suspended) { + phy_init(musb->phy); + phy_power_on(musb->phy); + } omap2430_low_level_init(musb); musb_writel(musb->mregs, OTG_INTERFSEL, @@ -489,7 +494,21 @@ static int omap2430_runtime_resume(struct device *dev) return 0; } +/* I2C and SPI PHYs need to be suspended before the glue layer */ static int omap2430_suspend(struct device *dev) +{ + struct omap2430_glue *glue = dev_get_drvdata(dev); + struct musb *musb = glue_to_musb(glue); + + phy_power_off(musb->phy); + phy_exit(musb->phy); + glue->phy_suspended = 1; + + return 0; +} + +/* Glue layer needs to be suspended after musb_suspend() */ +static int omap2430_suspend_late(struct device *dev) { struct omap2430_glue *glue = dev_get_drvdata(dev); @@ -501,7 +520,7 @@ static int omap2430_suspend(struct device *dev) return omap2430_runtime_suspend(dev); } -static int omap2430_resume(struct device *dev) +static int omap2430_resume_early(struct device *dev) { struct omap2430_glue *glue = dev_get_drvdata(dev); @@ -513,10 +532,24 @@ static int omap2430_resume(struct device *dev) return omap2430_runtime_resume(dev); } +static int omap2430_resume(struct device *dev) +{ + struct omap2430_glue *glue = dev_get_drvdata(dev); + struct musb *musb = glue_to_musb(glue); + + phy_init(musb->phy); + phy_power_on(musb->phy); + glue->phy_suspended = 0; + + return 0; +} + static const struct dev_pm_ops omap2430_pm_ops = { .runtime_suspend = omap2430_runtime_suspend, .runtime_resume = omap2430_runtime_resume, .suspend = omap2430_suspend, + .suspend_late = omap2430_suspend_late, + .resume_early = omap2430_resume_early, .resume = omap2430_resume, }; From 00de6a572f30ee93cad7e0704ec4232e5e72bda8 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 21 Jul 2021 16:29:05 +0300 Subject: [PATCH 211/502] usb: host: ohci-at91: suspend/resume ports after/before OHCI accesses On SAMA7G5 suspending ports will cut the access to OHCI registers and any subsequent access to them will lead to CPU being blocked trying to access that memory. Same thing happens on resume: if OHCI memory is accessed before resuming ports the CPU will block on that access. The OCHI memory is accessed on suspend/resume though ohci_suspend()/ohci_resume(). Acked-by: Alan Stern Signed-off-by: Claudiu Beznea Cc: stable Link: https://lore.kernel.org/r/20210721132905.1970713-1-claudiu.beznea@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 9bbd7ddd0003..a24aea3d2759 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -611,8 +611,6 @@ ohci_hcd_at91_drv_suspend(struct device *dev) if (ohci_at91->wakeup) enable_irq_wake(hcd->irq); - ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); - ret = ohci_suspend(hcd, ohci_at91->wakeup); if (ret) { if (ohci_at91->wakeup) @@ -632,7 +630,10 @@ ohci_hcd_at91_drv_suspend(struct device *dev) /* flush the writes */ (void) ohci_readl (ohci, &ohci->regs->control); msleep(1); + ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); at91_stop_clock(ohci_at91); + } else { + ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); } return ret; @@ -644,6 +645,8 @@ ohci_hcd_at91_drv_resume(struct device *dev) struct usb_hcd *hcd = dev_get_drvdata(dev); struct ohci_at91_priv *ohci_at91 = hcd_to_ohci_at91_priv(hcd); + ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); + if (ohci_at91->wakeup) disable_irq_wake(hcd->irq); else @@ -651,8 +654,6 @@ ohci_hcd_at91_drv_resume(struct device *dev) ohci_resume(hcd, false); - ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); - return 0; } From bf88fef0b6f1488abeca594d377991171c00e52a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 17 Jul 2021 21:21:27 +0300 Subject: [PATCH 212/502] usb: otg-fsm: Fix hrtimer list corruption The HNP work can be re-scheduled while it's still in-fly. This results in re-initialization of the busy work, resetting the hrtimer's list node of the work and crashing kernel with null dereference within kernel/timer once work's timer is expired. It's very easy to trigger this problem by re-plugging USB cable quickly. Initialize HNP work only once to fix this trouble. Unable to handle kernel NULL pointer dereference at virtual address 00000126) ... PC is at __run_timers.part.0+0x150/0x228 LR is at __next_timer_interrupt+0x51/0x9c ... (__run_timers.part.0) from [] (run_timer_softirq+0x2f/0x50) (run_timer_softirq) from [] (__do_softirq+0xd5/0x2f0) (__do_softirq) from [] (irq_exit+0xab/0xb8) (irq_exit) from [] (handle_domain_irq+0x45/0x60) (handle_domain_irq) from [] (gic_handle_irq+0x6b/0x7c) (gic_handle_irq) from [] (__irq_svc+0x65/0xac) Cc: stable@vger.kernel.org Acked-by: Peter Chen Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210717182134.30262-6-digetx@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/usb-otg-fsm.c | 6 +++++- include/linux/usb/otg-fsm.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index 3740cf95560e..0697fde51d00 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -193,7 +193,11 @@ static void otg_start_hnp_polling(struct otg_fsm *fsm) if (!fsm->host_req_flag) return; - INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work); + if (!fsm->hnp_work_inited) { + INIT_DELAYED_WORK(&fsm->hnp_polling_work, otg_hnp_polling_work); + fsm->hnp_work_inited = true; + } + schedule_delayed_work(&fsm->hnp_polling_work, msecs_to_jiffies(T_HOST_REQ_POLL)); } diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 3aee78dda16d..784659d4dc99 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -196,6 +196,7 @@ struct otg_fsm { struct mutex lock; u8 *host_req_flag; struct delayed_work hnp_polling_work; + bool hnp_work_inited; bool state_changed; }; From 4c4c1257b844ffe5d0933684e612f92c4b78e120 Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Thu, 22 Jul 2021 14:27:36 +0800 Subject: [PATCH 213/502] virt: acrn: Do hcall_destroy_vm() before resource release The ACRN hypervisor has scenarios which could run a real-time guest VM. The real-time guest VM occupies dedicated CPU cores, be assigned with dedicated PCI devices. It can run without the Service VM after boot up. hcall_destroy_vm() returns failure when a real-time guest VM refuses. The clearing of flag ACRN_VM_FLAG_DESTROYED causes some kernel resource double-freed in a later acrn_vm_destroy(). Do hcall_destroy_vm() before resource release to drop this chance to destroy the VM if hypercall fails. Fixes: 9c5137aedd11 ("virt: acrn: Introduce VM management interfaces") Cc: stable Signed-off-by: Shuo Liu Signed-off-by: Fei Li Link: https://lore.kernel.org/r/20210722062736.15050-1-fei1.li@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/acrn/vm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/virt/acrn/vm.c b/drivers/virt/acrn/vm.c index 0d002a355a93..fbc9f1042000 100644 --- a/drivers/virt/acrn/vm.c +++ b/drivers/virt/acrn/vm.c @@ -64,6 +64,14 @@ int acrn_vm_destroy(struct acrn_vm *vm) test_and_set_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags)) return 0; + ret = hcall_destroy_vm(vm->vmid); + if (ret < 0) { + dev_err(acrn_dev.this_device, + "Failed to destroy VM %u\n", vm->vmid); + clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags); + return ret; + } + /* Remove from global VM list */ write_lock_bh(&acrn_vm_list_lock); list_del_init(&vm->list); @@ -78,14 +86,6 @@ int acrn_vm_destroy(struct acrn_vm *vm) vm->monitor_page = NULL; } - ret = hcall_destroy_vm(vm->vmid); - if (ret < 0) { - dev_err(acrn_dev.this_device, - "Failed to destroy VM %u\n", vm->vmid); - clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags); - return ret; - } - acrn_vm_all_ram_unmap(vm); dev_dbg(acrn_dev.this_device, "VM %u destroyed.\n", vm->vmid); From b910a0206b59eb90ea8ff76d146f4c3156da61e9 Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Mon, 28 Jun 2021 10:50:33 +0200 Subject: [PATCH 214/502] drm/msm/dpu: Fix sm8250_mdp register length The downstream dts lists this value as 0x494, and not 0x45c. Fixes: af776a3e1c30 ("drm/msm/dpu: add SM8250 to hw catalog") Signed-off-by: Robert Foss Reviewed-by: Dmitry Baryshkov Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20210628085033.9905-1-robert.foss@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index d01c4c919504..704dace895cb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -296,7 +296,7 @@ static const struct dpu_mdp_cfg sc7180_mdp[] = { static const struct dpu_mdp_cfg sm8250_mdp[] = { { .name = "top_0", .id = MDP_TOP, - .base = 0x0, .len = 0x45C, + .base = 0x0, .len = 0x494, .features = 0, .highest_bank_bit = 0x3, /* TODO: 2 for LP_DDR4 */ .clk_ctrls[DPU_CLK_CTRL_VIG0] = { From bceddc2cb581dffc94370517f7eedbd9aa16c74b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 7 Jul 2021 11:01:13 -0700 Subject: [PATCH 215/502] drm/msm: Fix display fault handling It turns out that when the display is enabled by the bootloader, we can get some transient iommu faults from the display. Which doesn't go over too well when we install a fault handler that is gpu specific. To avoid this, defer installing the fault handler until we get around to setting up per-process pgtables (which is adreno_smmu specific). The arm-smmu fallback error reporting is sufficient for reporting display related faults (and in fact was all we had prior to f8f934c180f629bb927a04fd90d) Reported-by: Dmitry Baryshkov Reported-by: Yassine Oudjana Fixes: 2a574cc05d38 ("drm/msm: Improve the a6xx page fault handler") Signed-off-by: Rob Clark Tested-by: John Stultz Tested-by: Yassine Oudjana Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210707180113.840741-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_iommu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index eed2a762e9dd..bcaddbba564d 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -142,6 +142,9 @@ static const struct iommu_flush_ops null_tlb_ops = { .tlb_add_page = msm_iommu_tlb_add_page, }; +static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, + unsigned long iova, int flags, void *arg); + struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) { struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(parent->dev); @@ -157,6 +160,13 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) if (!ttbr1_cfg) return ERR_PTR(-ENODEV); + /* + * Defer setting the fault handler until we have a valid adreno_smmu + * to avoid accidentially installing a GPU specific fault handler for + * the display's iommu + */ + iommu_set_fault_handler(iommu->domain, msm_fault_handler, iommu); + pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL); if (!pagetable) return ERR_PTR(-ENOMEM); @@ -300,7 +310,6 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) iommu->domain = domain; msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); - iommu_set_fault_handler(domain, msm_fault_handler, iommu); atomic_set(&iommu->pagetables, 0); From 7591c532b818ef4b8e3e635d842547c08b3a32b4 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 13 Jul 2021 08:54:01 -0700 Subject: [PATCH 216/502] drm/msm/dp: use dp_ctrl_off_link_stream during PHY compliance test run DP cable should always connect to DPU during the entire PHY compliance testing run. Since DP PHY compliance test is executed at irq_hpd event context, dp_ctrl_off_link_stream() should be used instead of dp_ctrl_off(). dp_ctrl_off() is used for unplug event which is triggered when DP cable is dis connected. Changes in V2: -- add fixes statement Fixes: f21c8a276c2d ("drm/msm/dp: handle irq_hpd with sink_count = 0 correctly") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1626191647-13901-2-git-send-email-khsieh@codeaurora.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index ee221d835fa0..eaddfd739885 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1526,7 +1526,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl) * running. Add the global reset just before disabling the * link clocks and core clocks. */ - ret = dp_ctrl_off(&ctrl->dp_ctrl); + ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl); if (ret) { DRM_ERROR("failed to disable DP controller\n"); return ret; From f9a39932fa54b6421e751ada7a285da809146421 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 21 Jul 2021 19:44:34 -0700 Subject: [PATCH 217/502] drm/msm/dp: Initialize the INTF_CONFIG register Some bootloaders set the widebus enable bit in the INTF_CONFIG register, but configuration of widebus isn't yet supported ensure that the register has a known value, with widebus disabled. Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Bjorn Andersson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20210722024434.3313167-1-bjorn.andersson@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_catalog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index ca96e3514790..c0423e76eed7 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -771,6 +771,7 @@ int dp_catalog_panel_timing_cfg(struct dp_catalog *dp_catalog) dp_write_link(catalog, REG_DP_HSYNC_VSYNC_WIDTH_POLARITY, dp_catalog->width_blanking); dp_write_link(catalog, REG_DP_ACTIVE_HOR_VER, dp_catalog->dp_active); + dp_write_p0(catalog, MMSS_DP_INTF_CONFIG, 0); return 0; } From afc9b8b6bab8d3d3a9ae67e1d64093ad626c92a0 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 23 Jul 2021 09:55:39 -0700 Subject: [PATCH 218/502] drm/msm/dp: signal audio plugged change at dp_pm_resume There is a scenario that dp cable is unplugged from DUT during system suspended will cause audio option state does not match real connection state. Fix this problem by Signaling audio plugged change with realtime connection status at dp_pm_resume() so that audio option will be in correct state after system resumed. Changes in V2: -- correct Fixes tag commit id. Fixes: f591dbb5fb8c ("drm/msm/dp: power off DP phy at suspend") Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1627059339-12142-1-git-send-email-khsieh@codeaurora.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 051c1be1de7e..1d204a0fbdd2 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1311,6 +1311,10 @@ static int dp_pm_resume(struct device *dev) else dp->dp_display.is_connected = false; + dp_display_handle_plugged_change(g_dp_display, + dp->dp_display.is_connected); + + mutex_unlock(&dp->event_mutex); return 0; From fc71c9e6f41f9912d22a75dfa76bc10811af7e22 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 14 Jul 2021 11:28:56 -0400 Subject: [PATCH 219/502] drm/msm/dp: Initialize dp->aux->drm_dev before registration Avoids the following WARN: [ 3.009556] ------------[ cut here ]------------ [ 3.014306] WARNING: CPU: 7 PID: 109 at drivers/gpu/drm/drm_dp_helper.c:1796 drm_dp_aux_register+0xa4/0xac [ 3.024209] Modules linked in: [ 3.027351] CPU: 7 PID: 109 Comm: kworker/7:8 Not tainted 5.10.47 #69 [ 3.033958] Hardware name: Google Lazor (rev1 - 2) (DT) [ 3.039323] Workqueue: events deferred_probe_work_func [ 3.044596] pstate: 60c00009 (nZCv daif +PAN +UAO -TCO BTYPE=--) [ 3.050761] pc : drm_dp_aux_register+0xa4/0xac [ 3.055329] lr : dp_aux_register+0x40/0x88 [ 3.059538] sp : ffffffc010ad3920 [ 3.062948] x29: ffffffc010ad3920 x28: ffffffa64196ac70 [ 3.067239] mmc1: Command Queue Engine enabled [ 3.068406] x27: ffffffa64196ac68 x26: 0000000000000001 [ 3.068407] x25: 0000000000000002 x24: 0000000000000060 [ 3.068409] x23: ffffffa642ab3400 x22: ffffffe126c10e5b [ 3.068410] x21: ffffffa641dc3188 x20: ffffffa641963c10 [ 3.068412] x19: ffffffa642aba910 x18: 00000000ffff0a00 [ 3.068414] x17: 000000476f8e002a x16: 00000000000000b8 [ 3.073008] mmc1: new HS400 Enhanced strobe MMC card at address 0001 [ 3.078448] x15: ffffffffffffffff x14: ffffffffffffffff [ 3.078450] x13: 0000000000000030 x12: 0000000000000030 [ 3.078452] x11: 0101010101010101 x10: ffffffe12647a914 [ 3.078453] x9 : ffffffe12647a8cc x8 : 0000000000000000 [ 3.084452] mmcblk1: mmc1:0001 DA4032 29.1 GiB [ 3.089372] [ 3.089372] x7 : 6c6064717372fefe x6 : ffffffa642b11494 [ 3.089374] x5 : 0000000000000000 x4 : 6d006c657869ffff [ 3.089375] x3 : 000000006c657869 x2 : 000000000000000c [ 3.089376] x1 : ffffffe126c3ae3c x0 : ffffffa642aba910 [ 3.089381] Call trace: [ 3.094931] mmcblk1boot0: mmc1:0001 DA4032 partition 1 4.00 MiB [ 3.100291] drm_dp_aux_register+0xa4/0xac [ 3.100292] dp_aux_register+0x40/0x88 [ 3.100294] dp_display_bind+0x64/0xcc [ 3.100295] component_bind_all+0xdc/0x210 [ 3.100298] msm_drm_bind+0x1e8/0x5d4 [ 3.100301] try_to_bring_up_master+0x168/0x1b0 [ 3.105861] mmcblk1boot1: mmc1:0001 DA4032 partition 2 4.00 MiB [ 3.112282] __component_add+0xa0/0x158 [ 3.112283] component_add+0x1c/0x28 [ 3.112284] dp_display_probe+0x33c/0x380 [ 3.112286] platform_drv_probe+0x9c/0xbc [ 3.112287] really_probe+0x140/0x35c [ 3.112289] driver_probe_device+0x84/0xc0 [ 3.112292] __device_attach_driver+0x94/0xb0 [ 3.117967] mmcblk1rpmb: mmc1:0001 DA4032 partition 3 16.0 MiB, chardev (239:0) [ 3.123201] bus_for_each_drv+0x8c/0xd8 [ 3.123202] __device_attach+0xc4/0x150 [ 3.123204] device_initial_probe+0x1c/0x28 [ 3.123205] bus_probe_device+0x3c/0x9c [ 3.123206] deferred_probe_work_func+0x90/0xcc [ 3.123211] process_one_work+0x218/0x3ec [ 3.131976] mmcblk1: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 p11 p12 [ 3.134123] worker_thread+0x288/0x3e8 [ 3.134124] kthread+0x148/0x1b0 [ 3.134127] ret_from_fork+0x10/0x30 [ 3.134128] ---[ end trace cfb9fce3f70f824d ]--- Signed-off-by: Sean Paul Reviewed-by: Abhinav Kumar Link: https://lore.kernel.org/r/20210714152910.55093-1-sean@poorly.run Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 1d204a0fbdd2..867388a399ad 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -219,6 +219,7 @@ static int dp_display_bind(struct device *dev, struct device *master, goto end; } + dp->aux->drm_dev = drm; rc = dp_aux_register(dp->aux); if (rc) { DRM_ERROR("DRM DP AUX register failed\n"); From 48e8a7b5a551f956002b60d2095bdfb58db96e59 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 24 Jun 2021 17:43:03 +0100 Subject: [PATCH 220/502] perf cs-etm: Split --dump-raw-trace by AUX records Currently --dump-raw-trace skips queueing and splitting buffers because of an early exit condition in cs_etm__process_auxtrace_info(). Once that is removed we can print the split data by using the queues and searching for split buffers with the same reference as the one that is currently being processed. This keeps the same behaviour of dumping in file order when an AUXTRACE event appears, rather than moving trace dump to where AUX records are in the file. There will be a newline and size printout for each fragment. For example this buffer is comprised of two AUX records, but was printed as one: 0 0 0x8098 [0x30]: PERF_RECORD_AUXTRACE size: 0xa0 offset: 0 ref: 0x491a4dfc52fc0e6e idx: 0 t . ... CoreSight ETM Trace data: size 160 bytes Idx:0; ID:10; I_ASYNC : Alignment Synchronisation. Idx:12; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:17; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0x0000000000000000; Idx:80; ID:10; I_ASYNC : Alignment Synchronisation. Idx:92; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:97; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0xFFFFDE2AD3FD76D4; But is now printed as two fragments: 0 0 0x8098 [0x30]: PERF_RECORD_AUXTRACE size: 0xa0 offset: 0 ref: 0x491a4dfc52fc0e6e idx: 0 t . ... CoreSight ETM Trace data: size 80 bytes Idx:0; ID:10; I_ASYNC : Alignment Synchronisation. Idx:12; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:17; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0x0000000000000000; . ... CoreSight ETM Trace data: size 80 bytes Idx:80; ID:10; I_ASYNC : Alignment Synchronisation. Idx:92; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:97; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0xFFFFDE2AD3FD76D4; Decoding errors that appeared in problematic files are now not present, for example: Idx:808; ID:1c; I_BAD_SEQUENCE : Invalid Sequence in packet.[I_ASYNC] ... PKTP_ETMV4I_0016 : 0x0014 (OCSD_ERR_INVALID_PCKT_HDR) [Invalid packet header]; TrcIdx=822 Signed-off-by: James Clark Reviewed-by: Mathieu Poirier Tested-by: Leo Yan Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210624164303.28632-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 22f8326547eb..bc1f64873c8f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2434,6 +2434,22 @@ static int cs_etm__process_event(struct perf_session *session, return 0; } +static void dump_queued_data(struct cs_etm_auxtrace *etm, + struct perf_record_auxtrace *event) +{ + struct auxtrace_buffer *buf; + unsigned int i; + /* + * Find all buffers with same reference in the queues and dump them. + * This is because the queues can contain multiple entries of the same + * buffer that were split on aux records. + */ + for (i = 0; i < etm->queues.nr_queues; ++i) + list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) + if (buf->reference == event->reference) + cs_etm__dump_event(etm, buf); +} + static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -2466,7 +2482,8 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, cs_etm__dump_event(etm, buffer); auxtrace_buffer__put_data(buffer); } - } + } else if (dump_trace) + dump_queued_data(etm, &event->auxtrace); return 0; } @@ -3042,7 +3059,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); - return 0; } err = cs_etm__synth_events(etm, session); From 8e3341257e3b5774ec8cd3ef1ba0c0d3fada322b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 27 Jul 2021 17:25:01 +0300 Subject: [PATCH 221/502] Revert "thunderbolt: Hide authorized attribute if router does not support PCIe tunnels" This reverts commit 6f3badead6a078cf3c71f381f9d84ac922984a00. It turns out bolt depends on having authorized attribute visible under each device. Hiding it makes bolt crash as several people have reported on various bug trackers. For this reason revert the commit. Link: https://gitlab.freedesktop.org/bolt/bolt/-/issues/174 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1979765 Link: https://bugs.archlinux.org/task/71569 Cc: stable@vger.kernel.org Cc: Christian Kellner Fixes: 6f3badead6a0 ("thunderbolt: Hide authorized attribute if router does not support PCIe tunnels") Signed-off-by: Mika Westerberg Link: https://lore.kernel.org/r/20210727142501.27476-1-mika.westerberg@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 83b1ef3d5d03..10d6b228cc94 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -1875,18 +1875,6 @@ static struct attribute *switch_attrs[] = { NULL, }; -static bool has_port(const struct tb_switch *sw, enum tb_port_type type) -{ - const struct tb_port *port; - - tb_switch_for_each_port(sw, port) { - if (!port->disabled && port->config.type == type) - return true; - } - - return false; -} - static umode_t switch_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1895,8 +1883,7 @@ static umode_t switch_attr_is_visible(struct kobject *kobj, if (attr == &dev_attr_authorized.attr) { if (sw->tb->security_level == TB_SECURITY_NOPCIE || - sw->tb->security_level == TB_SECURITY_DPONLY || - !has_port(sw, TB_TYPE_PCIE_UP)) + sw->tb->security_level == TB_SECURITY_DPONLY) return 0; } else if (attr == &dev_attr_device.attr) { if (!sw->device) From c07d5c9226980ca5ae21c6a2714baa95be2ce164 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 20 Jul 2021 23:10:19 +0800 Subject: [PATCH 222/502] perf pmu: Fix alias matching Commit c47a5599eda324ba ("perf tools: Fix pattern matching for same substring in different PMU type"), may have fixed some alias matching, but has broken some others. Firstly it cannot handle the simple scenario of PMU name in form pmu_name{digits} - it can only handle pmu_name_{digits}. Secondly it cannot handle more complex matching in the case where we have multiple tokens. In this scenario, the code failed to realise that we may examine multiple substrings in the PMU name. Fix in two ways: - Change perf_pmu__valid_suffix() to accept a PMU name without '_' in the suffix - Only pay attention to perf_pmu__valid_suffix() for the final token Also add const qualifiers as necessary to avoid casting. Fixes: c47a5599eda324ba ("perf tools: Fix pattern matching for same substring in different PMU type") Signed-off-by: John Garry Tested-by: Jin Yao Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1626793819-79090-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a1bd7007a8b4..fc683bc41715 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -742,9 +742,13 @@ struct pmu_events_map *__weak pmu_events_map__find(void) return perf_pmu__find_map(NULL); } -static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) +/* + * Suffix must be in form tok_{digits}, or tok{digits}, or same as pmu_name + * to be valid. + */ +static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok) { - char *p; + const char *p; if (strncmp(pmu_name, tok, strlen(tok))) return false; @@ -753,12 +757,16 @@ static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) if (*p == 0) return true; - if (*p != '_') - return false; + if (*p == '_') + ++p; - ++p; - if (*p == 0 || !isdigit(*p)) - return false; + /* Ensure we end in a number */ + while (1) { + if (!isdigit(*p)) + return false; + if (*(++p) == 0) + break; + } return true; } @@ -789,12 +797,19 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name) * match "socket" in "socketX_pmunameY" and then "pmuname" in * "pmunameY". */ - for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { + while (1) { + char *next_tok = strtok_r(NULL, ",", &tmp); + name = strstr(name, tok); - if (!name || !perf_pmu__valid_suffix((char *)name, tok)) { + if (!name || + (!next_tok && !perf_pmu__valid_suffix(name, tok))) { res = false; goto out; } + if (!next_tok) + break; + tok = next_tok; + name += strlen(tok); } res = true; From 91e273712ab8dd8c31924ac7714b21e011137e98 Mon Sep 17 00:00:00 2001 From: Pratik Vishwakarma Date: Fri, 23 Jul 2021 18:08:40 +0530 Subject: [PATCH 223/502] drm/amdgpu: Check pmops for desired suspend state [Why] User might change the suspend behaviour from OS. [How] Check with pm for target suspend state and set s0ix flag only for s2idle state. v2: User might change default suspend state, use target state v3: squash in build fix Suggested-by: Lijo Lazar Signed-off-by: Pratik Vishwakarma Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 84a1b4bc9bb4..6cc0d4fa4d0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1042,7 +1043,7 @@ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) #if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) - return true; + return pm_suspend_target_state == PM_SUSPEND_TO_IDLE; } #endif return false; From c8f8e96805b54968b4d1d54850f87fc39128a532 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Thu, 8 Jul 2021 14:50:48 -0400 Subject: [PATCH 224/502] drm/amd/display: Guard DST_Y_PREFETCH register overflow in DCN21 [why] DST_Y_PREFETCH can overflow when DestinationLinesForPrefetch values are too large due to the former being limited to 8 bits. [how] Set the maximum value of DestinationLinesForPrefetch to be 255 * refclk period. Reviewed-by: Laktyushkin Dmytro Acked-by: Solomon Chiu Signed-off-by: Victor Lu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index d25a7d38d21f..6655bb99fdfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -841,6 +841,9 @@ static bool CalculatePrefetchSchedule( else *DestinationLinesForPrefetch = dst_y_prefetch_equ; + // Limit to prevent overflow in DST_Y_PREFETCH register + *DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75); + dml_print("DML: VStartup: %d\n", VStartup); dml_print("DML: TCalc: %f\n", TCalc); dml_print("DML: TWait: %f\n", TWait); From 8d177577cd9118c29960401a6de9dc4db00f2052 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Tue, 6 Jul 2021 15:45:11 -0400 Subject: [PATCH 225/502] drm/amd/display: Add missing DCN21 IP parameter [why] IP parameter min_meta_chunk_size_bytes is read for bandwidth calculations but it was never defined. [how] Define min_meta_chunk_size_bytes and initialize value to 256. Reviewed-by: Laktyushkin Dmytro Acked-by: Solomon Chiu Signed-off-by: Victor Lu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index f3d98e3ba624..bf0a198eae15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -109,6 +109,7 @@ struct _vcs_dpi_ip_params_st dcn2_1_ip = { .max_page_table_levels = 4, .pte_chunk_size_kbytes = 2, .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, .writeback_chunk_size_kbytes = 2, .line_buffer_size_bits = 789504, .is_line_buffer_bpp_fixed = 0, From b53e041d8e4308f7324999398aec092dbcb130f5 Mon Sep 17 00:00:00 2001 From: Dale Zhao Date: Fri, 16 Jul 2021 09:38:17 +0800 Subject: [PATCH 226/502] drm/amd/display: ensure dentist display clock update finished in DCN20 [Why] We don't check DENTIST_DISPCLK_CHG_DONE to ensure dentist display clockis updated to target value. In some scenarios with large display clock margin, it will deliver unfinished display clock and cause issues like display black screen. [How] Checking DENTIST_DISPCLK_CHG_DONE to ensure display clock has been update to target value before driver do other clock related actions. Reviewed-by: Cyr Aric Acked-by: Solomon Chiu Signed-off-by: Dale Zhao Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 6e0c5c664fdc..a5331b96f551 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -197,7 +197,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); -// REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100); + REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 1000); REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider); REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100); From f2ad3accefc63e72e9932e141c21875cc04beec8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 21 Jul 2021 18:11:51 -0400 Subject: [PATCH 227/502] drm/amdgpu/display: only enable aux backlight control for OLED panels We've gotten a number of reports about backlight control not working on panels which indicate that they use aux backlight control. A recent patch: commit 2d73eabe2984a435737498ab39bb1500a9ffe9a9 Author: Camille Cho Date: Thu Jul 8 18:28:37 2021 +0800 drm/amd/display: Only set default brightness for OLED [Why] We used to unconditionally set backlight path as AUX for panels capable of backlight adjustment via DPCD in set default brightness. [How] This should be limited to OLED panel only since we control backlight via PWM path for SDR mode in LCD HDR panel. Reviewed-by: Krunoslav Kovac Acked-by: Rodrigo Siqueira Signed-off-by: Camille Cho Signed-off-by: Alex Deucher Changes some other code to only use aux for backlight control on OLED panels. The commit message seems to indicate that PWM should be used for SDR mode on HDR panels. Do something similar for backlight control in general. This may need to be revisited if and when HDR started to get used. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1438 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=213715 Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3a2a5ff57e9..b53f49a23ddc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2429,9 +2429,9 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll; min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll; - if (caps->ext_caps->bits.oled == 1 || + if (caps->ext_caps->bits.oled == 1 /*|| caps->ext_caps->bits.sdr_aux_backlight_control == 1 || - caps->ext_caps->bits.hdr_aux_backlight_control == 1) + caps->ext_caps->bits.hdr_aux_backlight_control == 1*/) caps->aux_support = true; if (amdgpu_backlight == 0) From ec30ce41f03820b6289513344b4281ca3a1151f4 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Sun, 25 Jul 2021 16:49:01 +0000 Subject: [PATCH 228/502] maintainers: add bugs and chat URLs for amdgpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add links to the issue tracker and the IRC channel for the amdgpu driver. Signed-off-by: Simon Ser Cc: Alex Deucher Cc: Christian König Cc: Pan Xinhui Signed-off-by: Alex Deucher --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 19135a9d778e..056966c9aac9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15468,6 +15468,8 @@ M: Pan, Xinhui L: amd-gfx@lists.freedesktop.org S: Supported T: git https://gitlab.freedesktop.org/agd5f/linux.git +B: https://gitlab.freedesktop.org/drm/amd/-/issues +C: irc://irc.oftc.net/radeon F: drivers/gpu/drm/amd/ F: drivers/gpu/drm/radeon/ F: include/uapi/drm/amdgpu_drm.h From 773af69121ecc6c53d192661af8d53bb3db028ae Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jul 2021 10:25:55 -0600 Subject: [PATCH 229/502] io_uring: always reissue from task_work context As a safeguard, if we're going to queue async work, do it from task_work from the original task. This ensures that we can always sanely create threads, regards of what the reissue context may be. Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a4331deb0427..6ba101cd4661 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2060,6 +2060,12 @@ static void io_req_task_queue(struct io_kiocb *req) io_req_task_work_add(req); } +static void io_req_task_queue_reissue(struct io_kiocb *req) +{ + req->io_task_work.func = io_queue_async_work; + io_req_task_work_add(req); +} + static inline void io_queue_next(struct io_kiocb *req) { struct io_kiocb *nxt = io_req_find_next(req); @@ -2248,7 +2254,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, !(req->flags & REQ_F_DONT_REISSUE)) { req->iopoll_completed = 0; req_ref_get(req); - io_queue_async_work(req); + io_req_task_queue_reissue(req); continue; } @@ -2771,7 +2777,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, req->flags &= ~REQ_F_REISSUE; if (io_resubmit_prep(req)) { req_ref_get(req); - io_queue_async_work(req); + io_req_task_queue_reissue(req); } else { int cflags = 0; From ec6446d5304b3c3dd692a1e244df7e40bbb5af36 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Tue, 13 Jul 2021 13:12:16 +0530 Subject: [PATCH 230/502] fpga: dfl: fme: Fix cpu hotplug issue in performance reporting The performance reporting driver added cpu hotplug feature but it didn't add pmu migration call in cpu offline function. This can create an issue incase the current designated cpu being used to collect fme pmu data got offline, as based on current code we are not migrating fme pmu to new target cpu. Because of that perf will still try to fetch data from that offline cpu and hence we will not get counter data. Patch fixed this issue by adding pmu_migrate_context call in fme_perf_offline_cpu function. Fixes: 724142f8c42a ("fpga: dfl: fme: add performance reporting support") Cc: stable@vger.kernel.org Tested-by: Xu Yilun Acked-by: Wu Hao Signed-off-by: Kajol Jain Signed-off-by: Moritz Fischer --- drivers/fpga/dfl-fme-perf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/fpga/dfl-fme-perf.c b/drivers/fpga/dfl-fme-perf.c index 4299145ef347..587c82be12f7 100644 --- a/drivers/fpga/dfl-fme-perf.c +++ b/drivers/fpga/dfl-fme-perf.c @@ -953,6 +953,8 @@ static int fme_perf_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; priv->cpu = target; + perf_pmu_migrate_context(&priv->pmu, cpu, target); + return 0; } From 4ee107c514139960682cc0f3623a24e86fda1a13 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 27 Jul 2021 17:26:13 +0800 Subject: [PATCH 231/502] clk: qcom: smd-rpm: Fix MSM8936 RPM_SMD_PCNOC_A_CLK Commit a0384ecfe2aa ("clk: qcom: smd-rpm: De-duplicate identical entries") introduces the following regression on MSM8936/MSM8939, as RPM_SMD_PCNOC_A_CLK gets pointed to pcnoc_clk by mistake. Fix it by correcting the clock to pcnoc_a_clk. [ 1.307363] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 1.313593] Mem abort info: [ 1.322512] ESR = 0x96000004 [ 1.325132] EC = 0x25: DABT (current EL), IL = 32 bits [ 1.338872] SET = 0, FnV = 0 [ 1.355483] EA = 0, S1PTW = 0 [ 1.368702] FSC = 0x04: level 0 translation fault [ 1.383294] Data abort info: [ 1.398292] ISV = 0, ISS = 0x00000004 [ 1.398297] CM = 0, WnR = 0 [ 1.398301] [0000000000000000] user address but active_mm is swapper [ 1.404193] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 1.420596] Modules linked in: [ 1.420604] CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 5.14.0-rc3+ #198 [ 1.441010] pc : __clk_register+0x48/0x780 [ 1.446045] lr : __clk_register+0x3c/0x780 [ 1.449953] sp : ffff800010063440 [ 1.454031] x29: ffff800010063440 x28: 0000000000000004 x27: 0000000000000066 [ 1.457423] x26: 0000000000000001 x25: 000000007fffffff x24: ffff800010f9f388 [ 1.464540] x23: ffff00007fc12a90 x22: ffff0000034b2010 x21: 0000000000000000 [ 1.471658] x20: ffff800010f9fff8 x19: ffff00000152a700 x18: 0000000000000001 [ 1.478778] x17: ffff00007fbd40c8 x16: 0000000000000460 x15: 0000000000000465 [ 1.485895] x14: ffffffffffffffff x13: 746e756f635f7265 x12: 696669746f6e5f6b [ 1.493013] x11: 0000000000000006 x10: 0000000000000000 x9 : 0000000000000000 [ 1.500131] x8 : ffff00000152a800 x7 : 0000000000000000 x6 : 000000000000003f [ 1.507249] x5 : 0000000000000040 x4 : 0000000000000000 x3 : 0000000000000004 [ 1.514367] x2 : 0000000000000000 x1 : 0000000000000cc0 x0 : ffff00000152a700 [ 1.521486] Call trace: [ 1.528598] __clk_register+0x48/0x780 [ 1.530855] clk_hw_register+0x20/0x60 [ 1.534674] devm_clk_hw_register+0x50/0xa8 [ 1.538408] rpm_smd_clk_probe+0x1a4/0x260 [ 1.542488] platform_probe+0x68/0xd8 [ 1.546653] really_probe+0x140/0x2f8 [ 1.550386] __driver_probe_device+0x78/0xe0 [ 1.554033] driver_probe_device+0x80/0x110 [ 1.558373] __device_attach_driver+0x90/0xe0 [ 1.562280] bus_for_each_drv+0x78/0xc8 [ 1.566793] __device_attach+0xf0/0x150 [ 1.570438] device_initial_probe+0x14/0x20 [ 1.574259] bus_probe_device+0x9c/0xa8 [ 1.578425] device_add+0x378/0x870 [ 1.582243] of_device_add+0x44/0x60 [ 1.585716] of_platform_device_create_pdata+0xc0/0x110 [ 1.589538] of_platform_bus_create+0x17c/0x388 [ 1.594485] of_platform_populate+0x50/0xf0 [ 1.598998] qcom_smd_rpm_probe+0xd4/0x128 [ 1.603164] rpmsg_dev_probe+0xbc/0x1a8 [ 1.607330] really_probe+0x140/0x2f8 [ 1.611063] __driver_probe_device+0x78/0xe0 [ 1.614883] driver_probe_device+0x80/0x110 [ 1.619224] __device_attach_driver+0x90/0xe0 [ 1.623131] bus_for_each_drv+0x78/0xc8 [ 1.627643] __device_attach+0xf0/0x150 [ 1.631289] device_initial_probe+0x14/0x20 [ 1.635109] bus_probe_device+0x9c/0xa8 [ 1.639275] device_add+0x378/0x870 [ 1.643095] device_register+0x20/0x30 [ 1.646567] rpmsg_register_device+0x54/0x90 [ 1.650387] qcom_channel_state_worker+0x168/0x288 [ 1.654814] process_one_work+0x1a0/0x328 [ 1.659415] worker_thread+0x4c/0x420 [ 1.663494] kthread+0x150/0x160 [ 1.667138] ret_from_fork+0x10/0x18 [ 1.670442] Code: 97f56b92 b40034a0 aa0003f3 52819801 (f94002a0) [ 1.674004] ---[ end trace 412fa6f47384cdfe ]--- Fixes: a0384ecfe2aa ("clk: qcom: smd-rpm: De-duplicate identical entries") Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210727092613.23056-1-shawn.guo@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-smd-rpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c index 800b2fef1887..b2c142f3a649 100644 --- a/drivers/clk/qcom/clk-smd-rpm.c +++ b/drivers/clk/qcom/clk-smd-rpm.c @@ -467,7 +467,7 @@ DEFINE_CLK_SMD_RPM(msm8936, sysmmnoc_clk, sysmmnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, static struct clk_smd_rpm *msm8936_clks[] = { [RPM_SMD_PCNOC_CLK] = &msm8916_pcnoc_clk, - [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_clk, + [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_a_clk, [RPM_SMD_SNOC_CLK] = &msm8916_snoc_clk, [RPM_SMD_SNOC_A_CLK] = &msm8916_snoc_a_clk, [RPM_SMD_BIMC_CLK] = &msm8916_bimc_clk, From bb7262b295472eb6858b5c49893954794027cd84 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 6 Dec 2020 22:40:07 +0100 Subject: [PATCH 232/502] timers: Move clearing of base::timer_running under base:: Lock syzbot reported KCSAN data races vs. timer_base::timer_running being set to NULL without holding base::lock in expire_timers(). This looks innocent and most reads are clearly not problematic, but Frederic identified an issue which is: int data = 0; void timer_func(struct timer_list *t) { data = 1; } CPU 0 CPU 1 ------------------------------ -------------------------- base = lock_timer_base(timer, &flags); raw_spin_unlock(&base->lock); if (base->running_timer != timer) call_timer_fn(timer, fn, baseclk); ret = detach_if_pending(timer, base, true); base->running_timer = NULL; raw_spin_unlock_irqrestore(&base->lock, flags); raw_spin_lock(&base->lock); x = data; If the timer has previously executed on CPU 1 and then CPU 0 can observe base->running_timer == NULL and returns, assuming the timer has completed, but it's not guaranteed on all architectures. The comment for del_timer_sync() makes that guarantee. Moving the assignment under base->lock prevents this. For non-RT kernel it's performance wise completely irrelevant whether the store happens before or after taking the lock. For an RT kernel moving the store under the lock requires an extra unlock/lock pair in the case that there is a waiter for the timer, but that's not the end of the world. Reported-by: syzbot+aa7c2385d46c5eba0b89@syzkaller.appspotmail.com Reported-by: syzbot+abea4558531bae1ba9fe@syzkaller.appspotmail.com Fixes: 030dcdd197d7 ("timers: Prepare support for PREEMPT_RT") Signed-off-by: Thomas Gleixner Tested-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/87lfea7gw8.fsf@nanos.tec.linutronix.de Cc: stable@vger.kernel.org --- kernel/time/timer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9eb11c2209e5..e3d2c23c413d 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1265,8 +1265,10 @@ static inline void timer_base_unlock_expiry(struct timer_base *base) static void timer_sync_wait_running(struct timer_base *base) { if (atomic_read(&base->timer_waiters)) { + raw_spin_unlock_irq(&base->lock); spin_unlock(&base->expiry_lock); spin_lock(&base->expiry_lock); + raw_spin_lock_irq(&base->lock); } } @@ -1457,14 +1459,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) if (timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); call_timer_fn(timer, fn, baseclk); - base->running_timer = NULL; raw_spin_lock(&base->lock); + base->running_timer = NULL; } else { raw_spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, baseclk); + raw_spin_lock_irq(&base->lock); base->running_timer = NULL; timer_sync_wait_running(base); - raw_spin_lock_irq(&base->lock); } } } From 8373cd38a8888549ace7c7617163a2e826970a92 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 27 Jul 2021 22:03:50 +0800 Subject: [PATCH 233/502] net: hns3: change the method of obtaining default ptp cycle The ptp cycle is related to the hardware, so it may cause compatibility issues if a fixed value is used in driver. Therefore, the method of obtaining this value is changed to read from the register rather than use a fixed value in driver. Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_ptp.c | 36 +++++++++++++++---- .../hisilicon/hns3/hns3pf/hclge_ptp.h | 10 ++++-- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 3b1f84502e36..befa9bcc2f2f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -5,9 +5,27 @@ #include "hclge_main.h" #include "hnae3.h" +static int hclge_ptp_get_cycle(struct hclge_dev *hdev) +{ + struct hclge_ptp *ptp = hdev->ptp; + + ptp->cycle.quo = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG) & + HCLGE_PTP_CYCLE_QUO_MASK; + ptp->cycle.numer = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG); + ptp->cycle.den = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); + + if (ptp->cycle.den == 0) { + dev_err(&hdev->pdev->dev, "invalid ptp cycle denominator!\n"); + return -EINVAL; + } + + return 0; +} + static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) { struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp); + struct hclge_ptp_cycle *cycle = &hdev->ptp->cycle; u64 adj_val, adj_base, diff; unsigned long flags; bool is_neg = false; @@ -18,7 +36,7 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) is_neg = true; } - adj_base = HCLGE_PTP_CYCLE_ADJ_BASE * HCLGE_PTP_CYCLE_ADJ_UNIT; + adj_base = (u64)cycle->quo * (u64)cycle->den + (u64)cycle->numer; adj_val = adj_base * ppb; diff = div_u64(adj_val, 1000000000ULL); @@ -29,16 +47,16 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) /* This clock cycle is defined by three part: quotient, numerator * and denominator. For example, 2.5ns, the quotient is 2, - * denominator is fixed to HCLGE_PTP_CYCLE_ADJ_UNIT, and numerator - * is 0.5 * HCLGE_PTP_CYCLE_ADJ_UNIT. + * denominator is fixed to ptp->cycle.den, and numerator + * is 0.5 * ptp->cycle.den. */ - quo = div_u64_rem(adj_val, HCLGE_PTP_CYCLE_ADJ_UNIT, &numerator); + quo = div_u64_rem(adj_val, cycle->den, &numerator); spin_lock_irqsave(&hdev->ptp->lock, flags); - writel(quo, hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG); + writel(quo & HCLGE_PTP_CYCLE_QUO_MASK, + hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG); writel(numerator, hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG); - writel(HCLGE_PTP_CYCLE_ADJ_UNIT, - hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); + writel(cycle->den, hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); writel(HCLGE_PTP_CYCLE_ADJ_EN, hdev->ptp->io_base + HCLGE_PTP_CYCLE_CFG_REG); spin_unlock_irqrestore(&hdev->ptp->lock, flags); @@ -475,6 +493,10 @@ int hclge_ptp_init(struct hclge_dev *hdev) ret = hclge_ptp_create_clock(hdev); if (ret) return ret; + + ret = hclge_ptp_get_cycle(hdev); + if (ret) + return ret; } ret = hclge_ptp_int_en(hdev, true); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h index 5a202b775471..dbf5f4c08019 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h @@ -29,6 +29,7 @@ #define HCLGE_PTP_TIME_ADJ_REG 0x60 #define HCLGE_PTP_TIME_ADJ_EN BIT(0) #define HCLGE_PTP_CYCLE_QUO_REG 0x64 +#define HCLGE_PTP_CYCLE_QUO_MASK GENMASK(7, 0) #define HCLGE_PTP_CYCLE_DEN_REG 0x68 #define HCLGE_PTP_CYCLE_NUM_REG 0x6C #define HCLGE_PTP_CYCLE_CFG_REG 0x70 @@ -37,9 +38,7 @@ #define HCLGE_PTP_CUR_TIME_SEC_L_REG 0x78 #define HCLGE_PTP_CUR_TIME_NSEC_REG 0x7C -#define HCLGE_PTP_CYCLE_ADJ_BASE 2 #define HCLGE_PTP_CYCLE_ADJ_MAX 500000000 -#define HCLGE_PTP_CYCLE_ADJ_UNIT 100000000 #define HCLGE_PTP_SEC_H_OFFSET 32u #define HCLGE_PTP_SEC_L_MASK GENMASK(31, 0) @@ -47,6 +46,12 @@ #define HCLGE_PTP_FLAG_TX_EN 1 #define HCLGE_PTP_FLAG_RX_EN 2 +struct hclge_ptp_cycle { + u32 quo; + u32 numer; + u32 den; +}; + struct hclge_ptp { struct hclge_dev *hdev; struct ptp_clock *clock; @@ -58,6 +63,7 @@ struct hclge_ptp { spinlock_t lock; /* protects ptp registers */ u32 ptp_cfg; u32 last_tx_seqid; + struct hclge_ptp_cycle cycle; unsigned long tx_start; unsigned long tx_cnt; unsigned long tx_skipped; From 76b4f357d0e7d8f6f0013c733e6cba1773c266d3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 1 Jul 2021 17:41:00 +0200 Subject: [PATCH 234/502] x86/kvm: fix vcpu-id indexed array sizes KVM_MAX_VCPU_ID is the maximum vcpu-id of a guest, and not the number of vcpu-ids. Fix array indexed by vcpu-id to have KVM_MAX_VCPU_ID+1 elements. Note that this is currently no real problem, as KVM_MAX_VCPU_ID is an odd number, resulting in always enough padding being available at the end of those arrays. Nevertheless this should be fixed in order to avoid rare problems in case someone is using an even number for KVM_MAX_VCPU_ID. Signed-off-by: Juergen Gross Message-Id: <20210701154105.23215-2-jgross@suse.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 2 +- arch/x86/kvm/ioapic.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 698969e18fe3..ff005fe738a4 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID); + bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 660401700075..11e4065e1617 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -43,13 +43,13 @@ struct kvm_vcpu; struct dest_map { /* vcpu bitmap where IRQ has been sent */ - DECLARE_BITMAP(map, KVM_MAX_VCPU_ID); + DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ - u8 vectors[KVM_MAX_VCPU_ID]; + u8 vectors[KVM_MAX_VCPU_ID + 1]; }; From 15b7b737deb30e1f8f116a08e723173b55ebd2f3 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:56 +0000 Subject: [PATCH 235/502] KVM: selftests: Fix missing break in dirty_log_perf_test arg parsing There is a missing break statement which causes a fallthrough to the next statement where optarg will be null and a segmentation fault will be generated. Fixes: 9e965bb75aae ("KVM: selftests: Add backing src parameter to dirty_log_perf_test") Reviewed-by: Ben Gardon Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-6-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/dirty_log_perf_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 04a2641261be..80cbd3a748c0 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -312,6 +312,7 @@ int main(int argc, char *argv[]) break; case 'o': p.partition_vcpu_memory_access = false; + break; case 's': p.backing_src = parse_backing_src_type(optarg); break; From c33e05d9b067433252b1008d2f37bf64e11151f1 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:57 +0000 Subject: [PATCH 236/502] KVM: selftests: Introduce access_tracking_perf_test This test measures the performance effects of KVM's access tracking. Access tracking is driven by the MMU notifiers test_young, clear_young, and clear_flush_young. These notifiers do not have a direct userspace API, however the clear_young notifier can be triggered by marking a pages as idle in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to enable access tracking on guest memory. To measure performance this test runs a VM with a configurable number of vCPUs that each touch every page in disjoint regions of memory. Performance is measured in the time it takes all vCPUs to finish touching their predefined region. Example invocation: $ ./access_tracking_perf_test -v 8 Testing guest mode: PA-bits:ANY, VA-bits:48, 4K pages guest physical test memory offset: 0xffdfffff000 Populating memory : 1.337752570s Writing to populated memory : 0.010177640s Reading from populated memory : 0.009548239s Mark memory idle : 23.973131748s Writing to idle memory : 0.063584496s Mark memory idle : 24.924652964s Reading from idle memory : 0.062042814s Breaking down the results: * "Populating memory": The time it takes for all vCPUs to perform the first write to every page in their region. * "Writing to populated memory" / "Reading from populated memory": The time it takes for all vCPUs to write and read to every page in their region after it has been populated. This serves as a control for the later results. * "Mark memory idle": The time it takes for every vCPU to mark every page in their region as idle through page_idle. * "Writing to idle memory" / "Reading from idle memory": The time it takes for all vCPUs to write and read to every page in their region after it has been marked idle. This test should be portable across architectures but it is only enabled for x86_64 since that's all I have tested. Reviewed-by: Ben Gardon Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-7-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/access_tracking_perf_test.c | 429 ++++++++++++++++++ 3 files changed, 431 insertions(+) create mode 100644 tools/testing/selftests/kvm/access_tracking_perf_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 06a351b4f93b..0709af0144c8 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -38,6 +38,7 @@ /x86_64/xen_vmcall_test /x86_64/xss_msr_test /x86_64/vmx_pmu_msrs_test +/access_tracking_perf_test /demand_paging_test /dirty_log_test /dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index b853be2ae3c6..5832f510a16c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c new file mode 100644 index 000000000000..e2baa187a21e --- /dev/null +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * access_tracking_perf_test + * + * Copyright (C) 2021, Google, Inc. + * + * This test measures the performance effects of KVM's access tracking. + * Access tracking is driven by the MMU notifiers test_young, clear_young, and + * clear_flush_young. These notifiers do not have a direct userspace API, + * however the clear_young notifier can be triggered by marking a pages as idle + * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to + * enable access tracking on guest memory. + * + * To measure performance this test runs a VM with a configurable number of + * vCPUs that each touch every page in disjoint regions of memory. Performance + * is measured in the time it takes all vCPUs to finish touching their + * predefined region. + * + * Note that a deterministic correctness test of access tracking is not possible + * by using page_idle as it exists today. This is for a few reasons: + * + * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This + * means subsequent guest accesses are not guaranteed to see page table + * updates made by KVM until some time in the future. + * + * 2. page_idle only operates on LRU pages. Newly allocated pages are not + * immediately allocated to LRU lists. Instead they are held in a "pagevec", + * which is drained to LRU lists some time in the future. There is no + * userspace API to force this drain to occur. + * + * These limitations are worked around in this test by using a large enough + * region of memory for each vCPU such that the number of translations cached in + * the TLB and the number of pages held in pagevecs are a small fraction of the + * overall workload. And if either of those conditions are not true this test + * will fail rather than silently passing. + */ +#include +#include +#include +#include +#include +#include + +#include "kvm_util.h" +#include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" + +/* Global variable used to synchronize all of the vCPU threads. */ +static int iteration = -1; + +/* Defines what vCPU threads should do during a given iteration. */ +static enum { + /* Run the vCPU to access all its memory. */ + ITERATION_ACCESS_MEMORY, + /* Mark the vCPU's memory idle in page_idle. */ + ITERATION_MARK_IDLE, +} iteration_work; + +/* Set to true when vCPU threads should exit. */ +static bool done; + +/* The iteration that was last completed by each vCPU. */ +static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; + +/* Whether to overlap the regions of memory vCPUs access. */ +static bool overlap_memory_access; + +struct test_params { + /* The backing source for the region of memory. */ + enum vm_mem_backing_src_type backing_src; + + /* The amount of memory to allocate for each vCPU. */ + uint64_t vcpu_memory_bytes; + + /* The number of vCPUs to create in the VM. */ + int vcpus; +}; + +static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) +{ + uint64_t value; + off_t offset = index * sizeof(value); + + TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value), + "pread from %s offset 0x%" PRIx64 " failed!", + filename, offset); + + return value; + +} + +#define PAGEMAP_PRESENT (1ULL << 63) +#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1) + +static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) +{ + uint64_t hva = (uint64_t) addr_gva2hva(vm, gva); + uint64_t entry; + uint64_t pfn; + + entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize()); + if (!(entry & PAGEMAP_PRESENT)) + return 0; + + pfn = entry & PAGEMAP_PFN_MASK; + if (!pfn) { + print_skip("Looking up PFNs requires CAP_SYS_ADMIN"); + exit(KSFT_SKIP); + } + + return pfn; +} + +static bool is_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64); + + return !!((bits >> (pfn % 64)) & 1); +} + +static void mark_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = 1ULL << (pfn % 64); + + TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8, + "Set page_idle bits for PFN 0x%" PRIx64, pfn); +} + +static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id) +{ + uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva; + uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages; + uint64_t page; + uint64_t still_idle = 0; + uint64_t no_pfn = 0; + int page_idle_fd; + int pagemap_fd; + + /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */ + if (overlap_memory_access && vcpu_id) + return; + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle."); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); + + for (page = 0; page < pages; page++) { + uint64_t gva = base_gva + page * perf_test_args.guest_page_size; + uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); + + if (!pfn) { + no_pfn++; + continue; + } + + if (is_page_idle(page_idle_fd, pfn)) { + still_idle++; + continue; + } + + mark_page_idle(page_idle_fd, pfn); + } + + /* + * Assumption: Less than 1% of pages are going to be swapped out from + * under us during this test. + */ + TEST_ASSERT(no_pfn < pages / 100, + "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.", + vcpu_id, no_pfn, pages); + + /* + * Test that at least 90% of memory has been marked idle (the rest might + * not be marked idle because the pages have not yet made it to an LRU + * list or the translations are still cached in the TLB). 90% is + * arbitrary; high enough that we ensure most memory access went through + * access tracking but low enough as to not make the test too brittle + * over time and across architectures. + */ + TEST_ASSERT(still_idle < pages / 10, + "vCPU%d: Too many pages still idle (%"PRIu64 " out of %" + PRIu64 ").\n", + vcpu_id, still_idle, pages); + + close(page_idle_fd); + close(pagemap_fd); +} + +static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id, + uint64_t expected_ucall) +{ + struct ucall uc; + uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc); + + TEST_ASSERT(expected_ucall == actual_ucall, + "Guest exited unexpectedly (expected ucall %" PRIu64 + ", got %" PRIu64 ")", + expected_ucall, actual_ucall); +} + +static bool spin_wait_for_next_iteration(int *current_iteration) +{ + int last_iteration = *current_iteration; + + do { + if (READ_ONCE(done)) + return false; + + *current_iteration = READ_ONCE(iteration); + } while (last_iteration == *current_iteration); + + return true; +} + +static void *vcpu_thread_main(void *arg) +{ + struct perf_test_vcpu_args *vcpu_args = arg; + struct kvm_vm *vm = perf_test_args.vm; + int vcpu_id = vcpu_args->vcpu_id; + int current_iteration = -1; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + + while (spin_wait_for_next_iteration(¤t_iteration)) { + switch (READ_ONCE(iteration_work)) { + case ITERATION_ACCESS_MEMORY: + vcpu_run(vm, vcpu_id); + assert_ucall(vm, vcpu_id, UCALL_SYNC); + break; + case ITERATION_MARK_IDLE: + mark_vcpu_memory_idle(vm, vcpu_id); + break; + }; + + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + } + + return NULL; +} + +static void spin_wait_for_vcpu(int vcpu_id, int target_iteration) +{ + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != + target_iteration) { + continue; + } +} + +/* The type of memory accesses to perform in the VM. */ +enum access_type { + ACCESS_READ, + ACCESS_WRITE, +}; + +static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) +{ + struct timespec ts_start; + struct timespec ts_elapsed; + int next_iteration; + int vcpu_id; + + /* Kick off the vCPUs by incrementing iteration. */ + next_iteration = ++iteration; + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + + /* Wait for all vCPUs to finish the iteration. */ + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) + spin_wait_for_vcpu(vcpu_id, next_iteration); + + ts_elapsed = timespec_elapsed(ts_start); + pr_info("%-30s: %ld.%09lds\n", + description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec); +} + +static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access, + const char *description) +{ + perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1; + sync_global_to_guest(vm, perf_test_args); + iteration_work = ITERATION_ACCESS_MEMORY; + run_iteration(vm, vcpus, description); +} + +static void mark_memory_idle(struct kvm_vm *vm, int vcpus) +{ + /* + * Even though this parallelizes the work across vCPUs, this is still a + * very slow operation because page_idle forces the test to mark one pfn + * at a time and the clear_young notifier serializes on the KVM MMU + * lock. + */ + pr_debug("Marking VM memory idle (slow)...\n"); + iteration_work = ITERATION_MARK_IDLE; + run_iteration(vm, vcpus, "Mark memory idle"); +} + +static pthread_t *create_vcpu_threads(int vcpus) +{ + pthread_t *vcpu_threads; + int i; + + vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0])); + TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads."); + + for (i = 0; i < vcpus; i++) { + vcpu_last_completed_iteration[i] = iteration; + pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main, + &perf_test_args.vcpu_args[i]); + } + + return vcpu_threads; +} + +static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus) +{ + int i; + + /* Set done to signal the vCPU threads to exit */ + done = true; + + for (i = 0; i < vcpus; i++) + pthread_join(vcpu_threads[i], NULL); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *params = arg; + struct kvm_vm *vm; + pthread_t *vcpu_threads; + int vcpus = params->vcpus; + + vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, + params->backing_src); + + perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes, + !overlap_memory_access); + + vcpu_threads = create_vcpu_threads(vcpus); + + pr_info("\n"); + access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory"); + + /* As a control, read and write to the populated memory first. */ + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory"); + access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory"); + + /* Repeat on memory that has been marked as idle. */ + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory"); + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory"); + + terminate_vcpu_threads(vcpu_threads, vcpus); + free(vcpu_threads); + perf_test_destroy_vm(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n", + name); + puts(""); + printf(" -h: Display this help message."); + guest_modes_help(); + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + printf(" -o: Overlap guest memory accesses instead of partitioning\n" + " them into a separate region of memory for each vCPU.\n"); + printf(" -s: specify the type of memory that should be used to\n" + " back the guest data region.\n\n"); + backing_src_help(); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct test_params params = { + .backing_src = VM_MEM_SRC_ANONYMOUS, + .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, + .vcpus = 1, + }; + int page_idle_fd; + int opt; + + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 'b': + params.vcpu_memory_bytes = parse_size(optarg); + break; + case 'v': + params.vcpus = atoi(optarg); + break; + case 'o': + overlap_memory_access = true; + break; + case 's': + params.backing_src = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + if (page_idle_fd < 0) { + print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled"); + exit(KSFT_SKIP); + } + close(page_idle_fd); + + for_each_guest_mode(run_test, ¶ms); + + return 0; +} From 3fa5e8fd0a0e4ccc03c91df225be2e9b7100800c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 26 Jul 2021 12:39:01 -0400 Subject: [PATCH 237/502] KVM: SVM: delay svm_vcpu_init_msrpm after svm->vmcb is initialized Right now, svm_hv_vmcb_dirty_nested_enlightenments has an incorrect dereference of vmcb->control.reserved_sw before the vmcb is checked for being non-NULL. The compiler is usually sinking the dereference after the check; instead of doing this ourselves in the source, ensure that svm_hv_vmcb_dirty_nested_enlightenments is only called with a non-NULL VMCB. Reported-by: Dan Carpenter Cc: Vineeth Pillai Signed-off-by: Paolo Bonzini [Untested for now due to issues with my AMD machine. - Paolo] --- arch/x86/kvm/svm/svm.c | 4 ++-- arch/x86/kvm/svm/svm_onhyperv.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9a6987549e1b..4bcb95bb8ed7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1406,8 +1406,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) goto error_free_vmsa_page; } - svm_vcpu_init_msrpm(vcpu, svm->msrpm); - svm->vmcb01.ptr = page_address(vmcb01_page); svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT); @@ -1419,6 +1417,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm_switch_vmcb(svm, &svm->vmcb01); init_vmcb(vcpu); + svm_vcpu_init_msrpm(vcpu, svm->msrpm); + svm_init_osvw(vcpu); vcpu->arch.microcode_version = 0x01000065; diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index 9b9a55abc29f..c53b8bf8d013 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( * as we mark it dirty unconditionally towards end of vcpu * init phase. */ - if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && + if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && hve->hv_enlightenments_control.msr_bitmap) vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); } From bb000f640e76c4c2402990d0613d4269e9c6dd29 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 26 Jul 2021 17:01:08 +0200 Subject: [PATCH 238/502] KVM: s390: restore old debugfs names commit bc9e9e672df9 ("KVM: debugfs: Reuse binary stats descriptors") did replace the old definitions with the binary ones. While doing that it missed that some files are names different than the counters. This is especially important for kvm_stat which does have special handling for counters named instruction_*. Fixes: commit bc9e9e672df9 ("KVM: debugfs: Reuse binary stats descriptors") CC: Jing Zhang Signed-off-by: Christian Borntraeger Message-Id: <20210726150108.5603-1-borntraeger@de.ibm.com> Signed-off-by: Paolo Bonzini --- arch/s390/include/asm/kvm_host.h | 18 +++++++++--------- arch/s390/kvm/diag.c | 18 +++++++++--------- arch/s390/kvm/kvm-s390.c | 18 +++++++++--------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9b4473f76e56..161a9e12bfb8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -445,15 +445,15 @@ struct kvm_vcpu_stat { u64 instruction_sigp_init_cpu_reset; u64 instruction_sigp_cpu_reset; u64 instruction_sigp_unknown; - u64 diagnose_10; - u64 diagnose_44; - u64 diagnose_9c; - u64 diagnose_9c_ignored; - u64 diagnose_9c_forward; - u64 diagnose_258; - u64 diagnose_308; - u64 diagnose_500; - u64 diagnose_other; + u64 instruction_diagnose_10; + u64 instruction_diagnose_44; + u64 instruction_diagnose_9c; + u64 diag_9c_ignored; + u64 diag_9c_forward; + u64 instruction_diagnose_258; + u64 instruction_diagnose_308; + u64 instruction_diagnose_500; + u64 instruction_diagnose_other; u64 pfault_sync; }; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 02c146f9e5cd..807fa9da1e72 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -24,7 +24,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE; - vcpu->stat.diagnose_10++; + vcpu->stat.instruction_diagnose_10++; if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end || start < 2 * PAGE_SIZE) @@ -74,7 +74,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx", vcpu->run->s.regs.gprs[rx]); - vcpu->stat.diagnose_258++; + vcpu->stat.instruction_diagnose_258++; if (vcpu->run->s.regs.gprs[rx] & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); @@ -145,7 +145,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); - vcpu->stat.diagnose_44++; + vcpu->stat.instruction_diagnose_44++; kvm_vcpu_on_spin(vcpu, true); return 0; } @@ -169,7 +169,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) int tid; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; - vcpu->stat.diagnose_9c++; + vcpu->stat.instruction_diagnose_9c++; /* yield to self */ if (tid == vcpu->vcpu_id) @@ -192,7 +192,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: yield forwarded", tid); - vcpu->stat.diagnose_9c_forward++; + vcpu->stat.diag_9c_forward++; return 0; } @@ -203,7 +203,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) return 0; no_yield: VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid); - vcpu->stat.diagnose_9c_ignored++; + vcpu->stat.diag_9c_ignored++; return 0; } @@ -213,7 +213,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode); - vcpu->stat.diagnose_308++; + vcpu->stat.instruction_diagnose_308++; switch (subcode) { case 3: vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; @@ -245,7 +245,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) { int ret; - vcpu->stat.diagnose_500++; + vcpu->stat.instruction_diagnose_500++; /* No virtio-ccw notification? Get out quickly. */ if (!vcpu->kvm->arch.css_support || (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) @@ -299,7 +299,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) case 0x500: return __diag_virtio_hypercall(vcpu); default: - vcpu->stat.diagnose_other++; + vcpu->stat.instruction_diagnose_other++; return -EOPNOTSUPP; } } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b655a7d82bf0..4527ac7b5961 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -163,15 +163,15 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset), STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset), STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown), - STATS_DESC_COUNTER(VCPU, diagnose_10), - STATS_DESC_COUNTER(VCPU, diagnose_44), - STATS_DESC_COUNTER(VCPU, diagnose_9c), - STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored), - STATS_DESC_COUNTER(VCPU, diagnose_9c_forward), - STATS_DESC_COUNTER(VCPU, diagnose_258), - STATS_DESC_COUNTER(VCPU, diagnose_308), - STATS_DESC_COUNTER(VCPU, diagnose_500), - STATS_DESC_COUNTER(VCPU, diagnose_other), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_10), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_44), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c), + STATS_DESC_COUNTER(VCPU, diag_9c_ignored), + STATS_DESC_COUNTER(VCPU, diag_9c_forward), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_258), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_500), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), STATS_DESC_COUNTER(VCPU, pfault_sync) }; static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == From f1577ab21442476a1015d09e861c08ca76262c06 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 13 Jul 2021 17:20:16 +0300 Subject: [PATCH 239/502] KVM: SVM: svm_set_vintr don't warn if AVIC is active but is about to be deactivated It is possible for AVIC inhibit and AVIC active state to be mismatched. Currently we disable AVIC right away on vCPU which started the AVIC inhibit request thus this warning doesn't trigger but at least in theory, if svm_set_vintr is called at the same time on multiple vCPUs, the warning can happen. Signed-off-by: Maxim Levitsky Message-Id: <20210713142023.106183-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4bcb95bb8ed7..e8ccab50ebf6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1568,8 +1568,11 @@ static void svm_set_vintr(struct vcpu_svm *svm) { struct vmcb_control_area *control; - /* The following fields are ignored when AVIC is enabled */ - WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu)); + /* + * The following fields are ignored when AVIC is enabled + */ + WARN_ON(kvm_apicv_activated(svm->vcpu.kvm)); + svm_set_intercept(svm, INTERCEPT_VINTR); /* From feea01360cb1925dd31a3d38514eb86f61d69468 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 13 Jul 2021 17:20:17 +0300 Subject: [PATCH 240/502] KVM: SVM: tweak warning about enabled AVIC on nested entry It is possible that AVIC was requested to be disabled but not yet disabled, e.g if the nested entry is done right after svm_vcpu_after_set_cpuid. Signed-off-by: Maxim Levitsky Message-Id: <20210713142023.106183-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1c2a0414a88d..61738ff8ef33 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -515,7 +515,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id, * avic_physical_id. */ - WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK); + WARN_ON(kvm_apicv_activated(svm->vcpu.kvm)); /* Copied from vmcb01. msrpm_base can be overwritten later. */ svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl; From 5868b8225ecef4ba3f5b17e65984d60bc5fd6254 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 13 Jul 2021 17:20:18 +0300 Subject: [PATCH 241/502] KVM: SVM: use vmcb01 in svm_refresh_apicv_exec_ctrl Currently when SVM is enabled in guest CPUID, AVIC is inhibited as soon as the guest CPUID is set. AVIC happens to be fully disabled on all vCPUs by the time any guest entry starts (if after migration the entry can be nested). The reason is that currently we disable avic right away on vCPU from which the kvm_request_apicv_update was called and for this case, it happens to be called on all vCPUs (by svm_vcpu_after_set_cpuid). After we stop doing this, AVIC will end up being disabled only when KVM_REQ_APICV_UPDATE is processed which is after we done switching to the nested guest. Fix this by just using vmcb01 in svm_refresh_apicv_exec_ctrl for avic (which is a right thing to do anyway). Signed-off-by: Maxim Levitsky Message-Id: <20210713142023.106183-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 1d01da64c333..a8ad78a2faa1 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -646,7 +646,7 @@ out: void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = svm->vmcb01.ptr; bool activated = kvm_vcpu_apicv_active(vcpu); if (!enable_apicv) From 74775654332b2682a5580d6f954e5a9ac81e7477 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 27 Jul 2021 19:12:47 +0800 Subject: [PATCH 242/502] KVM: use cpu_relax when halt polling SMT siblings share caches and other hardware, and busy halt polling will degrade its sibling performance if its sibling is working Sean Christopherson suggested as below: "Rather than disallowing halt-polling entirely, on x86 it should be sufficient to simply have the hardware thread yield to its sibling(s) via PAUSE. It probably won't get back all performance, but I would expect it to be close. This compiles on all KVM architectures, and AFAICT the intended usage of cpu_relax() is identical for all architectures." Suggested-by: Sean Christopherson Signed-off-by: Li RongQing Message-Id: <20210727111247.55510-1-lirongqing@baidu.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 986959833d70..0d732813fa80 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3110,6 +3110,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) ++vcpu->stat.generic.halt_poll_invalid; goto out; } + cpu_relax(); poll_end = cur = ktime_get(); } while (kvm_vcpu_can_poll(cur, stop)); } From 8750f9bbda115f3f79bfe43be85551ee5e12b6ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 27 Jul 2021 08:43:10 -0400 Subject: [PATCH 243/502] KVM: add missing compat KVM_CLEAR_DIRTY_LOG The arguments to the KVM_CLEAR_DIRTY_LOG ioctl include a pointer, therefore it needs a compat ioctl implementation. Otherwise, 32-bit userspace fails to invoke it on 64-bit kernels; for x86 it might work fine by chance if the padding is zero, but not on big-endian architectures. Reported-by: Thomas Sattler Cc: stable@vger.kernel.org Fixes: 2a31b9db1535 ("kvm: introduce manual dirty log reprotect") Reviewed-by: Peter Xu Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0d732813fa80..d20fba0fc290 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4391,6 +4391,16 @@ struct compat_kvm_dirty_log { }; }; +struct compat_kvm_clear_dirty_log { + __u32 slot; + __u32 num_pages; + __u64 first_page; + union { + compat_uptr_t dirty_bitmap; /* one bit per page */ + __u64 padding2; + }; +}; + static long kvm_vm_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4400,6 +4410,24 @@ static long kvm_vm_compat_ioctl(struct file *filp, if (kvm->mm != current->mm) return -EIO; switch (ioctl) { +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT + case KVM_CLEAR_DIRTY_LOG: { + struct compat_kvm_clear_dirty_log compat_log; + struct kvm_clear_dirty_log log; + + if (copy_from_user(&compat_log, (void __user *)arg, + sizeof(compat_log))) + return -EFAULT; + log.slot = compat_log.slot; + log.num_pages = compat_log.num_pages; + log.first_page = compat_log.first_page; + log.padding2 = compat_log.padding2; + log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); + + r = kvm_vm_ioctl_clear_dirty_log(kvm, &log); + break; + } +#endif case KVM_GET_DIRTY_LOG: { struct compat_kvm_dirty_log compat_log; struct kvm_dirty_log log; From 2bcc025ab9bbd029b1730cde71cb4e4f0ed35d0f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 17 Jul 2021 14:27:42 +0300 Subject: [PATCH 244/502] clk: tegra: Implement disable_unused() of tegra_clk_sdmmc_mux_ops Implement disable_unused() callback of tegra_clk_sdmmc_mux_ops to fix imbalanced disabling of the unused MMC clock on Tegra210 Jetson Nano. Fixes: c592c8a28f58 ("clk: tegra: Fix refcounting of gate clocks") Reported-by: Jon Hunter # T210 Nano Tested-by: Jon Hunter # T210 Nano Acked-by: Jon Hunter Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210717112742.7196-1-digetx@gmail.com Signed-off-by: Stephen Boyd --- drivers/clk/tegra/clk-sdmmc-mux.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/clk/tegra/clk-sdmmc-mux.c b/drivers/clk/tegra/clk-sdmmc-mux.c index 316912d3b1a4..4f2c3309eea4 100644 --- a/drivers/clk/tegra/clk-sdmmc-mux.c +++ b/drivers/clk/tegra/clk-sdmmc-mux.c @@ -194,6 +194,15 @@ static void clk_sdmmc_mux_disable(struct clk_hw *hw) gate_ops->disable(gate_hw); } +static void clk_sdmmc_mux_disable_unused(struct clk_hw *hw) +{ + struct tegra_sdmmc_mux *sdmmc_mux = to_clk_sdmmc_mux(hw); + const struct clk_ops *gate_ops = sdmmc_mux->gate_ops; + struct clk_hw *gate_hw = &sdmmc_mux->gate.hw; + + gate_ops->disable_unused(gate_hw); +} + static void clk_sdmmc_mux_restore_context(struct clk_hw *hw) { struct clk_hw *parent = clk_hw_get_parent(hw); @@ -218,6 +227,7 @@ static const struct clk_ops tegra_clk_sdmmc_mux_ops = { .is_enabled = clk_sdmmc_mux_is_enabled, .enable = clk_sdmmc_mux_enable, .disable = clk_sdmmc_mux_disable, + .disable_unused = clk_sdmmc_mux_disable_unused, .restore_context = clk_sdmmc_mux_restore_context, }; From 343597d558e79fe704ba8846b5b2ed24056b89c2 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 27 Jul 2021 09:04:58 -0700 Subject: [PATCH 245/502] bpf, sockmap: Zap ingress queues after stopping strparser We don't want strparser to run and pass skbs into skmsg handlers when the psock is null. We just sk_drop them in this case. When removing a live socket from map it means extra drops that we do not need to incur. Move the zap below strparser close to avoid this condition. This way we stop the stream parser first stopping it from processing packets and then delete the psock. Fixes: a136678c0bdbb ("bpf: sk_msg, zap ingress queue on psock down") Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210727160500.1713554-2-john.fastabend@gmail.com --- net/core/skmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 15d71288e741..28115ef742e8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -773,8 +773,6 @@ static void sk_psock_destroy(struct work_struct *work) void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - sk_psock_stop(psock, false); - write_lock_bh(&sk->sk_callback_lock); sk_psock_restore_proto(sk, psock); rcu_assign_sk_user_data(sk, NULL); @@ -784,6 +782,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); + sk_psock_stop(psock, false); + INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); queue_rcu_work(system_wq, &psock->rwork); } From 476d98018f32e68e7c5d4e8456940cf2b6d66f10 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 27 Jul 2021 09:04:59 -0700 Subject: [PATCH 246/502] bpf, sockmap: On cleanup we additionally need to remove cached skb Its possible if a socket is closed and the receive thread is under memory pressure it may have cached a skb. We need to ensure these skbs are free'd along with the normal ingress_skb queue. Before 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") tear down and backlog processing both had sock_lock for the common case of socket close or unhash. So it was not possible to have both running in parrallel so all we would need is the kfree in those kernels. But, latest kernels include the commit 799aa7f98d5e and this requires a bit more work. Without the ingress_lock guarding reading/writing the state->skb case its possible the tear down could run before the state update causing it to leak memory or worse when the backlog reads the state it could potentially run interleaved with the tear down and we might end up free'ing the state->skb from tear down side but already have the reference from backlog side. To resolve such races we wrap accesses in ingress_lock on both sides serializing tear down and backlog case. In both cases this only happens after an EAGAIN error case so having an extra lock in place is likely fine. The normal path will skip the locks. Note, we check state->skb before grabbing lock. This works because we can only enqueue with the mutex we hold already. Avoiding a race on adding state->skb after the check. And if tear down path is running that is also fine if the tear down path then removes state->skb we will simply set skb=NULL and the subsequent goto is skipped. This slight complication avoids locking in normal case. With this fix we no longer see this warning splat from tcp side on socket close when we hit the above case with redirect to ingress self. [224913.935822] WARNING: CPU: 3 PID: 32100 at net/core/stream.c:208 sk_stream_kill_queues+0x212/0x220 [224913.935841] Modules linked in: fuse overlay bpf_preload x86_pkg_temp_thermal intel_uncore wmi_bmof squashfs sch_fq_codel efivarfs ip_tables x_tables uas xhci_pci ixgbe mdio xfrm_algo xhci_hcd wmi [224913.935897] CPU: 3 PID: 32100 Comm: fgs-bench Tainted: G I 5.14.0-rc1alu+ #181 [224913.935908] Hardware name: Dell Inc. Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019 [224913.935914] RIP: 0010:sk_stream_kill_queues+0x212/0x220 [224913.935923] Code: 8b 83 20 02 00 00 85 c0 75 20 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 89 df e8 2b 11 fe ff eb c3 0f 0b e9 7c ff ff ff 0f 0b eb ce <0f> 0b 5b 5d 41 5c 41 5d 41 5e 41 5f c3 90 0f 1f 44 00 00 41 57 41 [224913.935932] RSP: 0018:ffff88816271fd38 EFLAGS: 00010206 [224913.935941] RAX: 0000000000000ae8 RBX: ffff88815acd5240 RCX: dffffc0000000000 [224913.935948] RDX: 0000000000000003 RSI: 0000000000000ae8 RDI: ffff88815acd5460 [224913.935954] RBP: ffff88815acd5460 R08: ffffffff955c0ae8 R09: fffffbfff2e6f543 [224913.935961] R10: ffffffff9737aa17 R11: fffffbfff2e6f542 R12: ffff88815acd5390 [224913.935967] R13: ffff88815acd5480 R14: ffffffff98d0c080 R15: ffffffff96267500 [224913.935974] FS: 00007f86e6bd1700(0000) GS:ffff888451cc0000(0000) knlGS:0000000000000000 [224913.935981] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [224913.935988] CR2: 000000c0008eb000 CR3: 00000001020e0005 CR4: 00000000003706e0 [224913.935994] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [224913.936000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [224913.936007] Call Trace: [224913.936016] inet_csk_destroy_sock+0xba/0x1f0 [224913.936033] __tcp_close+0x620/0x790 [224913.936047] tcp_close+0x20/0x80 [224913.936056] inet_release+0x8f/0xf0 [224913.936070] __sock_release+0x72/0x120 [224913.936083] sock_close+0x14/0x20 Fixes: a136678c0bdbb ("bpf: sk_msg, zap ingress queue on psock down") Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210727160500.1713554-3-john.fastabend@gmail.com --- net/core/skmsg.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 28115ef742e8..036cdb33a94a 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -590,23 +590,42 @@ static void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } +static void sk_psock_skb_state(struct sk_psock *psock, + struct sk_psock_work_state *state, + struct sk_buff *skb, + int len, int off) +{ + spin_lock_bh(&psock->ingress_lock); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + state->skb = skb; + state->len = len; + state->off = off; + } else { + sock_drop(psock->sk, skb); + } + spin_unlock_bh(&psock->ingress_lock); +} + static void sk_psock_backlog(struct work_struct *work) { struct sk_psock *psock = container_of(work, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; - struct sk_buff *skb; + struct sk_buff *skb = NULL; bool ingress; u32 len, off; int ret; mutex_lock(&psock->work_mutex); - if (state->skb) { + if (unlikely(state->skb)) { + spin_lock_bh(&psock->ingress_lock); skb = state->skb; len = state->len; off = state->off; state->skb = NULL; - goto start; + spin_unlock_bh(&psock->ingress_lock); } + if (skb) + goto start; while ((skb = skb_dequeue(&psock->ingress_skb))) { len = skb->len; @@ -621,9 +640,8 @@ start: len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { - state->skb = skb; - state->len = len; - state->off = off; + sk_psock_skb_state(psock, state, skb, + len, off); goto end; } /* Hard errors break pipe and stop xmit. */ @@ -722,6 +740,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock) skb_bpf_redirect_clear(skb); sock_drop(psock->sk, skb); } + kfree_skb(psock->work_state.skb); + /* We null the skb here to ensure that calls to sk_psock_backlog + * do not pick up the free'd skb. + */ + psock->work_state.skb = NULL; __sk_psock_purge_ingress_msg(psock); } From 9635720b7c88592214562cb72605bdab6708006c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 27 Jul 2021 09:05:00 -0700 Subject: [PATCH 247/502] bpf, sockmap: Fix memleak on ingress msg enqueue If backlog handler is running during a tear down operation we may enqueue data on the ingress msg queue while tear down is trying to free it. sk_psock_backlog() sk_psock_handle_skb() skb_psock_skb_ingress() sk_psock_skb_ingress_enqueue() sk_psock_queue_msg(psock,msg) spin_lock(ingress_lock) sk_psock_zap_ingress() _sk_psock_purge_ingerss_msg() _sk_psock_purge_ingress_msg() -- free ingress_msg list -- spin_unlock(ingress_lock) spin_lock(ingress_lock) list_add_tail(msg,ingress_msg) <- entry on list with no one left to free it. spin_unlock(ingress_lock) To fix we only enqueue from backlog if the ENABLED bit is set. The tear down logic clears the bit with ingress_lock set so we wont enqueue the msg in the last step. Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Acked-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210727160500.1713554-4-john.fastabend@gmail.com --- include/linux/skmsg.h | 54 ++++++++++++++++++++++++++++--------------- net/core/skmsg.c | 6 ----- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 96f319099744..14ab0c0bc924 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } +static inline void sk_psock_set_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + set_bit(bit, &psock->state); +} + +static inline void sk_psock_clear_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + clear_bit(bit, &psock->state); +} + +static inline bool sk_psock_test_state(const struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + return test_bit(bit, &psock->state); +} + +static inline void sock_drop(struct sock *sk, struct sk_buff *skb) +{ + sk_drops_add(sk, skb); + kfree_skb(skb); +} + +static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) +{ + if (msg->skb) + sock_drop(psock->sk, msg->skb); + kfree(msg); +} + static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { spin_lock_bh(&psock->ingress_lock); - list_add_tail(&msg->list, &psock->ingress_msg); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + list_add_tail(&msg->list, &psock->ingress_msg); + else + drop_sk_msg(psock, msg); spin_unlock_bh(&psock->ingress_lock); } @@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk, psock->psock_update_sk_prot(sk, psock, true); } -static inline void sk_psock_set_state(struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - set_bit(bit, &psock->state); -} - -static inline void sk_psock_clear_state(struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - clear_bit(bit, &psock->state); -} - -static inline bool sk_psock_test_state(const struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - return test_bit(bit, &psock->state); -} - static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 036cdb33a94a..2d6249b28928 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -584,12 +584,6 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, return sk_psock_skb_ingress(psock, skb); } -static void sock_drop(struct sock *sk, struct sk_buff *skb) -{ - sk_drops_add(sk, skb); - kfree_skb(skb); -} - static void sk_psock_skb_state(struct sk_psock *psock, struct sk_psock_work_state *state, struct sk_buff *skb, From b93af3055d6f32d3b0361cfdb110c9399c1241ba Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 27 Jul 2021 17:32:53 +0800 Subject: [PATCH 248/502] blk-mq-sched: Fix blk_mq_sched_alloc_tags() error handling If the blk_mq_sched_alloc_tags() -> blk_mq_alloc_rqs() call fails, then we call blk_mq_sched_free_tags() -> blk_mq_free_rqs(). It is incorrect to do so, as any rqs would have already been freed in the blk_mq_alloc_rqs() call. Fix by calling blk_mq_free_rq_map() only directly. Fixes: 6917ff0b5bd41 ("blk-mq-sched: refactor scheduler initialization") Signed-off-by: John Garry Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/1627378373-148090-1-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c838d81ac058..0f006cabfd91 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, percpu_ref_put(&q->q_usage_counter); } -static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, - struct blk_mq_hw_ctx *hctx, - unsigned int hctx_idx) -{ - if (hctx->sched_tags) { - blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx); - blk_mq_free_rq_map(hctx->sched_tags, set->flags); - hctx->sched_tags = NULL; - } -} - static int blk_mq_sched_alloc_tags(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) @@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, return -ENOMEM; ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); - if (ret) - blk_mq_sched_free_tags(set, hctx, hctx_idx); + if (ret) { + blk_mq_free_rq_map(hctx->sched_tags, set->flags); + hctx->sched_tags = NULL; + } return ret; } From 8b54874ef1617185048029a3083d510569e93751 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 26 Jul 2021 09:20:14 +0300 Subject: [PATCH 249/502] net/mlx5: Fix flow table chaining Fix a bug when flow table is created in priority that already has other flow tables as shown in the below diagram. If the new flow table (FT-B) has the lowest level in the priority, we need to connect the flow tables from the previous priority (p0) to this new table. In addition when this flow table is destroyed (FT-B), we need to connect the flow tables from the previous priority (p0) to the next level flow table (FT-C) in the same priority of the destroyed table (if exists). --------- |root_ns| --------- | -------------------------------- | | | ---------- ---------- --------- |p(prio)-x| | p-y | | p-n | ---------- ---------- --------- | | ---------------- ------------------ |ns(e.g bypass)| |ns(e.g. kernel) | ---------------- ------------------ | | | ------- ------ ---- | p0 | | p1 | |p2| ------- ------ ---- | | \ -------- ------- ------ | FT-A | |FT-B | |FT-C| -------- ------- ------ Fixes: f90edfd279f3 ("net/mlx5_core: Connect flow tables") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d7bf0a3e4a52..c0697e1b7118 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1024,17 +1024,19 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { - struct mlx5_flow_table *next_ft; + struct mlx5_flow_table *next_ft, *first_ft; int err = 0; /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ - if (list_empty(&prio->node.children)) { + first_ft = list_first_entry_or_null(&prio->node.children, + struct mlx5_flow_table, node.list); + if (!first_ft || first_ft->level > ft->level) { err = connect_prev_fts(dev, ft, prio); if (err) return err; - next_ft = find_next_chained_ft(prio); + next_ft = first_ft ? first_ft : find_next_chained_ft(prio); err = connect_fwd_rules(dev, ft, next_ft); if (err) return err; @@ -2120,7 +2122,7 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft) node.list) == ft)) return 0; - next_ft = find_next_chained_ft(prio); + next_ft = find_next_ft(ft); err = connect_fwd_rules(dev, next_ft, ft); if (err) return err; From 90b22b9bcd242a3ba238f2c6f7eab771799001f8 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 8 Jul 2021 15:24:58 +0300 Subject: [PATCH 250/502] net/mlx5e: Disable Rx ntuple offload for uplink representor Rx ntuple offload is not supported in switchdev mode. Tryng to enable it cause kernel panic. BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 80000001065a5067 P4D 80000001065a5067 PUD 106594067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 7 PID: 1089 Comm: ethtool Not tainted 5.13.0-rc7_for_upstream_min_debug_2021_06_23_16_44 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5e_arfs_enable+0x70/0xd0 [mlx5_core] Code: 44 24 10 00 00 00 00 48 c7 44 24 18 00 00 00 00 49 63 c4 48 89 e2 44 89 e6 48 69 c0 20 08 00 00 48 89 ef 48 03 85 68 ac 00 00 <48> 8b 40 08 48 89 44 24 08 e8 d2 aa fd ff 48 83 05 82 96 18 00 01 RSP: 0018:ffff8881047679e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000004000000000 RCX: 0000004000000000 RDX: ffff8881047679e0 RSI: 0000000000000000 RDI: ffff888115100880 RBP: ffff888115100880 R08: ffffffffa00f6cb0 R09: ffff888104767a18 R10: ffff8881151000a0 R11: ffff888109479540 R12: 0000000000000000 R13: ffff888104767bb8 R14: ffff888115100000 R15: ffff8881151000a0 FS: 00007f41a64ab740(0000) GS:ffff8882f5dc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000104cbc005 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: set_feature_arfs+0x1e/0x40 [mlx5_core] mlx5e_handle_feature+0x43/0xa0 [mlx5_core] mlx5e_set_features+0x139/0x1b0 [mlx5_core] __netdev_update_features+0x2b3/0xaf0 ethnl_set_features+0x176/0x3a0 ? __nla_parse+0x22/0x30 genl_family_rcv_msg_doit+0xe2/0x140 genl_rcv_msg+0xde/0x1d0 ? features_reply_size+0xe0/0xe0 ? genl_get_cmd+0xd0/0xd0 netlink_rcv_skb+0x4e/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x1f6/0x2b0 netlink_sendmsg+0x225/0x450 sock_sendmsg+0x33/0x40 __sys_sendto+0xd4/0x120 ? __sys_recvmsg+0x4e/0x90 ? exc_page_fault+0x219/0x740 __x64_sys_sendto+0x25/0x30 do_syscall_64+0x3f/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f41a65b0cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffd8d688358 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000010f42a0 RCX: 00007f41a65b0cba RDX: 0000000000000058 RSI: 00000000010f43b0 RDI: 0000000000000003 RBP: 000000000047ae60 R08: 00007f41a667c000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 00000000010f4340 R13: 00000000010f4350 R14: 00007ffd8d688400 R15: 00000000010f42a0 Modules linked in: mlx5_vdpa vhost_iotlb vdpa xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core overlay mlx5_core ptp pps_core fuse CR2: 0000000000000008 ---[ end trace c66523f2aba94b43 ]--- Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d09e65557e75..c6f99fc77411 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3829,6 +3829,24 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) return 0; } +static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev, + netdev_features_t features) +{ + features &= ~NETIF_F_HW_TLS_RX; + if (netdev->features & NETIF_F_HW_TLS_RX) + netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_TLS_TX; + if (netdev->features & NETIF_F_HW_TLS_TX) + netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_NTUPLE; + if (netdev->features & NETIF_F_NTUPLE) + netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); + + return features; +} + static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_features_t features) { @@ -3860,15 +3878,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); } - if (mlx5e_is_uplink_rep(priv)) { - features &= ~NETIF_F_HW_TLS_RX; - if (netdev->features & NETIF_F_HW_TLS_RX) - netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); - - features &= ~NETIF_F_HW_TLS_TX; - if (netdev->features & NETIF_F_HW_TLS_TX) - netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); - } + if (mlx5e_is_uplink_rep(priv)) + features = mlx5e_fix_uplink_rep_features(netdev, features); mutex_unlock(&priv->state_lock); From c671972534c6f7fce789ac8156a2bc3bd146f806 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 22 Jun 2021 17:07:02 +0300 Subject: [PATCH 251/502] net/mlx5: E-Switch, Set destination vport vhca id only when merged eswitch is supported Destination vport vhca id is valid flag is set only merged eswitch isn't supported. Change destination vport vhca id value to be set also only when merged eswitch is supported. Fixes: e4ad91f23f10 ("net/mlx5e: Split offloaded eswitch TC rules for port mirroring") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7579f3402776..b0a2ca9037ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -382,10 +382,11 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f { dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; - dest[dest_idx].vport.vhca_id = - MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + } if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { if (pkt_reformat) { flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; From dd3fddb82780bfa24124834edd90bbc63bd689cc Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 2 Jun 2021 14:17:07 +0300 Subject: [PATCH 252/502] net/mlx5: E-Switch, handle devcom events only for ports on the same device This is the same check as LAG mode checks if to enable lag. This will fix adding peer miss rules if lag is not supported and even an incorrect rules in socket direct mode. Also fix the incorrect comment on mlx5_get_next_phys_dev() as flow #1 doesn't exists. Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 5 +---- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ceebfc20f65e..def2156e50ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -500,10 +500,7 @@ static int next_phys_dev(struct device *dev, const void *data) return 1; } -/* This function is called with two flows: - * 1. During initialization of mlx5_core_dev and we don't need to lock it. - * 2. During LAG configure stage and caller holds &mlx5_intf_mutex. - */ +/* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { struct auxiliary_device *adev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0a2ca9037ac..011e766e4f67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2368,6 +2368,9 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev) + break; + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; From e2351e517068718724f1d3b4010e2a41ec91fa76 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 30 Jun 2021 13:45:05 +0300 Subject: [PATCH 253/502] net/mlx5e: RX, Avoid possible data corruption when relaxed ordering and LRO combined When HW aggregates packets for an LRO session, it writes the payload of two consecutive packets of a flow contiguously, so that they usually share a cacheline. The first byte of a packet's payload is written immediately after the last byte of the preceding packet. In this flow, there are two consecutive write requests to the shared cacheline: 1. Regular write for the earlier packet. 2. Read-modify-write for the following packet. In case of relaxed-ordering on, these two writes might be re-ordered. Using the end padding optimization (to avoid partial write for the last cacheline of a packet) becomes problematic if the two writes occur out-of-order, as the padding would overwrite payload that belongs to the following packet, causing data corruption. Avoid this by disabling the end padding optimization when both LRO and relaxed-ordering are enabled. Fixes: 17347d5430c4 ("net/mlx5e: Add support for PCI relaxed ordering") Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 150c8e82c738..2cbf18c967f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -471,6 +471,15 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } +static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) && + MLX5_CAP_GEN(mdev, relaxed_ordering_write); + + return ro && params->lro_en ? + MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN; +} + int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -508,7 +517,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } MLX5_SET(wq, wq, wq_type, params->rq_wq_type); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params)); MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); From 9841d58f3550d11c6181424427e8ad8c9c80f1b6 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 30 Jun 2021 13:33:31 +0300 Subject: [PATCH 254/502] net/mlx5e: Add NETIF_F_HW_TC to hw_features when HTB offload is available If a feature flag is only present in features, but not in hw_features, the user can't reset it. Although hw_features may contain NETIF_F_HW_TC by the point where the driver checks whether HTB offload is supported, this flag is controlled by another condition that may not hold. Set it explicitly to make sure the user can disable it. Fixes: 214baf22870c ("net/mlx5e: Support HTB offload") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c6f99fc77411..c5a2e3e6fe4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4870,6 +4870,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (MLX5_CAP_ETH(mdev, scatter_fcs)) netdev->hw_features |= NETIF_F_RXFCS; + if (mlx5_qos_is_supported(mdev)) + netdev->hw_features |= NETIF_F_HW_TC; + netdev->features = netdev->hw_features; /* Defaults */ @@ -4890,8 +4893,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_NTUPLE; #endif } - if (mlx5_qos_is_supported(mdev)) - netdev->features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; From a759f845d1f78634b54744db0fa48524ef6d0e14 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 30 Jun 2021 11:17:12 +0300 Subject: [PATCH 255/502] net/mlx5e: Consider PTP-RQ when setting RX VLAN stripping Add PTP-RQ to the loop when setting rx-vlan-offload feature via ethtool. On PTP-RQ's creation, set rx-vlan-offload into its parameters. Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 778e229310a9..07b429b94d93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -482,8 +482,11 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, params->log_sq_size = orig->log_sq_size; mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); } - if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + /* RQ */ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + params->vlan_strip_disable = orig->vlan_strip_disable; mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); + } } static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c5a2e3e6fe4b..37c440837945 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3384,7 +3384,7 @@ static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool en static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) { - int err = 0; + int err; int i; for (i = 0; i < chs->num; i++) { @@ -3392,6 +3392,8 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) if (err) return err; } + if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state)) + return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd); return 0; } From 497008e783452a2ec45c7ec5835cfe6950dcb097 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 21 Jun 2021 18:04:07 +0300 Subject: [PATCH 256/502] net/mlx5e: Fix page allocation failure for trap-RQ over SF Set the correct device pointer to the trap-RQ, to allow access to dma_mask and avoid allocation request with the wrong pci-dev. WARNING: CPU: 1 PID: 12005 at kernel/dma/mapping.c:151 dma_map_page_attrs+0x139/0x1c0 ... all Trace: ? __page_pool_alloc_pages_slow+0x5a/0x210 mlx5e_post_rx_wqes+0x258/0x400 [mlx5_core] mlx5e_trap_napi_poll+0x44/0xc0 [mlx5_core] __napi_poll+0x24/0x150 net_rx_action+0x22b/0x280 __do_softirq+0xc7/0x27e do_softirq+0x61/0x80 __local_bh_enable_ip+0x4b/0x50 mlx5e_handle_action_trap+0x2dd/0x4d0 [mlx5_core] blocking_notifier_call_chain+0x5a/0x80 mlx5_devlink_trap_action_set+0x8b/0x100 [mlx5_core] Fixes: 5543e989fe5e ("net/mlx5e: Add trap entity to ETH driver") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 86ab4e864fe6..7f94508594fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -37,7 +37,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params struct mlx5e_priv *priv = t->priv; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = t->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; From 678b1ae1af4aef488fcc42baa663e737b9a531ba Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 22 Jun 2021 10:24:17 +0300 Subject: [PATCH 257/502] net/mlx5e: Fix page allocation failure for ptp-RQ over SF Set the correct pci-device pointer to the ptp-RQ. This allows access to dma_mask and avoids allocation request with wrong pci-device. Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 07b429b94d93..efef4adce086 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -497,7 +497,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, int err; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = c->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; From 7f331bf0f060c2727e36d64f9b098b4ee5f3dfad Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 16 Jun 2021 19:11:03 +0300 Subject: [PATCH 258/502] net/mlx5: Unload device upon firmware fatal error When fw_fatal reporter reports an error, the firmware in not responding. Unload the device to ensure that the driver closes all its resources, even if recovery is not due (user disabled auto-recovery or reporter is in grace period). On successful recovery the device is loaded back up. Fixes: b3bd076f7501 ("net/mlx5: Report devlink health on FW fatal issues") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9ff163c5bcde..9abeb80ffa31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -626,8 +626,16 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) } fw_reporter_ctx.err_synd = health->synd; fw_reporter_ctx.miss_counter = health->miss_counter; - devlink_health_report(health->fw_fatal_reporter, - "FW fatal error reported", &fw_reporter_ctx); + if (devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) { + /* If recovery wasn't performed, due to grace period, + * unload the driver. This ensures that the driver + * closes all its resources and it is not subjected to + * requests from the kernel. + */ + mlx5_core_err(dev, "Driver is in error state. Unloading\n"); + mlx5_unload_one(dev); + } } static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { From b1c2f6312c5005c928a72e668bf305a589d828d4 Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Mon, 26 Apr 2021 15:16:26 +0300 Subject: [PATCH 259/502] net/mlx5e: Fix nullptr in mlx5e_hairpin_get_mdev() The result of __dev_get_by_index() is not checked for NULL and then gets dereferenced immediately. Also, __dev_get_by_index() must be called while holding either RTNL lock or @dev_base_lock, which isn't satisfied by mlx5e_hairpin_get_mdev() or its callers. This makes the underlying hlist_for_each_entry() loop not safe, and can have adverse effects in itself. Fix by using dev_get_by_index() and handling nullptr return value when ifindex device is not found. Update mlx5e_hairpin_get_mdev() callers to check for possible PTR_ERR() result. Fixes: 77ab67b7f0f9 ("net/mlx5e: Basic setup of hairpin object") Addresses-Coverity: ("Dereference null return value") Signed-off-by: Dima Chumak Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 629a61e8022f..d273758255c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -452,12 +452,32 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, static struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) { + struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_priv *priv; - netdev = __dev_get_by_index(net, ifindex); + netdev = dev_get_by_index(net, ifindex); + if (!netdev) + return ERR_PTR(-ENODEV); + priv = netdev_priv(netdev); - return priv->mdev; + mdev = priv->mdev; + dev_put(netdev); + + /* Mirred tc action holds a refcount on the ifindex net_device (see + * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev + * after dev_put(netdev), while we're in the context of adding a tc flow. + * + * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then + * stored in a hairpin object, which exists until all flows, that refer to it, get + * removed. + * + * On the other hand, after a hairpin object has been created, the peer net_device may + * be removed/unbound while there are still some hairpin flows that are using it. This + * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to + * NETDEV_UNREGISTER event of the peer net_device. + */ + return mdev; } static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) @@ -666,6 +686,10 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params func_mdev = priv->mdev; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + err = PTR_ERR(peer_mdev); + goto create_pair_err; + } pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); if (IS_ERR(pair)) { @@ -804,6 +828,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, int err; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device"); + return PTR_ERR(peer_mdev); + } + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); return -EOPNOTSUPP; From 740452e09cf5fc489ce60831cf11abef117b5d26 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 26 Apr 2021 11:06:37 +0800 Subject: [PATCH 260/502] net/mlx5: Fix mlx5_vport_tbl_attr chain from u16 to u32 The offending refactor commit uses u16 chain wrongly. Actually, it should be u32. Fixes: c620b772152b ("net/mlx5: Refactor tc flow attributes structure") CC: Ariel Levkovich Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 48cac5bf606d..d562edf5b0bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -636,7 +636,7 @@ struct esw_vport_tbl_namespace { }; struct mlx5_vport_tbl_attr { - u16 chain; + u32 chain; u16 prio; u16 vport; const struct esw_vport_tbl_namespace *vport_ns; From 5ab189cf3abbc9994bae3be524c5b88589ed56e2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Jul 2021 14:38:09 -1000 Subject: [PATCH 261/502] blk-iocost: fix operation ordering in iocg_wake_fn() iocg_wake_fn() open-codes wait_queue_entry removal and wakeup because it wants the wq_entry to be always removed whether it ended up waking the task or not. finish_wait() tests whether wq_entry needs removal without grabbing the wait_queue lock and expects the waker to use list_del_init_careful() after all waking operations are complete, which iocg_wake_fn() didn't do. The operation order was wrong and the regular list_del_init() was used. The result is that if a waiter wakes up racing the waker, it can free pop the wq_entry off stack before the waker is still looking at it, which can lead to a backtrace like the following. [7312084.588951] general protection fault, probably for non-canonical address 0x586bf4005b2b88: 0000 [#1] SMP ... [7312084.647079] RIP: 0010:queued_spin_lock_slowpath+0x171/0x1b0 ... [7312084.858314] Call Trace: [7312084.863548] _raw_spin_lock_irqsave+0x22/0x30 [7312084.872605] try_to_wake_up+0x4c/0x4f0 [7312084.880444] iocg_wake_fn+0x71/0x80 [7312084.887763] __wake_up_common+0x71/0x140 [7312084.895951] iocg_kick_waitq+0xe8/0x2b0 [7312084.903964] ioc_rqos_throttle+0x275/0x650 [7312084.922423] __rq_qos_throttle+0x20/0x30 [7312084.930608] blk_mq_make_request+0x120/0x650 [7312084.939490] generic_make_request+0xca/0x310 [7312084.957600] submit_bio+0x173/0x200 [7312084.981806] swap_readpage+0x15c/0x240 [7312084.989646] read_swap_cache_async+0x58/0x60 [7312084.998527] swap_cluster_readahead+0x201/0x320 [7312085.023432] swapin_readahead+0x2df/0x450 [7312085.040672] do_swap_page+0x52f/0x820 [7312085.058259] handle_mm_fault+0xa16/0x1420 [7312085.066620] do_page_fault+0x2c6/0x5c0 [7312085.074459] page_fault+0x2f/0x40 Fix it by switching to list_del_init_careful() and putting it at the end. Signed-off-by: Tejun Heo Reported-by: Rik van Riel Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Jens Axboe --- block/blk-iocost.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index c2d6bc88d3f1..5fac3757e6e0 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode, return -1; iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost); + wait->committed = true; /* * autoremove_wake_function() removes the wait entry only when it - * actually changed the task state. We want the wait always - * removed. Remove explicitly and use default_wake_function(). + * actually changed the task state. We want the wait always removed. + * Remove explicitly and use default_wake_function(). Note that the + * order of operations is important as finish_wait() tests whether + * @wq_entry is removed without grabbing the lock. */ - list_del_init(&wq_entry->entry); - wait->committed = true; - default_wake_function(wq_entry, mode, flags, key); + list_del_init_careful(&wq_entry->entry); return 0; } From 340e84573878b2b9d63210482af46883366361b9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Jul 2021 09:53:54 +0200 Subject: [PATCH 262/502] block: delay freeing the gendisk blkdev_get_no_open acquires a reference to the block_device through the block device inode and then tries to acquire a device model reference to the gendisk. But at this point the disk migh already be freed (although the race is free). Fix this by only freeing the gendisk from the whole device bdevs ->free_inode callback as well. Fixes: 22ae8ce8b892 ("block: simplify bdev/disk lookup in blkdev_get") Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210722075402.983367-2-hch@lst.de Signed-off-by: Jens Axboe --- block/genhd.c | 3 +-- fs/block_dev.c | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index af4d2ab4a633..298ee78c1bda 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); xa_destroy(&disk->part_tbl); - bdput(disk->part0); if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue) blk_put_queue(disk->queue); - kfree(disk); + bdput(disk->part0); /* frees the disk */ } struct class block_class = { .name = "block", diff --git a/fs/block_dev.c b/fs/block_dev.c index ca8bf1869ca8..a38b0f33211c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode) free_percpu(bdev->bd_stats); kfree(bdev->bd_meta_info); + if (!bdev_is_partition(bdev)) + kfree(bdev->bd_disk); kmem_cache_free(bdev_cachep, BDEV_I(inode)); } From fa20bada3f934e3b3e4af4c77e5b518cd5a282e5 Mon Sep 17 00:00:00 2001 From: Maxim Devaev Date: Tue, 27 Jul 2021 21:58:00 +0300 Subject: [PATCH 263/502] usb: gadget: f_hid: idle uses the highest byte for duration SET_IDLE value must be shifted 8 bits to the right to get duration. This confirmed by USBCV test. Fixes: afcff6dc690e ("usb: gadget: f_hid: added GET_IDLE and SET_IDLE handlers") Cc: stable Signed-off-by: Maxim Devaev Link: https://lore.kernel.org/r/20210727185800.43796-1-mdevaev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 8d50c8b127fd..bb476e121eae 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -573,7 +573,7 @@ static int hidg_setup(struct usb_function *f, | HID_REQ_SET_IDLE): VDBG(cdev, "set_idle\n"); length = 0; - hidg->idle = value; + hidg->idle = value >> 8; goto respond; break; From d54db74ad6e0dea8c253fb68c689b836657ab914 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 7 Jun 2021 14:46:38 +0800 Subject: [PATCH 264/502] dmaengine: stm32-dma: Fix PM usage counter imbalance in stm32 dma ops pm_runtime_get_sync will increment pm usage counter even it failed. Forgetting to putting operation will result in reference leak here. We fix it by replacing it with pm_runtime_resume_and_get to keep usage counter balanced. Fixes: 48bc73ba14bcd ("dmaengine: stm32-dma: Add PM Runtime support") Fixes: 05f8740a0e6fc ("dmaengine: stm32-dma: add suspend/resume power management support") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20210607064640.121394-2-zhangqilong3@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index f54ecb123a52..7dd1d3d0bf06 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1200,7 +1200,7 @@ static int stm32_dma_alloc_chan_resources(struct dma_chan *c) chan->config_init = false; - ret = pm_runtime_get_sync(dmadev->ddev.dev); + ret = pm_runtime_resume_and_get(dmadev->ddev.dev); if (ret < 0) return ret; @@ -1470,7 +1470,7 @@ static int stm32_dma_suspend(struct device *dev) struct stm32_dma_device *dmadev = dev_get_drvdata(dev); int id, ret, scr; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; From baa16371c9525f24d508508e4d296c031e1de29c Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 7 Jun 2021 14:46:39 +0800 Subject: [PATCH 265/502] dmaengine: stm32-dmamux: Fix PM usage counter unbalance in stm32 dmamux ops pm_runtime_get_sync will increment pm usage counter even it failed. Forgetting to putting operation will result in reference leak here. We fix it by replacing it with pm_runtime_resume_and_get to keep usage counter balanced. Fixes: 4f3ceca254e0f ("dmaengine: stm32-dmamux: Add PM Runtime support") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20210607064640.121394-3-zhangqilong3@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-dmamux.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c index ef0d0555103d..a42164389ebc 100644 --- a/drivers/dma/stm32-dmamux.c +++ b/drivers/dma/stm32-dmamux.c @@ -137,7 +137,7 @@ static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec, /* Set dma request */ spin_lock_irqsave(&dmamux->lock, flags); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { spin_unlock_irqrestore(&dmamux->lock, flags); goto error; @@ -336,7 +336,7 @@ static int stm32_dmamux_suspend(struct device *dev) struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev); int i, ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; @@ -361,7 +361,7 @@ static int stm32_dmamux_resume(struct device *dev) if (ret < 0) return ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; From eda97cb095f2958bbad55684a6ca3e7d7af0176a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Sat, 17 Jul 2021 22:00:21 +0300 Subject: [PATCH 266/502] dmaengine: of-dma: router_xlate to return -EPROBE_DEFER if controller is not yet available If the router_xlate can not find the controller in the available DMA devices then it should return with -EPORBE_DEFER in a same way as the of_dma_request_slave_channel() does. The issue can be reproduced if the event router is registered before the DMA controller itself and a driver would request for a channel before the controller is registered. In of_dma_request_slave_channel(): 1. of_dma_find_controller() would find the dma_router 2. ofdma->of_dma_xlate() would fail and returned NULL 3. -ENODEV is returned as error code with this patch we would return in this case the correct -EPROBE_DEFER and the client can try to request the channel later. Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20210717190021.21897-1-peter.ujfalusi@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/of-dma.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index ec00b20ae8e4..ac61ecda2926 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -67,8 +67,12 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, return NULL; ofdma_target = of_dma_find_controller(&dma_spec_target); - if (!ofdma_target) - return NULL; + if (!ofdma_target) { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + chan = ERR_PTR(-EPROBE_DEFER); + goto err; + } chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); if (IS_ERR_OR_NULL(chan)) { @@ -89,6 +93,7 @@ static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, } } +err: /* * Need to put the node back since the ofdma->of_dma_route_allocate * has taken it for generating the new, translated dma_spec From 46573e3ab08fb041d5ba7bf7bf3215a1e724c78c Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Wed, 28 Jul 2021 09:49:25 +0800 Subject: [PATCH 267/502] nfc: s3fwrn5: fix undefined parameter values in dev_err() In the function s3fwrn5_fw_download(), the 'ret' is not assigned, so the correct value should be given in dev_err function. Fixes: a0302ff5906a ("nfc: s3fwrn5: remove unnecessary label") Signed-off-by: Zhang Shengju Signed-off-by: Tang Bin Reviewed-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/nfc/s3fwrn5/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index 1340fab9565e..e3e72b8a29e3 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -423,7 +423,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) if (IS_ERR(tfm)) { ret = PTR_ERR(tfm); dev_err(&fw_info->ndev->nfc_dev->dev, - "Cannot allocate shash (code=%ld)\n", PTR_ERR(tfm)); + "Cannot allocate shash (code=%pe)\n", tfm); goto out; } From 557fb5862c9272ad9b21407afe1da8acfd9b53eb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 27 Jul 2021 23:40:54 -0300 Subject: [PATCH 268/502] sctp: fix return value check in __sctp_rcv_asconf_lookup As Ben Hutchings noticed, this check should have been inverted: the call returns true in case of success. Reported-by: Ben Hutchings Fixes: 0c5dc070ff3d ("sctp: validate from_addr_param return") Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index eb3c2a34a31c..5ef86fdb1176 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1203,7 +1203,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( if (unlikely(!af)) return NULL; - if (af->from_addr_param(&paddr, param, peer_port, 0)) + if (!af->from_addr_param(&paddr, param, peer_port, 0)) return NULL; return __sctp_lookup_association(net, laddr, &paddr, transportp); From 76a16be07b209a3f507c72abe823bd3af1c8661a Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 28 Jul 2021 15:43:13 +0800 Subject: [PATCH 269/502] tulip: windbond-840: Fix missing pci_disable_device() in probe and remove Replace pci_enable_device() with pcim_enable_device(), pci_disable_device() and pci_release_regions() will be called in release automatically. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/winbond-840.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index f6ff1f76eacb..1876f15dd827 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -357,7 +357,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) int i, option = find_cnt < MAX_UNITS ? options[find_cnt] : 0; void __iomem *ioaddr; - i = pci_enable_device(pdev); + i = pcim_enable_device(pdev); if (i) return i; pci_set_master(pdev); @@ -379,7 +379,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size); if (!ioaddr) - goto err_out_free_res; + goto err_out_netdev; for (i = 0; i < 3; i++) ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(eeprom_read(ioaddr, i)); @@ -458,8 +458,6 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_cleardev: pci_iounmap(pdev, ioaddr); -err_out_free_res: - pci_release_regions(pdev); err_out_netdev: free_netdev (dev); return -ENODEV; @@ -1526,7 +1524,6 @@ static void w840_remove1(struct pci_dev *pdev) if (dev) { struct netdev_private *np = netdev_priv(dev); unregister_netdev(dev); - pci_release_regions(pdev); pci_iounmap(pdev, np->base_addr); free_netdev(dev); } From 5e7b30d24a5b8cb691c173b45b50e3ca0191be19 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 Jul 2021 08:49:09 +0200 Subject: [PATCH 270/502] nfc: nfcsim: fix use after free during module unload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a use after free memory corruption during module exit: - nfcsim_exit() - nfcsim_device_free(dev0) - nfc_digital_unregister_device() This iterates over command queue and frees all commands, - dev->up = false - nfcsim_link_shutdown() - nfcsim_link_recv_wake() This wakes the sleeping thread nfcsim_link_recv_skb(). - nfcsim_link_recv_skb() Wake from wait_event_interruptible_timeout(), call directly the deb->cb callback even though (dev->up == false), - digital_send_cmd_complete() Dereference of "struct digital_cmd" cmd which was freed earlier by nfc_digital_unregister_device(). This causes memory corruption shortly after (with unrelated stack trace): nfc nfc0: NFC: nfcsim_recv_wq: Device is down llcp: nfc_llcp_recv: err -19 nfc nfc1: NFC: nfcsim_recv_wq: Device is down BUG: unable to handle page fault for address: ffffffffffffffed Call Trace: fsnotify+0x54b/0x5c0 __fsnotify_parent+0x1fe/0x300 ? vfs_write+0x27c/0x390 vfs_write+0x27c/0x390 ksys_write+0x63/0xe0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae KASAN report: BUG: KASAN: use-after-free in digital_send_cmd_complete+0x16/0x50 Write of size 8 at addr ffff88800a05f720 by task kworker/0:2/71 Workqueue: events nfcsim_recv_wq [nfcsim] Call Trace:  dump_stack_lvl+0x45/0x59  print_address_description.constprop.0+0x21/0x140  ? digital_send_cmd_complete+0x16/0x50  ? digital_send_cmd_complete+0x16/0x50  kasan_report.cold+0x7f/0x11b  ? digital_send_cmd_complete+0x16/0x50  ? digital_dep_link_down+0x60/0x60  digital_send_cmd_complete+0x16/0x50  nfcsim_recv_wq+0x38f/0x3d5 [nfcsim]  ? nfcsim_in_send_cmd+0x4a/0x4a [nfcsim]  ? lock_is_held_type+0x98/0x110  ? finish_wait+0x110/0x110  ? rcu_read_lock_sched_held+0x9c/0xd0  ? rcu_read_lock_bh_held+0xb0/0xb0  ? lockdep_hardirqs_on_prepare+0x12e/0x1f0 This flow of calling digital_send_cmd_complete() callback on driver exit is specific to nfcsim which implements reading and sending work queues. Since the NFC digital device was unregistered, the callback should not be called. Fixes: 204bddcb508f ("NFC: nfcsim: Make use of the Digital layer") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/nfcsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c index a9864fcdfba6..dd27c85190d3 100644 --- a/drivers/nfc/nfcsim.c +++ b/drivers/nfc/nfcsim.c @@ -192,8 +192,7 @@ static void nfcsim_recv_wq(struct work_struct *work) if (!IS_ERR(skb)) dev_kfree_skb(skb); - - skb = ERR_PTR(-ENODEV); + return; } dev->cb(dev->nfc_digital_dev, dev->arg, skb); From e9c6729acb38bcf027e40a5b50b2e1b0aa4bc170 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 23 Jul 2021 17:08:40 +0200 Subject: [PATCH 271/502] HID: fix typo in Kconfig There is a missing space in "relyingon". Add it. Signed-off-by: Christophe JAILLET Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 160554903ef9..76937f716fbe 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -576,7 +576,7 @@ config HID_LOGITECH_HIDPP depends on HID_LOGITECH select POWER_SUPPLY help - Support for Logitech devices relyingon the HID++ Logitech specification + Support for Logitech devices relying on the HID++ Logitech specification Say Y if you want support for Logitech devices relying on the HID++ specification. Such devices are the various Logitech Touchpads (T650, From ebe0b42a4252333aa4af60fd4d11b69405aa6068 Mon Sep 17 00:00:00 2001 From: Haochen Tong Date: Sun, 18 Jul 2021 01:04:31 +0800 Subject: [PATCH 272/502] HID: apple: Add support for Keychron K1 wireless keyboard The Keychron K1 wireless keyboard has a set of Apple-like function keys and an Fn key that works like on an Apple bluetooth keyboard. It identifies as an Apple Alu RevB ANSI keyboard (05ac:024f) over USB and BT. Use hid-apple for it so the Fn key and function keys work correctly. Signed-off-by: Haochen Tong Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 6b8f0d004d34..dc6bd4299c54 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -501,6 +501,8 @@ static const struct hid_device_id apple_devices[] = { APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), .driver_data = APPLE_HAS_FN }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), + .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), .driver_data = APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), From 0818ec1f508fc3b8e957f6c7f77b988c5bc24da7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 19 Jul 2021 11:27:31 +0100 Subject: [PATCH 273/502] HID: Kconfig: Fix spelling mistake "Uninterruptable" -> "Uninterruptible" There is a spelling mistake in the Kconfig text. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig index dcf3a235870f..7c2032f7f44d 100644 --- a/drivers/hid/usbhid/Kconfig +++ b/drivers/hid/usbhid/Kconfig @@ -38,7 +38,7 @@ config USB_HIDDEV help Say Y here if you want to support HID devices (from the USB specification standpoint) that aren't strictly user interface - devices, like monitor controls and Uninterruptable Power Supplies. + devices, like monitor controls and Uninterruptible Power Supplies. This module supports these devices separately using a separate event interface on /dev/usb/hiddevX (char 180:96 to 180:111). From 6ca2350e11f09d5d3e53777d1eff8ff6d300ed93 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 19 Jul 2021 13:55:28 -0700 Subject: [PATCH 274/502] HID: wacom: Re-enable touch by default for Cintiq 24HDT / 27QHDT Commit 670e90924bfe ("HID: wacom: support named keys on older devices") added support for sending named events from the soft buttons on the 24HDT and 27QHDT. In the process, however, it inadvertantly disabled the touchscreen of the 24HDT and 27QHDT by default. The `wacom_set_shared_values` function would normally enable touch by default but because it checks the state of the non-shared `has_mute_touch_switch` flag and `wacom_setup_touch_input_capabilities` sets the state of the /shared/ version, touch ends up being disabled by default. This patch sets the non-shared flag, letting `wacom_set_shared_values` take care of copying the value over to the shared version and setting the default touch state to "on". Fixes: 670e90924bfe ("HID: wacom: support named keys on older devices") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 81d7d12bcf34..496a000ef862 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -3831,7 +3831,7 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev, wacom_wac->shared->touch->product == 0xF6) { input_dev->evbit[0] |= BIT_MASK(EV_SW); __set_bit(SW_MUTE_DEVICE, input_dev->swbit); - wacom_wac->shared->has_mute_touch_switch = true; + wacom_wac->has_mute_touch_switch = true; } fallthrough; From 7cc8524f65ce1a350042836c7cf837046aaa6e21 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 19 Jul 2021 13:55:31 -0700 Subject: [PATCH 275/502] HID: wacom: Skip processing of touches with negative slot values The `input_mt_get_slot_by_key` function may return a negative value if an error occurs (e.g. running out of slots). If this occurs we should really avoid reporting any data for the slot. Signed-off-by: Ping Cheng Signed-off-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 496a000ef862..81ba642adcb7 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2548,6 +2548,9 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac, int slot; slot = input_mt_get_slot_by_key(input, hid_data->id); + if (slot < 0) + return; + input_mt_slot(input, slot); input_mt_report_slot_state(input, MT_TOOL_FINGER, prox); } From a59c7b6c6ff6d5437f293709e766f939d7107266 Mon Sep 17 00:00:00 2001 From: Ping Bao Date: Wed, 21 Jul 2021 15:56:15 -0700 Subject: [PATCH 276/502] platform/x86: intel-hid: add Alder Lake ACPI device ID Alder Lake has a new ACPI ID for Intel HID event filter device. Signed-off-by: Ping Bao Link: https://lore.kernel.org/r/20210721225615.20575-1-ping.a.bao@intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel-hid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index 078648a9201b..e5fbe017f8e1 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -25,6 +25,7 @@ static const struct acpi_device_id intel_hid_ids[] = { {"INT33D5", 0}, {"INTC1051", 0}, {"INTC1054", 0}, + {"INTC1070", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); From 2b2c66f607d00d17f879c0d946d44340bfbdc501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 26 Jul 2021 17:36:30 +0200 Subject: [PATCH 277/502] platform/x86: gigabyte-wmi: add support for B550 Aorus Elite V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported as working here: https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/1#issuecomment-879398883 Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20210726153630.65213-1-linux@weissschuh.net Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 5529d7b0abea..fbb224a82e34 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -141,6 +141,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), From 1e60cebf82948cfdc9497ea4553bab125587593c Mon Sep 17 00:00:00 2001 From: zhang kai Date: Wed, 28 Jul 2021 18:54:18 +0800 Subject: [PATCH 278/502] net: let flow have same hash in two directions using same source and destination ip/port for flow hash calculation within the two directions. Signed-off-by: zhang kai Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2aadbfc5193b..4b2415d34873 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1504,7 +1504,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow) } EXPORT_SYMBOL(flow_get_u32_dst); -/* Sort the source and destination IP (and the ports if the IP are the same), +/* Sort the source and destination IP and the ports, * to have consistent hash within the two directions */ static inline void __flow_hash_consistentify(struct flow_keys *keys) @@ -1515,11 +1515,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) case FLOW_DISSECTOR_KEY_IPV4_ADDRS: addr_diff = (__force u32)keys->addrs.v4addrs.dst - (__force u32)keys->addrs.v4addrs.src; - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); + + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; @@ -1527,13 +1527,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) addr_diff = memcmp(&keys->addrs.v6addrs.dst, &keys->addrs.v6addrs.src, sizeof(keys->addrs.v6addrs.dst)); - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) { for (i = 0; i < 4; i++) swap(keys->addrs.v6addrs.src.s6_addr32[i], keys->addrs.v6addrs.dst.s6_addr32[i]); + } + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; From 89fb62fde3b226f99b7015280cf132e2a7438edf Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 28 Jul 2021 20:11:07 +0800 Subject: [PATCH 279/502] sis900: Fix missing pci_disable_device() in probe and remove Replace pci_enable_device() with pcim_enable_device(), pci_disable_device() and pci_release_regions() will be called in release automatically. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index ca9c00b7f588..cff87de9178a 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -443,7 +443,7 @@ static int sis900_probe(struct pci_dev *pci_dev, #endif /* setup various bits in PCI command register */ - ret = pci_enable_device(pci_dev); + ret = pcim_enable_device(pci_dev); if(ret) return ret; i = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32)); @@ -469,7 +469,7 @@ static int sis900_probe(struct pci_dev *pci_dev, ioaddr = pci_iomap(pci_dev, 0, 0); if (!ioaddr) { ret = -ENOMEM; - goto err_out_cleardev; + goto err_out; } sis_priv = netdev_priv(net_dev); @@ -581,8 +581,6 @@ err_unmap_tx: sis_priv->tx_ring_dma); err_out_unmap: pci_iounmap(pci_dev, ioaddr); -err_out_cleardev: - pci_release_regions(pci_dev); err_out: free_netdev(net_dev); return ret; @@ -2499,7 +2497,6 @@ static void sis900_remove(struct pci_dev *pci_dev) sis_priv->tx_ring_dma); pci_iounmap(pci_dev, sis_priv->ioaddr); free_netdev(net_dev); - pci_release_regions(pci_dev); } static int __maybe_unused sis900_suspend(struct device *dev) From ef04688871f3386b6d40ade8f5c664290420f819 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Jul 2021 10:50:31 -0600 Subject: [PATCH 280/502] io_uring: don't block level reissue off completion path Some setups, like SCSI, can throw spurious -EAGAIN off the softirq completion path. Normally we expect this to happen inline as part of submission, but apparently SCSI has a weird corner case where it can happen as part of normal completions. This should be solved by having the -EAGAIN bubble back up the stack as part of submission, but previous attempts at this failed and we're not just quite there yet. Instead we currently use REQ_F_REISSUE to handle this case. For now, catch it in io_rw_should_reissue() and prevent a reissue from a bogus path. Cc: stable@vger.kernel.org Reported-by: Fabian Ebner Tested-by: Fabian Ebner Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6ba101cd4661..83f67d33bf67 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2447,6 +2447,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req) */ if (percpu_ref_is_dying(&ctx->refs)) return false; + /* + * Play it safe and assume not safe to re-import and reissue if we're + * not in the original thread group (or in task context). + */ + if (!same_thread_group(req->task, current) || !in_task()) + return false; return true; } #else From a890d01e4ee016978776e45340e521b3bbbdf41f Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Wed, 28 Jul 2021 11:03:22 +0800 Subject: [PATCH 281/502] io_uring: fix poll requests leaking second poll entries For pure poll requests, it doesn't remove the second poll wait entry when it's done, neither after vfs_poll() or in the poll completion handler. We should remove the second poll wait entry. And we use io_poll_remove_double() rather than io_poll_remove_waitqs() since the latter has some redundant logic. Fixes: 88e41cf928a6 ("io_uring: add multishot mode for IORING_OP_POLL_ADD") Cc: stable@vger.kernel.org # 5.13+ Signed-off-by: Hao Xu Link: https://lore.kernel.org/r/20210728030322.12307-1-haoxu@linux.alibaba.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 83f67d33bf67..bf548af0426c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4939,7 +4939,6 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask) if (req->poll.events & EPOLLONESHOT) flags = 0; if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { - io_poll_remove_waitqs(req); req->poll.done = true; flags = 0; } @@ -4962,6 +4961,7 @@ static void io_poll_task_func(struct io_kiocb *req) done = io_poll_complete(req, req->result); if (done) { + io_poll_remove_double(req); hash_del(&req->hash_node); } else { req->result = 0; @@ -5149,7 +5149,7 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, ipt->error = -EINVAL; spin_lock_irq(&ctx->completion_lock); - if (ipt->error) + if (ipt->error || (mask && (poll->events & EPOLLONESHOT))) io_poll_remove_double(req); if (likely(poll->head)) { spin_lock(&poll->head->lock); @@ -5221,7 +5221,6 @@ static int io_arm_poll_handler(struct io_kiocb *req) ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, io_async_wake); if (ret || ipt.error) { - io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); if (ret) return IO_APOLL_READY; From 36c2530ea963884eeb0097169f853fdc36f16ad7 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 27 Jul 2021 18:04:28 +0200 Subject: [PATCH 282/502] spi: imx: mx51-ecspi: Fix CONFIGREG delay comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For (2 * 1000000) / min_speed_hz < 10 to be true in naturals with zero, the min_speed_hz must be above 200000 (i.e. 200001 rounds down to 9, so the condition triggers). Update the comment. No functional change. Fixes: 6fd8b8503a0dc ("spi: spi-imx: Fix out-of-order CS/SCLK operation at low speeds") Signed-off-by: Marek Vasut Cc: Uwe Kleine-König Cc: Mark Brown Acked-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20210727160428.7673-1-marex@denx.de Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 2872993550bd..fa68e9817929 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -593,7 +593,7 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx, } delay = (2 * 1000000) / min_speed_hz; - if (likely(delay < 10)) /* SCLK is faster than 100 kHz */ + if (likely(delay < 10)) /* SCLK is faster than 200 kHz */ udelay(delay); else /* SCLK is _very_ slow */ usleep_range(delay, delay + 10); From e0eef3690dc66b3ecc6e0f1267f332403eb22bea Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 28 Jul 2021 23:19:58 +0800 Subject: [PATCH 283/502] Revert "ACPI: resources: Add checks for ACPI IRQ override" The commit 0ec4e55e9f57 ("ACPI: resources: Add checks for ACPI IRQ override") introduces regression on some platforms, at least it makes the UART can't get correct irq setting on two different platforms, and it makes the kernel can't bootup on these two platforms. This reverts commit 0ec4e55e9f571f08970ed115ec0addc691eda613. Regression-discuss: https://bugzilla.kernel.org/show_bug.cgi?id=213031 Reported-by: PGNd Cc: 5.4+ # 5.4+ Signed-off-by: Hui Wang Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index dc01fb550b28..ee78a210c606 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -423,13 +423,6 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi, } } -static bool irq_is_legacy(struct acpi_resource_irq *irq) -{ - return irq->triggering == ACPI_EDGE_SENSITIVE && - irq->polarity == ACPI_ACTIVE_HIGH && - irq->shareable == ACPI_EXCLUSIVE; -} - /** * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information. * @ares: Input ACPI resource object. @@ -468,7 +461,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, } acpi_dev_get_irqresource(res, irq->interrupts[index], irq->triggering, irq->polarity, - irq->shareable, irq_is_legacy(irq)); + irq->shareable, true); break; case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: ext_irq = &ares->data.extended_irq; From 41a8457f3f6f829be1f8f8fa7577a46b9b7223ef Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 27 Jul 2021 09:18:24 -0700 Subject: [PATCH 284/502] ACPI: DPTF: Fix reading of attributes The current assumption that methods to read PCH FIVR attributes will return integer, is not correct. There is no good way to return integer as negative numbers are also valid. These read methods return a package of integers. The first integer returns status, which is 0 on success and any other value for failure. When the returned status is zero, then the second integer returns the actual value. This change fixes this issue by replacing acpi_evaluate_integer() with acpi_evaluate_object() and use acpi_extract_package() to extract results. Fixes: 2ce6324eadb01 ("ACPI: DPTF: Add PCH FIVR participant driver") Signed-off-by: Srinivas Pandruvada Cc: 5.10+ # 5.10+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/dptf/dptf_pch_fivr.c | 51 ++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c index 5fca18296bf6..550b9081fcbc 100644 --- a/drivers/acpi/dptf/dptf_pch_fivr.c +++ b/drivers/acpi/dptf/dptf_pch_fivr.c @@ -9,6 +9,42 @@ #include #include +struct pch_fivr_resp { + u64 status; + u64 result; +}; + +static int pch_fivr_read(acpi_handle handle, char *method, struct pch_fivr_resp *fivr_resp) +{ + struct acpi_buffer resp = { sizeof(struct pch_fivr_resp), fivr_resp}; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer format = { sizeof("NN"), "NN" }; + union acpi_object *obj; + acpi_status status; + int ret = -EFAULT; + + status = acpi_evaluate_object(handle, method, NULL, &buffer); + if (ACPI_FAILURE(status)) + return ret; + + obj = buffer.pointer; + if (!obj || obj->type != ACPI_TYPE_PACKAGE) + goto release_buffer; + + status = acpi_extract_package(obj, &format, &resp); + if (ACPI_FAILURE(status)) + goto release_buffer; + + if (fivr_resp->status) + goto release_buffer; + + ret = 0; + +release_buffer: + kfree(buffer.pointer); + return ret; +} + /* * Presentation of attributes which are defined for INT1045 * They are: @@ -23,15 +59,14 @@ static ssize_t name##_show(struct device *dev,\ char *buf)\ {\ struct acpi_device *acpi_dev = dev_get_drvdata(dev);\ - unsigned long long val;\ - acpi_status status;\ + struct pch_fivr_resp fivr_resp;\ + int status;\ \ - status = acpi_evaluate_integer(acpi_dev->handle, #method,\ - NULL, &val);\ - if (ACPI_SUCCESS(status))\ - return sprintf(buf, "%d\n", (int)val);\ - else\ - return -EINVAL;\ + status = pch_fivr_read(acpi_dev->handle, #method, &fivr_resp);\ + if (status)\ + return status;\ +\ + return sprintf(buf, "%llu\n", fivr_resp.result);\ } #define PCH_FIVR_STORE(name, method) \ From 240246f6b913b0c23733cfd2def1d283f8cc9bbe Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Fri, 9 Jul 2021 11:29:22 -0500 Subject: [PATCH 285/502] btrfs: mark compressed range uptodate only if all bio succeed In compression write endio sequence, the range which the compressed_bio writes is marked as uptodate if the last bio of the compressed (sub)bios is completed successfully. There could be previous bio which may have failed which is recorded in cb->errors. Set the writeback range as uptodate only if cb->errors is zero, as opposed to checking only the last bio's status. Backporting notes: in all versions up to 4.4 the last argument is always replaced by "!cb->errors". CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Goldwyn Rodrigues Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9a023ae0f98b..30d82cdf128c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -352,7 +352,7 @@ static void end_compressed_bio_write(struct bio *bio) btrfs_record_physical_zoned(inode, cb->start, bio); btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL, cb->start, cb->start + cb->len - 1, - bio->bi_status == BLK_STS_OK); + !cb->errors); end_compressed_writeback(inode, cb); /* note, our inode could be gone now */ From ecc64fab7d49c678e70bd4c35fe64d2ab3e3d212 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 27 Jul 2021 11:24:43 +0100 Subject: [PATCH 286/502] btrfs: fix lost inode on log replay after mix of fsync, rename and inode eviction When checking if we need to log the new name of a renamed inode, we are checking if the inode and its parent inode have been logged before, and if not we don't log the new name. The check however is buggy, as it directly compares the logged_trans field of the inodes versus the ID of the current transaction. The problem is that logged_trans is a transient field, only stored in memory and never persisted in the inode item, so if an inode was logged before, evicted and reloaded, its logged_trans field is set to a value of 0, meaning the check will return false and the new name of the renamed inode is not logged. If the old parent directory was previously fsynced and we deleted the logged directory entries corresponding to the old name, we end up with a log that when replayed will delete the renamed inode. The following example triggers the problem: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ mkdir /mnt/A $ mkdir /mnt/B $ echo -n "hello world" > /mnt/A/foo $ sync # Add some new file to A and fsync directory A. $ touch /mnt/A/bar $ xfs_io -c "fsync" /mnt/A # Now trigger inode eviction. We are only interested in triggering # eviction for the inode of directory A. $ echo 2 > /proc/sys/vm/drop_caches # Move foo from directory A to directory B. # This deletes the directory entries for foo in A from the log, and # does not add the new name for foo in directory B to the log, because # logged_trans of A is 0, which is less than the current transaction ID. $ mv /mnt/A/foo /mnt/B/foo # Now make an fsync to anything except A, B or any file inside them, # like for example create a file at the root directory and fsync this # new file. This syncs the log that contains all the changes done by # previous rename operation. $ touch /mnt/baz $ xfs_io -c "fsync" /mnt/baz # Mount the filesystem and replay the log. $ mount /dev/sdc /mnt # Check the filesystem content. $ ls -1R /mnt /mnt/: A B baz /mnt/A: bar /mnt/B: $ # File foo is gone, it's neither in A/ nor in B/. Fix this by using the inode_logged() helper at btrfs_log_new_name(), which safely checks if an inode was logged before in the current transaction. A test case for fstests will follow soon. CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9fd0348be7f5..e6430ac9bbe8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6503,8 +6503,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * if this inode hasn't been logged and directory we're renaming it * from hasn't been logged, we don't need to log it */ - if (inode->logged_trans < trans->transid && - (!old_dir || old_dir->logged_trans < trans->transid)) + if (!inode_logged(trans, inode) && + (!old_dir || !inode_logged(trans, old_dir))) return; /* From b2a616676839e2a6b02c8e40be7f886f882ed194 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Tue, 27 Jul 2021 15:13:03 +0800 Subject: [PATCH 287/502] btrfs: fix rw device counting in __btrfs_free_extra_devids When removing a writeable device in __btrfs_free_extra_devids, the rw device count should be decremented. This error was caught by Syzbot which reported a warning in close_fs_devices: WARNING: CPU: 1 PID: 9355 at fs/btrfs/volumes.c:1168 close_fs_devices+0x763/0x880 fs/btrfs/volumes.c:1168 Modules linked in: CPU: 0 PID: 9355 Comm: syz-executor552 Not tainted 5.13.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:close_fs_devices+0x763/0x880 fs/btrfs/volumes.c:1168 RSP: 0018:ffffc9000333f2f0 EFLAGS: 00010293 RAX: ffffffff8365f5c3 RBX: 0000000000000001 RCX: ffff888029afd4c0 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: ffff88802846f508 R08: ffffffff8365f525 R09: ffffed100337d128 R10: ffffed100337d128 R11: 0000000000000000 R12: dffffc0000000000 R13: ffff888019be8868 R14: 1ffff1100337d10d R15: 1ffff1100337d10a FS: 00007f6f53828700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000047c410 CR3: 00000000302a6000 CR4: 00000000001506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_close_devices+0xc9/0x450 fs/btrfs/volumes.c:1180 open_ctree+0x8e1/0x3968 fs/btrfs/disk-io.c:3693 btrfs_fill_super fs/btrfs/super.c:1382 [inline] btrfs_mount_root+0xac5/0xc60 fs/btrfs/super.c:1749 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x86/0x270 fs/super.c:1498 fc_mount fs/namespace.c:993 [inline] vfs_kern_mount+0xc9/0x160 fs/namespace.c:1023 btrfs_mount+0x3d3/0xb50 fs/btrfs/super.c:1809 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x86/0x270 fs/super.c:1498 do_new_mount fs/namespace.c:2905 [inline] path_mount+0x196f/0x2be0 fs/namespace.c:3235 do_mount fs/namespace.c:3248 [inline] __do_sys_mount fs/namespace.c:3456 [inline] __se_sys_mount+0x2f9/0x3b0 fs/namespace.c:3433 do_syscall_64+0x3f/0xb0 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae Because fs_devices->rw_devices was not 0 after closing all devices. Here is the call trace that was observed: btrfs_mount_root(): btrfs_scan_one_device(): device_list_add(); <---------------- device added btrfs_open_devices(): open_fs_devices(): btrfs_open_one_device(); <-------- writable device opened, rw device count ++ btrfs_fill_super(): open_ctree(): btrfs_free_extra_devids(): __btrfs_free_extra_devids(); <--- writable device removed, rw device count not decremented fail_tree_roots: btrfs_close_devices(): close_fs_devices(); <------- rw device count off by 1 As a note, prior to commit cf89af146b7e ("btrfs: dev-replace: fail mount if we don't have replace item with target device"), rw_devices was decremented on removing a writable device in __btrfs_free_extra_devids only if the BTRFS_DEV_STATE_REPLACE_TGT bit was not set for the device. However, this check does not need to be reinstated as it is now redundant and incorrect. In __btrfs_free_extra_devids, we skip removing the device if it is the target for replacement. This is done by checking whether device->devid == BTRFS_DEV_REPLACE_DEVID. Since BTRFS_DEV_STATE_REPLACE_TGT is set only on the device with devid BTRFS_DEV_REPLACE_DEVID, no devices should have the BTRFS_DEV_STATE_REPLACE_TGT bit set after the check, and so it's redundant to test for that bit. Additionally, following commit 82372bc816d7 ("Btrfs: make the logic of source device removing more clear"), rw_devices is incremented whenever a writeable device is added to the alloc list (including the target device in btrfs_dev_replace_finishing), so all removals of writable devices from the alloc list should also be accompanied by a decrement to rw_devices. Reported-by: syzbot+a70e2ad0879f160b9217@syzkaller.appspotmail.com Fixes: cf89af146b7e ("btrfs: dev-replace: fail mount if we don't have replace item with target device") CC: stable@vger.kernel.org # 5.10+ Tested-by: syzbot+a70e2ad0879f160b9217@syzkaller.appspotmail.com Reviewed-by: Anand Jain Signed-off-by: Desmond Cheong Zhi Xi Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c6c14315b1c9..4c83256ae37f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1078,6 +1078,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { list_del_init(&device->dev_alloc_list); clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); + fs_devices->rw_devices--; } list_del_init(&device->dev_list); fs_devices->num_devices--; From cbcf01128d0a92e131bd09f1688fe032480b65ca Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 28 Jul 2021 14:47:20 +0200 Subject: [PATCH 288/502] af_unix: fix garbage collect vs MSG_PEEK unix_gc() assumes that candidate sockets can never gain an external reference (i.e. be installed into an fd) while the unix_gc_lock is held. Except for MSG_PEEK this is guaranteed by modifying inflight count under the unix_gc_lock. MSG_PEEK does not touch any variable protected by unix_gc_lock (file count is not), yet it needs to be serialized with garbage collection. Do this by locking/unlocking unix_gc_lock: 1) increment file count 2) lock/unlock barrier to make sure incremented file count is visible to garbage collection 3) install file into fd This is a lock barrier (unlike smp_mb()) that ensures that garbage collection is run completely before or completely after the barrier. Cc: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Miklos Szeredi Signed-off-by: Linus Torvalds --- net/unix/af_unix.c | 51 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 23c92ad15c61..ba7ced947e51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1526,6 +1526,53 @@ out: return err; } +static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + scm->fp = scm_fp_dup(UNIXCB(skb).fp); + + /* + * Garbage collection of unix sockets starts by selecting a set of + * candidate sockets which have reference only from being in flight + * (total_refs == inflight_refs). This condition is checked once during + * the candidate collection phase, and candidates are marked as such, so + * that non-candidates can later be ignored. While inflight_refs is + * protected by unix_gc_lock, total_refs (file count) is not, hence this + * is an instantaneous decision. + * + * Once a candidate, however, the socket must not be reinstalled into a + * file descriptor while the garbage collection is in progress. + * + * If the above conditions are met, then the directed graph of + * candidates (*) does not change while unix_gc_lock is held. + * + * Any operations that changes the file count through file descriptors + * (dup, close, sendmsg) does not change the graph since candidates are + * not installed in fds. + * + * Dequeing a candidate via recvmsg would install it into an fd, but + * that takes unix_gc_lock to decrement the inflight count, so it's + * serialized with garbage collection. + * + * MSG_PEEK is special in that it does not change the inflight count, + * yet does install the socket into an fd. The following lock/unlock + * pair is to ensure serialization with garbage collection. It must be + * done between incrementing the file count and installing the file into + * an fd. + * + * If garbage collection starts after the barrier provided by the + * lock/unlock, then it will see the elevated refcount and not mark this + * as a candidate. If a garbage collection is already in progress + * before the file count was incremented, then the lock/unlock pair will + * ensure that garbage collection is finished before progressing to + * installing the fd. + * + * (*) A -> B where B is on the queue of A or B is on the queue of C + * which is on the queue of listening socket A. + */ + spin_lock(&unix_gc_lock); + spin_unlock(&unix_gc_lock); +} + static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; @@ -2175,7 +2222,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; @@ -2418,7 +2465,7 @@ unlock: /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); From 25905f602fdb0cfa147017056636768a7aa1ff6f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 21 Jul 2021 12:25:20 -0700 Subject: [PATCH 289/502] dmaengine: idxd: Change license on idxd.h to LGPL This file was given GPL-2.0 license. But LGPL-2.1 makes more sense as it needs to be used by libraries outside of the kernel source tree. Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- include/uapi/linux/idxd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e33997b4d750..edc346a77c91 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ #ifndef _USR_IDXD_H_ #define _USR_IDXD_H_ From 345daff2e994ee844d6a609c37f085695fbb4c4d Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Tue, 27 Jul 2021 17:24:18 +0200 Subject: [PATCH 290/502] ucounts: Fix race condition between alloc_ucounts and put_ucounts The race happens because put_ucounts() doesn't use spinlock and get_ucounts is not under spinlock: CPU0 CPU1 ---- ---- alloc_ucounts() put_ucounts() spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); atomic_dec_and_test(&ucounts->count)) spin_unlock_irq(&ucounts_lock); spin_lock_irqsave(&ucounts_lock, flags); hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); ucounts = get_ucounts(ucounts); ================================================================== BUG: KASAN: use-after-free in instrument_atomic_read_write include/linux/instrumented.h:101 [inline] BUG: KASAN: use-after-free in atomic_add_negative include/asm-generic/atomic-instrumented.h:556 [inline] BUG: KASAN: use-after-free in get_ucounts kernel/ucount.c:152 [inline] BUG: KASAN: use-after-free in get_ucounts kernel/ucount.c:150 [inline] BUG: KASAN: use-after-free in alloc_ucounts+0x19b/0x5b0 kernel/ucount.c:188 Write of size 4 at addr ffff88802821e41c by task syz-executor.4/16785 CPU: 1 PID: 16785 Comm: syz-executor.4 Not tainted 5.14.0-rc1-next-20210712-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:105 print_address_description.constprop.0.cold+0x6c/0x309 mm/kasan/report.c:233 __kasan_report mm/kasan/report.c:419 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:436 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189 instrument_atomic_read_write include/linux/instrumented.h:101 [inline] atomic_add_negative include/asm-generic/atomic-instrumented.h:556 [inline] get_ucounts kernel/ucount.c:152 [inline] get_ucounts kernel/ucount.c:150 [inline] alloc_ucounts+0x19b/0x5b0 kernel/ucount.c:188 set_cred_ucounts+0x171/0x3a0 kernel/cred.c:684 __sys_setuid+0x285/0x400 kernel/sys.c:623 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x4665d9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fde54097188 EFLAGS: 00000246 ORIG_RAX: 0000000000000069 RAX: ffffffffffffffda RBX: 000000000056bf80 RCX: 00000000004665d9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000000000ff RBP: 00000000004bfcb9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000056bf80 R13: 00007ffc8655740f R14: 00007fde54097300 R15: 0000000000022000 Allocated by task 16784: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] ____kasan_kmalloc mm/kasan/common.c:513 [inline] ____kasan_kmalloc mm/kasan/common.c:472 [inline] __kasan_kmalloc+0x9b/0xd0 mm/kasan/common.c:522 kmalloc include/linux/slab.h:591 [inline] kzalloc include/linux/slab.h:721 [inline] alloc_ucounts+0x23d/0x5b0 kernel/ucount.c:169 set_cred_ucounts+0x171/0x3a0 kernel/cred.c:684 __sys_setuid+0x285/0x400 kernel/sys.c:623 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 16785: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_set_track+0x1c/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:360 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xfb/0x130 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:229 [inline] slab_free_hook mm/slub.c:1650 [inline] slab_free_freelist_hook+0xdf/0x240 mm/slub.c:1675 slab_free mm/slub.c:3235 [inline] kfree+0xeb/0x650 mm/slub.c:4295 put_ucounts kernel/ucount.c:200 [inline] put_ucounts+0x117/0x150 kernel/ucount.c:192 put_cred_rcu+0x27a/0x520 kernel/cred.c:124 rcu_do_batch kernel/rcu/tree.c:2550 [inline] rcu_core+0x7ab/0x1380 kernel/rcu/tree.c:2785 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Last potentially related work creation: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_record_aux_stack+0xe5/0x110 mm/kasan/generic.c:348 insert_work+0x48/0x370 kernel/workqueue.c:1332 __queue_work+0x5c1/0xed0 kernel/workqueue.c:1498 queue_work_on+0xee/0x110 kernel/workqueue.c:1525 queue_work include/linux/workqueue.h:507 [inline] call_usermodehelper_exec+0x1f0/0x4c0 kernel/umh.c:435 kobject_uevent_env+0xf8f/0x1650 lib/kobject_uevent.c:618 netdev_queue_add_kobject net/core/net-sysfs.c:1621 [inline] netdev_queue_update_kobjects+0x374/0x450 net/core/net-sysfs.c:1655 register_queue_kobjects net/core/net-sysfs.c:1716 [inline] netdev_register_kobject+0x35a/0x430 net/core/net-sysfs.c:1959 register_netdevice+0xd33/0x1500 net/core/dev.c:10331 nsim_init_netdevsim drivers/net/netdevsim/netdev.c:317 [inline] nsim_create+0x381/0x4d0 drivers/net/netdevsim/netdev.c:364 __nsim_dev_port_add+0x32e/0x830 drivers/net/netdevsim/dev.c:1295 nsim_dev_port_add_all+0x53/0x150 drivers/net/netdevsim/dev.c:1355 nsim_dev_probe+0xcb5/0x1190 drivers/net/netdevsim/dev.c:1496 call_driver_probe drivers/base/dd.c:517 [inline] really_probe+0x23c/0xcd0 drivers/base/dd.c:595 __driver_probe_device+0x338/0x4d0 drivers/base/dd.c:747 driver_probe_device+0x4c/0x1a0 drivers/base/dd.c:777 __device_attach_driver+0x20b/0x2f0 drivers/base/dd.c:894 bus_for_each_drv+0x15f/0x1e0 drivers/base/bus.c:427 __device_attach+0x228/0x4a0 drivers/base/dd.c:965 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:487 device_add+0xc2f/0x2180 drivers/base/core.c:3356 nsim_bus_dev_new drivers/net/netdevsim/bus.c:431 [inline] new_device_store+0x436/0x710 drivers/net/netdevsim/bus.c:298 bus_attr_store+0x72/0xa0 drivers/base/bus.c:122 sysfs_kf_write+0x110/0x160 fs/sysfs/file.c:139 kernfs_fop_write_iter+0x342/0x500 fs/kernfs/file.c:296 call_write_iter include/linux/fs.h:2152 [inline] new_sync_write+0x426/0x650 fs/read_write.c:518 vfs_write+0x75a/0xa40 fs/read_write.c:605 ksys_write+0x12d/0x250 fs/read_write.c:658 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Second to last potentially related work creation: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_record_aux_stack+0xe5/0x110 mm/kasan/generic.c:348 insert_work+0x48/0x370 kernel/workqueue.c:1332 __queue_work+0x5c1/0xed0 kernel/workqueue.c:1498 queue_work_on+0xee/0x110 kernel/workqueue.c:1525 queue_work include/linux/workqueue.h:507 [inline] call_usermodehelper_exec+0x1f0/0x4c0 kernel/umh.c:435 kobject_uevent_env+0xf8f/0x1650 lib/kobject_uevent.c:618 kobject_synth_uevent+0x701/0x850 lib/kobject_uevent.c:208 uevent_store+0x20/0x50 drivers/base/core.c:2371 dev_attr_store+0x50/0x80 drivers/base/core.c:2072 sysfs_kf_write+0x110/0x160 fs/sysfs/file.c:139 kernfs_fop_write_iter+0x342/0x500 fs/kernfs/file.c:296 call_write_iter include/linux/fs.h:2152 [inline] new_sync_write+0x426/0x650 fs/read_write.c:518 vfs_write+0x75a/0xa40 fs/read_write.c:605 ksys_write+0x12d/0x250 fs/read_write.c:658 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88802821e400 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 28 bytes inside of 192-byte region [ffff88802821e400, ffff88802821e4c0) The buggy address belongs to the page: page:ffffea0000a08780 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2821e flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 dead000000000100 dead000000000122 ffff888010841a00 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 1, ts 12874702440, free_ts 12637793385 prep_new_page mm/page_alloc.c:2433 [inline] get_page_from_freelist+0xa72/0x2f80 mm/page_alloc.c:4166 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5374 alloc_page_interleave+0x1e/0x200 mm/mempolicy.c:2119 alloc_pages+0x238/0x2a0 mm/mempolicy.c:2242 alloc_slab_page mm/slub.c:1713 [inline] allocate_slab+0x32b/0x4c0 mm/slub.c:1853 new_slab mm/slub.c:1916 [inline] new_slab_objects mm/slub.c:2662 [inline] ___slab_alloc+0x4ba/0x820 mm/slub.c:2825 __slab_alloc.constprop.0+0xa7/0xf0 mm/slub.c:2865 slab_alloc_node mm/slub.c:2947 [inline] slab_alloc mm/slub.c:2989 [inline] __kmalloc+0x312/0x330 mm/slub.c:4133 kmalloc include/linux/slab.h:596 [inline] kzalloc include/linux/slab.h:721 [inline] __register_sysctl_table+0x112/0x1090 fs/proc/proc_sysctl.c:1318 rds_tcp_init_net+0x1db/0x4f0 net/rds/tcp.c:551 ops_init+0xaf/0x470 net/core/net_namespace.c:140 __register_pernet_operations net/core/net_namespace.c:1137 [inline] register_pernet_operations+0x35a/0x850 net/core/net_namespace.c:1214 register_pernet_device+0x26/0x70 net/core/net_namespace.c:1301 rds_tcp_init+0x77/0xe0 net/rds/tcp.c:717 do_one_initcall+0x103/0x650 init/main.c:1285 do_initcall_level init/main.c:1360 [inline] do_initcalls init/main.c:1376 [inline] do_basic_setup init/main.c:1396 [inline] kernel_init_freeable+0x6b8/0x741 init/main.c:1598 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1343 [inline] free_pcp_prepare+0x312/0x7d0 mm/page_alloc.c:1394 free_unref_page_prepare mm/page_alloc.c:3329 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3408 __vunmap+0x783/0xb70 mm/vmalloc.c:2587 free_work+0x58/0x70 mm/vmalloc.c:82 process_one_work+0x98d/0x1630 kernel/workqueue.c:2276 worker_thread+0x658/0x11f0 kernel/workqueue.c:2422 kthread+0x3e5/0x4d0 kernel/kthread.c:319 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Memory state around the buggy address: ffff88802821e300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88802821e380: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc >ffff88802821e400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88802821e480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff88802821e500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== - The race fix has two parts. * Changing the code to guarantee that ucounts->count is only decremented when ucounts_lock is held. This guarantees that find_ucounts will never find a structure with a zero reference count. * Changing alloc_ucounts to increment ucounts->count while ucounts_lock is held. This guarantees the reference count on the found data structure will not be decremented to zero (and the data structure freed) before the reference count is incremented. -- Eric Biederman Reported-by: syzbot+01985d7909f9468f013c@syzkaller.appspotmail.com Reported-by: syzbot+59dd63761094a80ad06d@syzkaller.appspotmail.com Reported-by: syzbot+6cd79f45bb8fa1c9eeae@syzkaller.appspotmail.com Reported-by: syzbot+b6e65bd125a05f803d6b@syzkaller.appspotmail.com Fixes: b6c336528926 ("Use atomic_t for ucounts reference counting") Cc: Hillf Danton Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/7b2ace1759b281cdd2d66101d6b305deef722efb.1627397820.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- kernel/ucount.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 87799e2379bd..77be3bbe3cc4 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -160,6 +160,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; + long overflow; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -184,8 +185,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) return new; } } + overflow = atomic_add_negative(1, &ucounts->count); spin_unlock_irq(&ucounts_lock); - ucounts = get_ucounts(ucounts); + if (overflow) { + put_ucounts(ucounts); + return NULL; + } return ucounts; } @@ -193,8 +198,7 @@ void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); From b946dbcfa4df80ec81b442964e07ad37000cc059 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 28 Jul 2021 16:38:29 +1000 Subject: [PATCH 291/502] cifs: add missing parsing of backupuid We lost parsing of backupuid in the switch to new mount API. Add it back. Signed-off-by: Ronnie Sahlberg Reviewed-by: Shyam Prasad N Cc: # v5.11+ Reported-by: Xiaoli Feng Signed-off-by: Steve French --- fs/cifs/fs_context.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 9a59d7ff9a11..eed59bc1d913 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -925,6 +925,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->cred_uid = uid; ctx->cruid_specified = true; break; + case Opt_backupuid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + goto cifs_parse_mount_err; + ctx->backupuid = uid; + ctx->backupuid_specified = true; + break; case Opt_backupgid: gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) From f5e81d1117501546b7be050c5fbafa6efd2c722c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 13 Jul 2021 08:18:31 +0000 Subject: [PATCH 292/502] bpf: Introduce BPF nospec instruction for mitigating Spectre v4 In case of JITs, each of the JIT backends compiles the BPF nospec instruction /either/ to a machine instruction which emits a speculation barrier /or/ to /no/ machine instruction in case the underlying architecture is not affected by Speculative Store Bypass or has different mitigations in place already. This covers both x86 and (implicitly) arm64: In case of x86, we use 'lfence' instruction for mitigation. In case of arm64, we rely on the firmware mitigation as controlled via the ssbd kernel parameter. Whenever the mitigation is enabled, it works for all of the kernel code with no need to provide any additional instructions here (hence only comment in arm64 JIT). Other archs can follow as needed. The BPF nospec instruction is specifically targeting Spectre v4 since i) we don't use a serialization barrier for the Spectre v1 case, and ii) mitigation instructions for v1 and v4 might be different on some archs. The BPF nospec is required for a future commit, where the BPF verifier does annotate intermediate BPF programs with speculation barriers. Co-developed-by: Piotr Krysiuk Co-developed-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Signed-off-by: Piotr Krysiuk Signed-off-by: Benedict Schlueter Acked-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 3 +++ arch/arm64/net/bpf_jit_comp.c | 13 +++++++++++++ arch/mips/net/ebpf_jit.c | 3 +++ arch/powerpc/net/bpf_jit_comp32.c | 6 ++++++ arch/powerpc/net/bpf_jit_comp64.c | 6 ++++++ arch/riscv/net/bpf_jit_comp32.c | 4 ++++ arch/riscv/net/bpf_jit_comp64.c | 4 ++++ arch/s390/net/bpf_jit_comp.c | 5 +++++ arch/sparc/net/bpf_jit_comp_64.c | 3 +++ arch/x86/net/bpf_jit_comp.c | 7 +++++++ arch/x86/net/bpf_jit_comp32.c | 6 ++++++ include/linux/filter.h | 15 +++++++++++++++ kernel/bpf/core.c | 19 ++++++++++++++++++- kernel/bpf/disasm.c | 16 +++++++++------- 14 files changed, 102 insertions(+), 8 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 897634d0a67c..a951276f0547 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1602,6 +1602,9 @@ exit: rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dccf98a37283..41c23f474ea6 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -823,6 +823,19 @@ emit_cond_jmp: return ret; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + /* + * Nothing required here. + * + * In case of arm64, we rely on the firmware mitigation of + * Speculative Store Bypass as controlled via the ssbd kernel + * parameter. Whenever the mitigation is enabled, it works + * for all of the kernel code with no need to provide any + * additional instructions. + */ + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 939dd06764bc..3a73e9375712 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1355,6 +1355,9 @@ jeq_common: } break; + case BPF_ST | BPF_NOSPEC: /* speculation barrier */ + break; + case BPF_ST | BPF_B | BPF_MEM: case BPF_ST | BPF_H | BPF_MEM: case BPF_ST | BPF_W | BPF_MEM: diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 34bb1583fc0c..beb12cbc8c29 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -737,6 +737,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } break; + /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + /* * BPF_ST(X) */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index de8595880fee..b87a63dba9c8 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -627,6 +627,12 @@ emit_clear: } break; + /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + /* * BPF_ST(X) */ diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index 81de865f4c7c..e6497424cbf6 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -1251,6 +1251,10 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, return -1; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_W: diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 87e3bf5b9086..3af4131c22c7 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -939,6 +939,10 @@ out_be: emit_ld(rd, 0, RV_REG_T1, ctx); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: emit_imm(RV_REG_T1, imm, ctx); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 2ae419f5115a..88419263a89a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1153,6 +1153,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, break; } break; + /* + * BPF_NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; /* * BPF_ST(X) */ diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 4b8d3c65d266..9a2f20cbd48b 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) return 1; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4b951458c9fc..16d76f814e9b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1219,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; + /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: if (is_ereg(dst_reg)) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 3da88ded6ee3..3bfda5f502cb 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, i++; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: diff --git a/include/linux/filter.h b/include/linux/filter.h index 472f97074da0..83b896044e79 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -73,6 +73,11 @@ struct ctl_table_header; /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 +/* unused opcode to mark speculation barrier for mitigating + * Speculative Store Bypass + */ +#define BPF_NOSPEC 0xc0 + /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. @@ -390,6 +395,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = 0, \ .imm = 0 }) +/* Speculation barrier */ + +#define BPF_ST_NOSPEC() \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_NOSPEC, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + /* Internal classic blocks for direct assignment */ #define __BPF_STMT(CODE, K) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9b1577498373..b1a5fc04492b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,6 +32,8 @@ #include #include #include + +#include #include /* Registers */ @@ -1377,6 +1379,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) /* Non-UAPI available opcodes. */ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, + [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC, [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B, [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H, [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W, @@ -1621,7 +1624,21 @@ out: COND_JMP(s, JSGE, >=) COND_JMP(s, JSLE, <=) #undef COND_JMP - /* STX and ST and LDX*/ + /* ST, STX and LDX*/ + ST_NOSPEC: + /* Speculation barrier for mitigating Speculative Store Bypass. + * In case of arm64, we rely on the firmware mitigation as + * controlled via the ssbd kernel parameter. Whenever the + * mitigation is enabled, it works for all of the kernel code + * with no need to provide any additional instructions here. + * In case of x86, we use 'lfence' insn for mitigation. We + * reuse preexisting logic from Spectre v1 mitigation that + * happens to produce the required code on x86 for v4 as well. + */ +#ifdef CONFIG_X86 + barrier_nospec(); +#endif + CONT; #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index bbfc6bb79240..ca3cd9aaa6ce 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -206,15 +206,17 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, verbose(cbs->private_data, "BUG_%02x\n", insn->code); } } else if (class == BPF_ST) { - if (BPF_MODE(insn->code) != BPF_MEM) { + if (BPF_MODE(insn->code) == BPF_MEM) { + verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, + insn->off, insn->imm); + } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) { + verbose(cbs->private_data, "(%02x) nospec\n", insn->code); + } else { verbose(cbs->private_data, "BUG_st_%02x\n", insn->code); - return; } - verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->dst_reg, - insn->off, insn->imm); } else if (class == BPF_LDX) { if (BPF_MODE(insn->code) != BPF_MEM) { verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code); From 2039f26f3aca5b0e419b98f65dd36481337b86ee Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 13 Jul 2021 08:18:31 +0000 Subject: [PATCH 293/502] bpf: Fix leakage due to insufficient speculative store bypass mitigation Spectre v4 gadgets make use of memory disambiguation, which is a set of techniques that execute memory access instructions, that is, loads and stores, out of program order; Intel's optimization manual, section 2.4.4.5: A load instruction micro-op may depend on a preceding store. Many microarchitectures block loads until all preceding store addresses are known. The memory disambiguator predicts which loads will not depend on any previous stores. When the disambiguator predicts that a load does not have such a dependency, the load takes its data from the L1 data cache. Eventually, the prediction is verified. If an actual conflict is detected, the load and all succeeding instructions are re-executed. af86ca4e3088 ("bpf: Prevent memory disambiguation attack") tried to mitigate this attack by sanitizing the memory locations through preemptive "fast" (low latency) stores of zero prior to the actual "slow" (high latency) store of a pointer value such that upon dependency misprediction the CPU then speculatively executes the load of the pointer value and retrieves the zero value instead of the attacker controlled scalar value previously stored at that location, meaning, subsequent access in the speculative domain is then redirected to the "zero page". The sanitized preemptive store of zero prior to the actual "slow" store is done through a simple ST instruction based on r10 (frame pointer) with relative offset to the stack location that the verifier has been tracking on the original used register for STX, which does not have to be r10. Thus, there are no memory dependencies for this store, since it's only using r10 and immediate constant of zero; hence af86ca4e3088 /assumed/ a low latency operation. However, a recent attack demonstrated that this mitigation is not sufficient since the preemptive store of zero could also be turned into a "slow" store and is thus bypassed as well: [...] // r2 = oob address (e.g. scalar) // r7 = pointer to map value 31: (7b) *(u64 *)(r10 -16) = r2 // r9 will remain "fast" register, r10 will become "slow" register below 32: (bf) r9 = r10 // JIT maps BPF reg to x86 reg: // r9 -> r15 (callee saved) // r10 -> rbp // train store forward prediction to break dependency link between both r9 // and r10 by evicting them from the predictor's LRU table. 33: (61) r0 = *(u32 *)(r7 +24576) 34: (63) *(u32 *)(r7 +29696) = r0 35: (61) r0 = *(u32 *)(r7 +24580) 36: (63) *(u32 *)(r7 +29700) = r0 37: (61) r0 = *(u32 *)(r7 +24584) 38: (63) *(u32 *)(r7 +29704) = r0 39: (61) r0 = *(u32 *)(r7 +24588) 40: (63) *(u32 *)(r7 +29708) = r0 [...] 543: (61) r0 = *(u32 *)(r7 +25596) 544: (63) *(u32 *)(r7 +30716) = r0 // prepare call to bpf_ringbuf_output() helper. the latter will cause rbp // to spill to stack memory while r13/r14/r15 (all callee saved regs) remain // in hardware registers. rbp becomes slow due to push/pop latency. below is // disasm of bpf_ringbuf_output() helper for better visual context: // // ffffffff8117ee20: 41 54 push r12 // ffffffff8117ee22: 55 push rbp // ffffffff8117ee23: 53 push rbx // ffffffff8117ee24: 48 f7 c1 fc ff ff ff test rcx,0xfffffffffffffffc // ffffffff8117ee2b: 0f 85 af 00 00 00 jne ffffffff8117eee0 <-- jump taken // [...] // ffffffff8117eee0: 49 c7 c4 ea ff ff ff mov r12,0xffffffffffffffea // ffffffff8117eee7: 5b pop rbx // ffffffff8117eee8: 5d pop rbp // ffffffff8117eee9: 4c 89 e0 mov rax,r12 // ffffffff8117eeec: 41 5c pop r12 // ffffffff8117eeee: c3 ret 545: (18) r1 = map[id:4] 547: (bf) r2 = r7 548: (b7) r3 = 0 549: (b7) r4 = 4 550: (85) call bpf_ringbuf_output#194288 // instruction 551 inserted by verifier \ 551: (7a) *(u64 *)(r10 -16) = 0 | /both/ are now slow stores here // storing map value pointer r7 at fp-16 | since value of r10 is "slow". 552: (7b) *(u64 *)(r10 -16) = r7 / // following "fast" read to the same memory location, but due to dependency // misprediction it will speculatively execute before insn 551/552 completes. 553: (79) r2 = *(u64 *)(r9 -16) // in speculative domain contains attacker controlled r2. in non-speculative // domain this contains r7, and thus accesses r7 +0 below. 554: (71) r3 = *(u8 *)(r2 +0) // leak r3 As can be seen, the current speculative store bypass mitigation which the verifier inserts at line 551 is insufficient since /both/, the write of the zero sanitation as well as the map value pointer are a high latency instruction due to prior memory access via push/pop of r10 (rbp) in contrast to the low latency read in line 553 as r9 (r15) which stays in hardware registers. Thus, architecturally, fp-16 is r7, however, microarchitecturally, fp-16 can still be r2. Initial thoughts to address this issue was to track spilled pointer loads from stack and enforce their load via LDX through r10 as well so that /both/ the preemptive store of zero /as well as/ the load use the /same/ register such that a dependency is created between the store and load. However, this option is not sufficient either since it can be bypassed as well under speculation. An updated attack with pointer spill/fills now _all_ based on r10 would look as follows: [...] // r2 = oob address (e.g. scalar) // r7 = pointer to map value [...] // longer store forward prediction training sequence than before. 2062: (61) r0 = *(u32 *)(r7 +25588) 2063: (63) *(u32 *)(r7 +30708) = r0 2064: (61) r0 = *(u32 *)(r7 +25592) 2065: (63) *(u32 *)(r7 +30712) = r0 2066: (61) r0 = *(u32 *)(r7 +25596) 2067: (63) *(u32 *)(r7 +30716) = r0 // store the speculative load address (scalar) this time after the store // forward prediction training. 2068: (7b) *(u64 *)(r10 -16) = r2 // preoccupy the CPU store port by running sequence of dummy stores. 2069: (63) *(u32 *)(r7 +29696) = r0 2070: (63) *(u32 *)(r7 +29700) = r0 2071: (63) *(u32 *)(r7 +29704) = r0 2072: (63) *(u32 *)(r7 +29708) = r0 2073: (63) *(u32 *)(r7 +29712) = r0 2074: (63) *(u32 *)(r7 +29716) = r0 2075: (63) *(u32 *)(r7 +29720) = r0 2076: (63) *(u32 *)(r7 +29724) = r0 2077: (63) *(u32 *)(r7 +29728) = r0 2078: (63) *(u32 *)(r7 +29732) = r0 2079: (63) *(u32 *)(r7 +29736) = r0 2080: (63) *(u32 *)(r7 +29740) = r0 2081: (63) *(u32 *)(r7 +29744) = r0 2082: (63) *(u32 *)(r7 +29748) = r0 2083: (63) *(u32 *)(r7 +29752) = r0 2084: (63) *(u32 *)(r7 +29756) = r0 2085: (63) *(u32 *)(r7 +29760) = r0 2086: (63) *(u32 *)(r7 +29764) = r0 2087: (63) *(u32 *)(r7 +29768) = r0 2088: (63) *(u32 *)(r7 +29772) = r0 2089: (63) *(u32 *)(r7 +29776) = r0 2090: (63) *(u32 *)(r7 +29780) = r0 2091: (63) *(u32 *)(r7 +29784) = r0 2092: (63) *(u32 *)(r7 +29788) = r0 2093: (63) *(u32 *)(r7 +29792) = r0 2094: (63) *(u32 *)(r7 +29796) = r0 2095: (63) *(u32 *)(r7 +29800) = r0 2096: (63) *(u32 *)(r7 +29804) = r0 2097: (63) *(u32 *)(r7 +29808) = r0 2098: (63) *(u32 *)(r7 +29812) = r0 // overwrite scalar with dummy pointer; same as before, also including the // sanitation store with 0 from the current mitigation by the verifier. 2099: (7a) *(u64 *)(r10 -16) = 0 | /both/ are now slow stores here 2100: (7b) *(u64 *)(r10 -16) = r7 | since store unit is still busy. // load from stack intended to bypass stores. 2101: (79) r2 = *(u64 *)(r10 -16) 2102: (71) r3 = *(u8 *)(r2 +0) // leak r3 [...] Looking at the CPU microarchitecture, the scheduler might issue loads (such as seen in line 2101) before stores (line 2099,2100) because the load execution units become available while the store execution unit is still busy with the sequence of dummy stores (line 2069-2098). And so the load may use the prior stored scalar from r2 at address r10 -16 for speculation. The updated attack may work less reliable on CPU microarchitectures where loads and stores share execution resources. This concludes that the sanitizing with zero stores from af86ca4e3088 ("bpf: Prevent memory disambiguation attack") is insufficient. Moreover, the detection of stack reuse from af86ca4e3088 where previously data (STACK_MISC) has been written to a given stack slot where a pointer value is now to be stored does not have sufficient coverage as precondition for the mitigation either; for several reasons outlined as follows: 1) Stack content from prior program runs could still be preserved and is therefore not "random", best example is to split a speculative store bypass attack between tail calls, program A would prepare and store the oob address at a given stack slot and then tail call into program B which does the "slow" store of a pointer to the stack with subsequent "fast" read. From program B PoV such stack slot type is STACK_INVALID, and therefore also must be subject to mitigation. 2) The STACK_SPILL must not be coupled to register_is_const(&stack->spilled_ptr) condition, for example, the previous content of that memory location could also be a pointer to map or map value. Without the fix, a speculative store bypass is not mitigated in such precondition and can then lead to a type confusion in the speculative domain leaking kernel memory near these pointer types. While brainstorming on various alternative mitigation possibilities, we also stumbled upon a retrospective from Chrome developers [0]: [...] For variant 4, we implemented a mitigation to zero the unused memory of the heap prior to allocation, which cost about 1% when done concurrently and 4% for scavenging. Variant 4 defeats everything we could think of. We explored more mitigations for variant 4 but the threat proved to be more pervasive and dangerous than we anticipated. For example, stack slots used by the register allocator in the optimizing compiler could be subject to type confusion, leading to pointer crafting. Mitigating type confusion for stack slots alone would have required a complete redesign of the backend of the optimizing compiler, perhaps man years of work, without a guarantee of completeness. [...] From BPF side, the problem space is reduced, however, options are rather limited. One idea that has been explored was to xor-obfuscate pointer spills to the BPF stack: [...] // preoccupy the CPU store port by running sequence of dummy stores. [...] 2106: (63) *(u32 *)(r7 +29796) = r0 2107: (63) *(u32 *)(r7 +29800) = r0 2108: (63) *(u32 *)(r7 +29804) = r0 2109: (63) *(u32 *)(r7 +29808) = r0 2110: (63) *(u32 *)(r7 +29812) = r0 // overwrite scalar with dummy pointer; xored with random 'secret' value // of 943576462 before store ... 2111: (b4) w11 = 943576462 2112: (af) r11 ^= r7 2113: (7b) *(u64 *)(r10 -16) = r11 2114: (79) r11 = *(u64 *)(r10 -16) 2115: (b4) w2 = 943576462 2116: (af) r2 ^= r11 // ... and restored with the same 'secret' value with the help of AX reg. 2117: (71) r3 = *(u8 *)(r2 +0) [...] While the above would not prevent speculation, it would make data leakage infeasible by directing it to random locations. In order to be effective and prevent type confusion under speculation, such random secret would have to be regenerated for each store. The additional complexity involved for a tracking mechanism that prevents jumps such that restoring spilled pointers would not get corrupted is not worth the gain for unprivileged. Hence, the fix in here eventually opted for emitting a non-public BPF_ST | BPF_NOSPEC instruction which the x86 JIT translates into a lfence opcode. Inserting the latter in between the store and load instruction is one of the mitigations options [1]. The x86 instruction manual notes: [...] An LFENCE that follows an instruction that stores to memory might complete before the data being stored have become globally visible. [...] The latter meaning that the preceding store instruction finished execution and the store is at minimum guaranteed to be in the CPU's store queue, but it's not guaranteed to be in that CPU's L1 cache at that point (globally visible). The latter would only be guaranteed via sfence. So the load which is guaranteed to execute after the lfence for that local CPU would have to rely on store-to-load forwarding. [2], in section 2.3 on store buffers says: [...] For every store operation that is added to the ROB, an entry is allocated in the store buffer. This entry requires both the virtual and physical address of the target. Only if there is no free entry in the store buffer, the frontend stalls until there is an empty slot available in the store buffer again. Otherwise, the CPU can immediately continue adding subsequent instructions to the ROB and execute them out of order. On Intel CPUs, the store buffer has up to 56 entries. [...] One small upside on the fix is that it lifts constraints from af86ca4e3088 where the sanitize_stack_off relative to r10 must be the same when coming from different paths. The BPF_ST | BPF_NOSPEC gets emitted after a BPF_STX or BPF_ST instruction. This happens either when we store a pointer or data value to the BPF stack for the first time, or upon later pointer spills. The former needs to be enforced since otherwise stale stack data could be leaked under speculation as outlined earlier. For non-x86 JITs the BPF_ST | BPF_NOSPEC mapping is currently optimized away, but others could emit a speculation barrier as well if necessary. For real-world unprivileged programs e.g. generated by LLVM, pointer spill/fill is only generated upon register pressure and LLVM only tries to do that for pointers which are not used often. The program main impact will be the initial BPF_ST | BPF_NOSPEC sanitation for the STACK_INVALID case when the first write to a stack slot occurs e.g. upon map lookup. In future we might refine ways to mitigate the latter cost. [0] https://arxiv.org/pdf/1902.05178.pdf [1] https://msrc-blog.microsoft.com/2018/05/21/analysis-and-mitigation-of-speculative-store-bypass-cve-2018-3639/ [2] https://arxiv.org/pdf/1905.05725.pdf Fixes: af86ca4e3088 ("bpf: Prevent memory disambiguation attack") Fixes: f7cf25b2026d ("bpf: track spill/fill of constants") Co-developed-by: Piotr Krysiuk Co-developed-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Signed-off-by: Piotr Krysiuk Signed-off-by: Benedict Schlueter Acked-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- kernel/bpf/verifier.c | 87 +++++++++++++----------------------- 2 files changed, 33 insertions(+), 56 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7ba7e800d472..828d08afeee0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -340,8 +340,8 @@ struct bpf_insn_aux_data { }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ - int sanitize_stack_off; /* stack slot to be cleared */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ + bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ u8 alu_state; /* used in combination with alu_limit */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 657062cb4d85..f9bda5476ea5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2610,6 +2610,19 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, cur = env->cur_state->frame[env->cur_state->curframe]; if (value_regno >= 0) reg = &cur->regs[value_regno]; + if (!env->bypass_spec_v4) { + bool sanitize = reg && is_spillable_regtype(reg->type); + + for (i = 0; i < size; i++) { + if (state->stack[spi].slot_type[i] == STACK_INVALID) { + sanitize = true; + break; + } + } + + if (sanitize) + env->insn_aux_data[insn_idx].sanitize_stack_spill = true; + } if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { @@ -2632,47 +2645,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, verbose(env, "invalid size of register spill\n"); return -EACCES; } - if (state != cur && reg->type == PTR_TO_STACK) { verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); return -EINVAL; } - - if (!env->bypass_spec_v4) { - bool sanitize = false; - - if (state->stack[spi].slot_type[0] == STACK_SPILL && - register_is_const(&state->stack[spi].spilled_ptr)) - sanitize = true; - for (i = 0; i < BPF_REG_SIZE; i++) - if (state->stack[spi].slot_type[i] == STACK_MISC) { - sanitize = true; - break; - } - if (sanitize) { - int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; - int soff = (-spi - 1) * BPF_REG_SIZE; - - /* detected reuse of integer stack slot with a pointer - * which means either llvm is reusing stack slot or - * an attacker is trying to exploit CVE-2018-3639 - * (speculative store bypass) - * Have to sanitize that slot with preemptive - * store of zero. - */ - if (*poff && *poff != soff) { - /* disallow programs where single insn stores - * into two different stack slots, since verifier - * cannot sanitize them - */ - verbose(env, - "insn %d cannot access two stack slots fp%d and fp%d", - insn_idx, *poff, soff); - return -EINVAL; - } - *poff = soff; - } - } save_register_state(state, spi, reg); } else { u8 type = STACK_MISC; @@ -11913,35 +11889,33 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { bpf_convert_ctx_access_t convert_ctx_access; + bool ctx_access; if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || - insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) { type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || - insn->code == (BPF_STX | BPF_MEM | BPF_H) || - insn->code == (BPF_STX | BPF_MEM | BPF_W) || - insn->code == (BPF_STX | BPF_MEM | BPF_DW)) + ctx_access = true; + } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || + insn->code == (BPF_STX | BPF_MEM | BPF_H) || + insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW) || + insn->code == (BPF_ST | BPF_MEM | BPF_B) || + insn->code == (BPF_ST | BPF_MEM | BPF_H) || + insn->code == (BPF_ST | BPF_MEM | BPF_W) || + insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - else + ctx_access = BPF_CLASS(insn->code) == BPF_STX; + } else { continue; + } if (type == BPF_WRITE && - env->insn_aux_data[i + delta].sanitize_stack_off) { + env->insn_aux_data[i + delta].sanitize_stack_spill) { struct bpf_insn patch[] = { - /* Sanitize suspicious stack slot with zero. - * There are no memory dependencies for this store, - * since it's only using frame pointer and immediate - * constant of zero - */ - BPF_ST_MEM(BPF_DW, BPF_REG_FP, - env->insn_aux_data[i + delta].sanitize_stack_off, - 0), - /* the original STX instruction will immediately - * overwrite the same stack slot with appropriate value - */ *insn, + BPF_ST_NOSPEC(), }; cnt = ARRAY_SIZE(patch); @@ -11955,6 +11929,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) continue; } + if (!ctx_access) + continue; + switch (env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) From d712d3fb484b7fa8d1d57e9ca6f134bb9d8c18b1 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Wed, 7 Jul 2021 11:59:45 -0700 Subject: [PATCH 294/502] scsi: pm80xx: Fix TMF task completion race condition The TMF timeout timer may trigger at the same time when the response from a controller is being handled. When this happens the SAS task may get freed before the response processing is finished. Fix this by calling complete() only when SAS_TASK_STATE_DONE is not set. A similar race condition was fixed in commit b90cd6f2b905 ("scsi: libsas: fix a race condition when smp task timeout") Link: https://lore.kernel.org/r/20210707185945.35559-1-ipylypiv@google.com Reviewed-by: Vishakha Channapattan Acked-by: Jack Wang Signed-off-by: Igor Pylypiv Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_sas.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 48548a95327b..32e60f0c3b14 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -684,8 +684,7 @@ int pm8001_dev_found(struct domain_device *dev) void pm8001_task_done(struct sas_task *task) { - if (!del_timer(&task->slow_task->timer)) - return; + del_timer(&task->slow_task->timer); complete(&task->slow_task->completion); } @@ -693,9 +692,14 @@ static void pm8001_tmf_timedout(struct timer_list *t) { struct sas_task_slow *slow = from_timer(slow, t, timer); struct sas_task *task = slow->task; + unsigned long flags; - task->task_state_flags |= SAS_TASK_STATE_ABORTED; - complete(&task->slow_task->completion); + spin_lock_irqsave(&task->task_state_lock, flags); + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + complete(&task->slow_task->completion); + } + spin_unlock_irqrestore(&task->task_state_lock, flags); } #define PM8001_TASK_TIMEOUT 20 @@ -748,13 +752,10 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev, } res = -TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task[%x]timeout.\n", - tmf->tmf); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n", + tmf->tmf); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && @@ -834,12 +835,9 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha, wait_for_completion(&task->slow_task->completion); res = TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task timeout.\n"); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n"); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && From 77541f78eadfe9fdb018a7b8b69f0f2af2cf4b82 Mon Sep 17 00:00:00 2001 From: Harshvardhan Jha Date: Thu, 8 Jul 2021 13:16:42 +0530 Subject: [PATCH 295/502] scsi: megaraid_mm: Fix end of loop tests for list_for_each_entry() The list_for_each_entry() iterator, "adapter" in this code, can never be NULL. If we exit the loop without finding the correct adapter then "adapter" points invalid memory that is an offset from the list head. This will eventually lead to memory corruption and presumably a kernel crash. Link: https://lore.kernel.org/r/20210708074642.23599-1-harshvardhan.jha@oracle.com Acked-by: Sumit Saxena Signed-off-by: Harshvardhan Jha Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_mm.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index abf7b401f5b9..c509440bd161 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -238,7 +238,7 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) mimd_t mimd; uint32_t adapno; int iterator; - + bool is_found; if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) { *rval = -EFAULT; @@ -254,12 +254,16 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) adapter = NULL; iterator = 0; + is_found = false; list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } - if (!adapter) { + if (!is_found) { *rval = -ENODEV; return NULL; } @@ -725,6 +729,7 @@ ioctl_done(uioc_t *kioc) uint32_t adapno; int iterator; mraid_mmadp_t* adapter; + bool is_found; /* * When the kioc returns from driver, make sure it still doesn't @@ -747,19 +752,23 @@ ioctl_done(uioc_t *kioc) iterator = 0; adapter = NULL; adapno = kioc->adapno; + is_found = false; con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed " "ioctl that was timedout before\n")); list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } kioc->timedout = 0; - if (adapter) { + if (is_found) mraid_mm_dealloc_kioc( adapter, kioc ); - } + } else { wake_up(&wait_q); From 640b7ea5f888b521dcf28e2564ce75d08a783fd7 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 27 Jul 2021 23:38:24 +0300 Subject: [PATCH 296/502] alpha: register early reserved memory in memblock The memory reserved by console/PALcode or non-volatile memory is not added to memblock.memory. Since commit fa3354e4ea39 (mm: free_area_init: use maximal zone PFNs rather than zone sizes) the initialization of the memory map relies on the accuracy of memblock.memory to properly calculate zone sizes. The holes in memblock.memory caused by absent regions reserved by the firmware cause incorrect initialization of struct pages which leads to BUG() during the initial page freeing: BUG: Bad page state in process swapper pfn:2ffc53 page:fffffc000ecf14c0 refcount:0 mapcount:1 mapping:0000000000000000 index:0x0 flags: 0x0() raw: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 raw: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.7.0-03841-gfa3354e4ea39-dirty #26 fffffc0001b5bd68 fffffc0001b5be80 fffffc00011cd148 fffffc000ecf14c0 fffffc00019803df fffffc0001b5be80 fffffc00011ce340 fffffc000ecf14c0 0000000000000000 fffffc0001b5be80 fffffc0001b482c0 fffffc00027d6618 fffffc00027da7d0 00000000002ff97a 0000000000000000 fffffc0001b5be80 fffffc00011d1abc fffffc000ecf14c0 fffffc0002d00000 fffffc0001b5be80 fffffc0001b2350c 0000000000300000 fffffc0001b48298 fffffc0001b482c0 Trace: [] bad_page+0x168/0x1b0 [] free_pcp_prepare+0x1e0/0x290 [] free_unref_page+0x2c/0xa0 [] cmp_ex_sort+0x0/0x30 [] cmp_ex_sort+0x0/0x30 [] _stext+0x1c/0x20 Fix this by registering the reserved ranges in memblock.memory. Link: https://lore.kernel.org/lkml/20210726192311.uffqnanxw3ac5wwi@ivybridge Fixes: fa3354e4ea39 ("mm: free_area_init: use maximal zone PFNs rather than zone sizes") Reported-by: Matt Turner Cc: Signed-off-by: Mike Rapoport Signed-off-by: Matt Turner --- arch/alpha/kernel/setup.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 7d56c217b235..b4fbbba30aa2 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -319,18 +319,19 @@ setup_memory(void *kernel_end) i, cluster->usage, cluster->start_pfn, cluster->start_pfn + cluster->numpages); - /* Bit 0 is console/PALcode reserved. Bit 1 is - non-volatile memory -- we might want to mark - this for later. */ - if (cluster->usage & 3) - continue; - end = cluster->start_pfn + cluster->numpages; if (end > max_low_pfn) max_low_pfn = end; memblock_add(PFN_PHYS(cluster->start_pfn), cluster->numpages << PAGE_SHIFT); + + /* Bit 0 is console/PALcode reserved. Bit 1 is + non-volatile memory -- we might want to mark + this for later. */ + if (cluster->usage & 3) + memblock_reserve(PFN_PHYS(cluster->start_pfn), + cluster->numpages << PAGE_SHIFT); } /* From aa35772f61752d4c636d46be51a4f7ca6c029ee6 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 23 Jun 2021 09:02:47 +0200 Subject: [PATCH 297/502] usb: cdns3: Fixed incorrect gadget state For delayed status phase, the usb_gadget->state was set to USB_STATE_ADDRESS and it has never been updated to USB_STATE_CONFIGURED. Patch updates the gadget state to correct USB_STATE_CONFIGURED. As a result of this bug the controller was not able to enter to Test Mode while using MSC function. Cc: Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210623070247.46151-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdns3-ep0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/cdns3/cdns3-ep0.c b/drivers/usb/cdns3/cdns3-ep0.c index 02ec7ab4bb48..e29989d57bef 100644 --- a/drivers/usb/cdns3/cdns3-ep0.c +++ b/drivers/usb/cdns3/cdns3-ep0.c @@ -731,6 +731,7 @@ static int cdns3_gadget_ep0_queue(struct usb_ep *ep, request->actual = 0; priv_dev->status_completion_no_call = true; priv_dev->pending_status_request = request; + usb_gadget_set_state(&priv_dev->gadget, USB_STATE_CONFIGURED); spin_unlock_irqrestore(&priv_dev->lock, flags); /* From aa82f94e869edd72f4fadb08c6ffca8927e4934e Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 25 Jun 2021 12:25:02 +0200 Subject: [PATCH 298/502] usb: cdnsp: Fix incorrect supported maximum speed Driver had hardcoded in initialization maximum supported speed to USB_SPEED_SUPER_PLUS but it should consider the speed returned from usb_get_maximum_speed function. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210625102502.26336-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index c23f53e9b1ef..27df0c697897 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1882,7 +1882,7 @@ static int __cdnsp_gadget_init(struct cdns *cdns) pdev->gadget.name = "cdnsp-gadget"; pdev->gadget.speed = USB_SPEED_UNKNOWN; pdev->gadget.sg_supported = 1; - pdev->gadget.max_speed = USB_SPEED_SUPER_PLUS; + pdev->gadget.max_speed = max_speed; pdev->gadget.lpm_capable = 1; pdev->setup_buf = kzalloc(CDNSP_EP0_SETUP_SIZE, GFP_KERNEL); From e913aada06830338633fb8524733b0ad3d38a7c1 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 23 Jun 2021 09:27:28 +0200 Subject: [PATCH 299/502] usb: cdnsp: Fixed issue with ZLP The condition "if (need_zero_pkt && zero_len_trb)" was always false and it caused that TRB for ZLP was not prepared. Fix causes that after preparing last TRB in TD, the driver prepares additional TD with ZLP when a ZLP is required. Cc: Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210623072728.41275-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-ring.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 68972746e363..1b1438457fb0 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1932,15 +1932,13 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) } if (enqd_len + trb_buff_len >= full_len) { - if (need_zero_pkt && zero_len_trb) { - zero_len_trb = true; - } else { - field &= ~TRB_CHAIN; - field |= TRB_IOC; - more_trbs_coming = false; - need_zero_pkt = false; - preq->td.last_trb = ring->enqueue; - } + if (need_zero_pkt) + zero_len_trb = !zero_len_trb; + + field &= ~TRB_CHAIN; + field |= TRB_IOC; + more_trbs_coming = false; + preq->td.last_trb = ring->enqueue; } /* Only set interrupt on short packet for OUT endpoints. */ @@ -1955,7 +1953,7 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); - cdnsp_queue_trb(pdev, ring, more_trbs_coming | need_zero_pkt, + cdnsp_queue_trb(pdev, ring, more_trbs_coming | zero_len_trb, lower_32_bits(send_addr), upper_32_bits(send_addr), length_field, From 5df09c15bab98463203c83ecab88b9321466e626 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 22 Jun 2021 21:37:48 +0200 Subject: [PATCH 300/502] usb: cdnsp: Fix the IMAN_IE_SET and IMAN_IE_CLEAR macro IMAN_IE is BIT(1), so these macro are respectively equivalent to BIT(1) and 0, whatever the value of 'p'. The purpose was to set and reset a single bit in 'p'. Fix these macros to do that correctly. Acked-by: Pawel Laszczak Fixes: e93e58d27402 ("usb: cdnsp: Device side header file for CDNSP driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/d12bfcc9cbffb89e27b120668821b3c4f09b6755.1624390584.git.christophe.jaillet@wanadoo.fr Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 783ca8ffde00..f740fa6089d8 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -383,8 +383,8 @@ struct cdnsp_intr_reg { #define IMAN_IE BIT(1) #define IMAN_IP BIT(0) /* bits 2:31 need to be preserved */ -#define IMAN_IE_SET(p) (((p) & IMAN_IE) | 0x2) -#define IMAN_IE_CLEAR(p) (((p) & IMAN_IE) & ~(0x2)) +#define IMAN_IE_SET(p) ((p) | IMAN_IE) +#define IMAN_IE_CLEAR(p) ((p) & ~IMAN_IE) /* IMOD - Interrupter Moderation Register - irq_control bitmasks. */ /* From 0d4867a185460397af56b9afe3e2243d3e610e37 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Wed, 21 Jul 2021 20:01:41 +0300 Subject: [PATCH 301/502] ALSA: hda/realtek: add mic quirk for Acer SF314-42 The Acer Swift SF314-42 laptop is using Realtek ALC255 codec. Add a quirk so microphone in a headset connected via the right-hand side jack is usable. Signed-off-by: Alexander Monakov Cc: Link: https://lore.kernel.org/r/20210721170141.24807-1-amonakov@ispras.ru Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 14e1ab7c7954..21c521596c9d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8278,6 +8278,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), From db8d3a21275c807a4047a21bde3b57d49ca55d82 Mon Sep 17 00:00:00 2001 From: Michael Zaidman Date: Thu, 29 Jul 2021 13:26:03 +0300 Subject: [PATCH 302/502] HID: ft260: fix device removal due to USB disconnect This commit fixes a functional regression introduced by the commit 82f09a637dd3 ("HID: ft260: improve error handling of ft260_hid_feature_report_get()") when upon USB disconnect, the FTDI FT260 i2c device is still available within the /dev folder. In my company's product, where the host USB to FT260 USB connection is hard-wired in the PCB, the issue is not reproducible. To reproduce it, I used the VirtualBox Ubuntu 20.04 VM and the UMFT260EV1A development module for the FTDI FT260 chip: Plug the UMFT260EV1A module into a USB port and attach it to VM. The VM shows 2 i2c devices under the /dev: michael@michael-VirtualBox:~$ ls /dev/i2c-* /dev/i2c-0 /dev/i2c-1 The i2c-0 is not related to the FTDI FT260: michael@michael-VirtualBox:~$ cat /sys/bus/i2c/devices/i2c-0/name SMBus PIIX4 adapter at 4100 The i2c-1 is created by hid-ft260.ko: michael@michael-VirtualBox:~$ cat /sys/bus/i2c/devices/i2c-1/name FT260 usb-i2c bridge on hidraw1 Now, detach the FTDI FT260 USB device from VM. We expect the /dev/i2c-1 to disappear, but it's still here: michael@michael-VirtualBox:~$ ls /dev/i2c-* /dev/i2c-0 /dev/i2c-1 And the kernel log shows: [ +0.001202] usb 2-2: USB disconnect, device number 3 [ +0.000109] ft260 0003:0403:6030.0002: failed to retrieve system status [ +0.000316] ft260 0003:0403:6030.0003: failed to retrieve system status It happens because the commit 82f09a637dd3 changed the ft260_get_system_config() return logic. This caused the ft260_is_interface_enabled() to exit with error upon the FT260 device USB disconnect, which in turn, aborted the ft260_remove() before deleting the FT260 i2c device and cleaning its sysfs stuff. This commit restores the FT260 USB removal functionality and improves the ft260_is_interface_enabled() code to handle correctly all chip modes defined by the device interface configuration pins DCNF0 and DCNF1. Signed-off-by: Michael Zaidman Acked-by: Aaron Jones (FTDI-UK) Signed-off-by: Jiri Kosina --- drivers/hid/hid-ft260.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c index 6f10df2042c4..4ef1c3b8094e 100644 --- a/drivers/hid/hid-ft260.c +++ b/drivers/hid/hid-ft260.c @@ -742,7 +742,7 @@ static int ft260_is_interface_enabled(struct hid_device *hdev) int ret; ret = ft260_get_system_config(hdev, &cfg); - if (ret) + if (ret < 0) return ret; ft260_dbg("interface: 0x%02x\n", interface); @@ -754,23 +754,16 @@ static int ft260_is_interface_enabled(struct hid_device *hdev) switch (cfg.chip_mode) { case FT260_MODE_ALL: case FT260_MODE_BOTH: - if (interface == 1) { + if (interface == 1) hid_info(hdev, "uart interface is not supported\n"); - return 0; - } - ret = 1; + else + ret = 1; break; case FT260_MODE_UART: - if (interface == 0) { - hid_info(hdev, "uart is unsupported on interface 0\n"); - ret = 0; - } + hid_info(hdev, "uart interface is not supported\n"); break; case FT260_MODE_I2C: - if (interface == 1) { - hid_info(hdev, "i2c is unsupported on interface 1\n"); - ret = 0; - } + ret = 1; break; } return ret; @@ -1004,11 +997,9 @@ err_hid_stop: static void ft260_remove(struct hid_device *hdev) { - int ret; struct ft260_device *dev = hid_get_drvdata(hdev); - ret = ft260_is_interface_enabled(hdev); - if (ret <= 0) + if (!dev) return; sysfs_remove_group(&hdev->dev.kobj, &ft260_attr_group); From 7280305eb57dd32735f795ed4ee679bf9854f9d0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 28 Jul 2021 18:00:24 +0200 Subject: [PATCH 303/502] btrfs: calculate number of eb pages properly in csum_tree_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building with -Warray-bounds on systems with 64K pages there's a warning: fs/btrfs/disk-io.c: In function ‘csum_tree_block’: fs/btrfs/disk-io.c:226:34: warning: array subscript 1 is above array bounds of ‘struct page *[1]’ [-Warray-bounds] 226 | kaddr = page_address(buf->pages[i]); | ~~~~~~~~~~^~~ ./include/linux/mm.h:1630:48: note: in definition of macro ‘page_address’ 1630 | #define page_address(page) lowmem_page_address(page) | ^~~~ In file included from fs/btrfs/ctree.h:32, from fs/btrfs/disk-io.c:23: fs/btrfs/extent_io.h:98:15: note: while referencing ‘pages’ 98 | struct page *pages[1]; | ^~~~~ The compiler has no way to know that in that case the nodesize is exactly PAGE_SIZE, so the resulting number of pages will be correct (1). Let's use num_extent_pages that makes the case nodesize == PAGE_SIZE explicitly 1. Reported-by: Gustavo A. R. Silva Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b117dd3b8172..a59ab7b9aea0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -209,7 +209,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, static void csum_tree_block(struct extent_buffer *buf, u8 *result) { struct btrfs_fs_info *fs_info = buf->fs_info; - const int num_pages = fs_info->nodesize >> PAGE_SHIFT; + const int num_pages = num_extent_pages(buf); const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); char *kaddr; From 3c18e9baee0ef97510dcda78c82285f52626764b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 24 Jul 2021 17:27:39 +0200 Subject: [PATCH 304/502] USB: serial: ch341: fix character loss at high transfer rates The chip supports high transfer rates, but with the small default buffers (64 bytes read), some entire blocks are regularly lost. This typically happens at 1.5 Mbps (which is the default speed on Rockchip devices) when used as a console to access U-Boot where the output of the "help" command misses many lines and where "printenv" mangles the environment. The FTDI driver doesn't suffer at all from this. One difference is that it uses 512 bytes rx buffers and 256 bytes tx buffers. Adopting these values completely resolved the issue, even the output of "dmesg" is reliable. I preferred to leave the Tx value unchanged as it is not involved in this issue, while a change could increase the risk of triggering the same issue with other devices having too small buffers. I verified that it backports well (and works) at least to 5.4. It's of low importance enough to be dropped where it doesn't trivially apply anymore. Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau Link: https://lore.kernel.org/r/20210724152739.18726-1-w@1wt.eu Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 2db917eab799..8a521b5ea769 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -851,6 +851,7 @@ static struct usb_serial_driver ch341_device = { .owner = THIS_MODULE, .name = "ch341-uart", }, + .bulk_in_size = 512, .id_table = id_table, .num_ports = 1, .open = ch341_open, From 333cf507465fbebb3727f5b53e77538467df312a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 29 Jul 2021 11:34:49 +0530 Subject: [PATCH 305/502] powerpc/pseries: Fix regression while building external modules With commit c9f3401313a5 ("powerpc: Always enable queued spinlocks for 64s, disable for others") CONFIG_PPC_QUEUED_SPINLOCKS is always enabled on ppc64le, external modules that use spinlock APIs are failing. ERROR: modpost: GPL-incompatible module XXX.ko uses GPL-only symbol 'shared_processor' Before the above commit, modules were able to build without any issues. Also this problem is not seen on other architectures. This problem can be workaround if CONFIG_UNINLINE_SPIN_UNLOCK is enabled in the config. However CONFIG_UNINLINE_SPIN_UNLOCK is not enabled by default and only enabled in certain conditions like CONFIG_DEBUG_SPINLOCKS is set in the kernel config. #include spinlock_t spLock; static int __init spinlock_test_init(void) { spin_lock_init(&spLock); spin_lock(&spLock); spin_unlock(&spLock); return 0; } static void __exit spinlock_test_exit(void) { printk("spinlock_test unloaded\n"); } module_init(spinlock_test_init); module_exit(spinlock_test_exit); MODULE_DESCRIPTION ("spinlock_test"); MODULE_LICENSE ("non-GPL"); MODULE_AUTHOR ("Srikar Dronamraju"); Given that spin locks are one of the basic facilities for module code, this effectively makes it impossible to build/load almost any non GPL modules on ppc64le. This was first reported at https://github.com/openzfs/zfs/issues/11172 Currently shared_processor is exported as GPL only symbol. Fix this for parity with other architectures by exposing shared_processor to non-GPL modules too. Fixes: 14c73bd344da ("powerpc/vcpu: Assume dedicated processors as non-preempt") Cc: stable@vger.kernel.org # v5.5+ Reported-by: marc.c.dionne@gmail.com Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729060449.292780-1-srikar@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 631a0d57b6cd..6b0886668465 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -77,7 +77,7 @@ #include "../../../../drivers/pci/pci.h" DEFINE_STATIC_KEY_FALSE(shared_processor); -EXPORT_SYMBOL_GPL(shared_processor); +EXPORT_SYMBOL(shared_processor); int CMO_PrPSP = -1; int CMO_SecPSP = -1; From a88603f4b92ecef9e2359e40bcb99ad399d85dd7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 29 Jul 2021 22:56:36 +1000 Subject: [PATCH 306/502] powerpc/vdso: Don't use r30 to avoid breaking Go lang The Go runtime uses r30 for some special value called 'g'. It assumes that value will remain unchanged even when calling VDSO functions. Although r30 is non-volatile across function calls, the callee is free to use it, as long as the callee saves the value and restores it before returning. It used to be true by accident that the VDSO didn't use r30, because the VDSO was hand-written asm. When we switched to building the VDSO from C the compiler started using r30, at least in some builds, leading to crashes in Go. eg: ~/go/src$ ./all.bash Building Go cmd/dist using /usr/lib/go-1.16. (go1.16.2 linux/ppc64le) Building Go toolchain1 using /usr/lib/go-1.16. go build os/exec: /usr/lib/go-1.16/pkg/tool/linux_ppc64le/compile: signal: segmentation fault go build reflect: /usr/lib/go-1.16/pkg/tool/linux_ppc64le/compile: signal: segmentation fault go tool dist: FAILED: /usr/lib/go-1.16/bin/go install -gcflags=-l -tags=math_big_pure_go compiler_bootstrap bootstrap/cmd/...: exit status 1 There are patches in flight to fix Go[1], but until they are released and widely deployed we can workaround it in the VDSO by avoiding use of r30. Note this only works with GCC, clang does not support -ffixed-rN. 1: https://go-review.googlesource.com/c/go/+/328110 Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.") Cc: stable@vger.kernel.org # v5.11+ Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729131244.2595519-1-mpe@ellerman.id.au --- arch/powerpc/kernel/vdso64/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 2813e3f98db6..3c5baaa6f1e7 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -27,6 +27,13 @@ KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both + +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. +ccflags-y += $(call cc-option, -ffixed-r30) + asflags-y := -D__VDSO64__ -s targets += vdso64.lds From 06e91df16f3e1ca1a1886968fb22d4258f3b6b6f Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Thu, 29 Jul 2021 16:31:09 +0800 Subject: [PATCH 307/502] tty: serial: fsl_lpuart: fix the wrong return value in lpuart32_get_mctrl Patch e60c2991f18b make the lpuart32_get_mctrl always return 0, actually this will break the functions of device which use flow control such as Bluetooth. For lpuart32 plaform, the hardware can handle the CTS automatically. So we should set TIOCM_CTS active. Also need to set CAR and DSR active. Patch has been tested on lpuart32 platforms such as imx8qm and imx8ulp. Fixes: e60c2991f18b ("serial: fsl_lpuart: remove RTSCTS handling from get_mctrl()") Cc: stable Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20210729083109.31541-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 508128ddfa01..f0e5da77ed6d 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1415,7 +1415,7 @@ static unsigned int lpuart_get_mctrl(struct uart_port *port) static unsigned int lpuart32_get_mctrl(struct uart_port *port) { - unsigned int mctrl = 0; + unsigned int mctrl = TIOCM_CAR | TIOCM_DSR | TIOCM_CTS; u32 reg; reg = lpuart32_read(port, UARTCTRL); From 7c4a509d3815a260c423c0633bd73695250ac26d Mon Sep 17 00:00:00 2001 From: Zhiyong Tao Date: Thu, 29 Jul 2021 16:46:40 +0800 Subject: [PATCH 308/502] serial: 8250_mtk: fix uart corruption issue when rx power off Fix uart corruption issue when rx power off. Add spin lock in mtk8250_dma_rx_complete function in APDMA mode. when uart is used as a communication port with external device(GPS). when external device(GPS) power off, the power of rx pin is also from 1.8v to 0v. Even if there is not any data in rx. But uart rx pin can capture the data "0". If uart don't receive any data in specified cycle, uart will generates BI(Break interrupt) interrupt. If external device(GPS) power off, we found that BI interrupt appeared continuously and very frequently. When uart interrupt type is BI, uart IRQ handler(8250 framwork API:serial8250_handle_irq) will push data to tty buffer. mtk8250_dma_rx_complete is a task of mtk_uart_apdma_rx_handler. mtk8250_dma_rx_complete priority is lower than uart irq handler(serial8250_handle_irq). if we are in process of mtk8250_dma_rx_complete, uart appear BI interrupt:1)serial8250_handle_irq will priority execution.2)it may cause write tty buffer conflict in mtk8250_dma_rx_complete. So the spin lock protect the rx receive data process is not break. Signed-off-by: Zhiyong Tao Cc: stable Link: https://lore.kernel.org/r/20210729084640.17613-2-zhiyong.tao@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index f7d3023f860f..fb65dc601b23 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -93,10 +93,13 @@ static void mtk8250_dma_rx_complete(void *param) struct dma_tx_state state; int copied, total, cnt; unsigned char *ptr; + unsigned long flags; if (data->rx_status == DMA_RX_SHUTDOWN) return; + spin_lock_irqsave(&up->port.lock, flags); + dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state); total = dma->rx_size - state.residue; cnt = total; @@ -120,6 +123,8 @@ static void mtk8250_dma_rx_complete(void *param) tty_flip_buffer_push(tty_port); mtk8250_rx_dma(up); + + spin_unlock_irqrestore(&up->port.lock, flags); } static void mtk8250_rx_dma(struct uart_8250_port *up) From 0d6434e10b5377a006f6dd995c8fc5e2d82acddc Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Wed, 28 Jul 2021 14:21:06 +0530 Subject: [PATCH 309/502] firmware_loader: use -ETIMEDOUT instead of -EAGAIN in fw_load_sysfs_fallback The only motivation for using -EAGAIN in commit 0542ad88fbdd81bb ("firmware loader: Fix _request_firmware_load() return val for fw load abort") was to distinguish the error from -ENOMEM, and so there is no real reason in keeping it. -EAGAIN is typically used to tell the userspace to try something again and in this case re-using the sysfs loading interface cannot be retried when a timeout happens, so the return value is also bogus. -ETIMEDOUT is received when the wait times out and returning that is much more telling of what the reason for the failure was. So, just propagate that instead of returning -EAGAIN. Suggested-by: Luis Chamberlain Reviewed-by: Shuah Khan Acked-by: Luis Chamberlain Signed-off-by: Anirudh Rayabharam Cc: stable Link: https://lore.kernel.org/r/20210728085107.4141-2-mail@anirudhrb.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/fallback.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index 91899d185e31..1a48be0a030e 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -535,8 +535,6 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) if (fw_state_is_aborted(fw_priv)) { if (retval == -ERESTARTSYS) retval = -EINTR; - else - retval = -EAGAIN; } else if (fw_priv->is_paged_buf && !fw_priv->data) retval = -ENOMEM; From 75d95e2e39b27f733f21e6668af1c9893a97de5e Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Wed, 28 Jul 2021 14:21:07 +0530 Subject: [PATCH 310/502] firmware_loader: fix use-after-free in firmware_fallback_sysfs This use-after-free happens when a fw_priv object has been freed but hasn't been removed from the pending list (pending_fw_head). The next time fw_load_sysfs_fallback tries to insert into the list, it ends up accessing the pending_list member of the previously freed fw_priv. The root cause here is that all code paths that abort the fw load don't delete it from the pending list. For example: _request_firmware() -> fw_abort_batch_reqs() -> fw_state_aborted() To fix this, delete the fw_priv from the list in __fw_set_state() if the new state is DONE or ABORTED. This way, all aborts will remove the fw_priv from the list. Accordingly, remove calls to list_del_init that were being made before calling fw_state_(aborted|done). Also, in fw_load_sysfs_fallback, don't add the fw_priv to the pending list if it is already aborted. Instead, just jump out and return early. Fixes: bcfbd3523f3c ("firmware: fix a double abort case with fw_load_sysfs_fallback") Cc: stable Reported-by: syzbot+de271708674e2093097b@syzkaller.appspotmail.com Tested-by: syzbot+de271708674e2093097b@syzkaller.appspotmail.com Reviewed-by: Shuah Khan Acked-by: Luis Chamberlain Signed-off-by: Anirudh Rayabharam Link: https://lore.kernel.org/r/20210728085107.4141-3-mail@anirudhrb.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/fallback.c | 12 ++++++++---- drivers/base/firmware_loader/firmware.h | 10 +++++++++- drivers/base/firmware_loader/main.c | 2 ++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index 1a48be0a030e..d7d63c1aa993 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -89,12 +89,11 @@ static void __fw_load_abort(struct fw_priv *fw_priv) { /* * There is a small window in which user can write to 'loading' - * between loading done and disappearance of 'loading' + * between loading done/aborted and disappearance of 'loading' */ - if (fw_sysfs_done(fw_priv)) + if (fw_state_is_aborted(fw_priv) || fw_sysfs_done(fw_priv)) return; - list_del_init(&fw_priv->pending_list); fw_state_aborted(fw_priv); } @@ -280,7 +279,6 @@ static ssize_t firmware_loading_store(struct device *dev, * Same logic as fw_load_abort, only the DONE bit * is ignored and we set ABORT only on failure. */ - list_del_init(&fw_priv->pending_list); if (rc) { fw_state_aborted(fw_priv); written = rc; @@ -513,6 +511,11 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) } mutex_lock(&fw_lock); + if (fw_state_is_aborted(fw_priv)) { + mutex_unlock(&fw_lock); + retval = -EINTR; + goto out; + } list_add(&fw_priv->pending_list, &pending_fw_head); mutex_unlock(&fw_lock); @@ -538,6 +541,7 @@ static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs, long timeout) } else if (fw_priv->is_paged_buf && !fw_priv->data) retval = -ENOMEM; +out: device_del(f_dev); err_put_dev: put_device(f_dev); diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h index 63bd29fdcb9c..a3014e9e2c85 100644 --- a/drivers/base/firmware_loader/firmware.h +++ b/drivers/base/firmware_loader/firmware.h @@ -117,8 +117,16 @@ static inline void __fw_state_set(struct fw_priv *fw_priv, WRITE_ONCE(fw_st->status, status); - if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) + if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) { +#ifdef CONFIG_FW_LOADER_USER_HELPER + /* + * Doing this here ensures that the fw_priv is deleted from + * the pending list in all abort/done paths. + */ + list_del_init(&fw_priv->pending_list); +#endif complete_all(&fw_st->completion); + } } static inline void fw_state_aborted(struct fw_priv *fw_priv) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 4fdb8219cd08..68c549d71230 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -783,8 +783,10 @@ static void fw_abort_batch_reqs(struct firmware *fw) return; fw_priv = fw->priv; + mutex_lock(&fw_lock); if (!fw_state_is_aborted(fw_priv)) fw_state_aborted(fw_priv); + mutex_unlock(&fw_lock); } /* called from request_firmware() and request_firmware_work_func() */ From b1e27239b9169f07edba0ca0e52805645a1768ba Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:46 -0700 Subject: [PATCH 311/502] xfs: flush data dev on external log write We incorrectly flush the log device instead of the data device when trying to ensure metadata is correctly on disk before writing the unmount record. Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 36fa2650b081..96434cc4df6e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -833,7 +833,7 @@ xlog_write_unmount_record( * stamp the tail LSN into the unmount record. */ if (log->l_targ != log->l_mp->m_ddev_targp) - blkdev_issue_flush(log->l_targ->bt_bdev); + blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS); } From b5d721eaae47eaa4b4c2754699dadacc4cbca2e0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:47 -0700 Subject: [PATCH 312/502] xfs: external logs need to flush data device The recent journal flush/FUA changes replaced the flushing of the data device on every iclog write with an up-front async data device cache flush. Unfortunately, the assumption of which this was based on has been proven incorrect by the flush vs log tail update ordering issue. As the fix for that issue uses the XLOG_ICL_NEED_FLUSH flag to indicate that data device needs a cache flush, we now need to (once again) ensure that an iclog write to external logs that need a cache flush to be issued actually issue a cache flush to the data device as well as the log device. Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions") Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 96434cc4df6e..a3c4d48195d9 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -827,13 +827,6 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(ulf); - /* - * For external log devices, we need to flush the data device cache - * first to ensure all metadata writeback is on stable storage before we - * stamp the tail LSN into the unmount record. - */ - if (log->l_targ != log->l_mp->m_ddev_targp) - blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS); } @@ -1796,10 +1789,20 @@ xlog_write_iclog( * metadata writeback and causing priority inversions. */ iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE; - if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) + if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) { iclog->ic_bio.bi_opf |= REQ_PREFLUSH; + /* + * For external log devices, we also need to flush the data + * device cache first to ensure all metadata writeback covered + * by the LSN in this iclog is on stable storage. This is slow, + * but it *must* complete before we issue the external log IO. + */ + if (log->l_targ != log->l_mp->m_ddev_targp) + blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); + } if (iclog->ic_flags & XLOG_ICL_NEED_FUA) iclog->ic_bio.bi_opf |= REQ_FUA; + iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { From 9d3920644081edf311878b56e0c1e1477991a195 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:47 -0700 Subject: [PATCH 313/502] xfs: fold __xlog_state_release_iclog into xlog_state_release_iclog Fold __xlog_state_release_iclog into its only caller to prepare make an upcoming fix easier. Signed-off-by: Dave Chinner [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 45 +++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a3c4d48195d9..82f5996d3889 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -487,29 +487,6 @@ out_error: return error; } -static bool -__xlog_state_release_iclog( - struct xlog *log, - struct xlog_in_core *iclog) -{ - lockdep_assert_held(&log->l_icloglock); - - if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { - /* update tail before writing to iclog */ - xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); - - iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog, tail_lsn); - /* cycle incremented when incrementing curr_block */ - trace_xlog_iclog_syncing(iclog, _RET_IP_); - return true; - } - - ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); - return false; -} - /* * Flush iclog to disk if this is the last reference to the given iclog and the * it is in the WANT_SYNC state. @@ -519,19 +496,31 @@ xlog_state_release_iclog( struct xlog *log, struct xlog_in_core *iclog) { + xfs_lsn_t tail_lsn; lockdep_assert_held(&log->l_icloglock); trace_xlog_iclog_release(iclog, _RET_IP_); if (iclog->ic_state == XLOG_STATE_IOERROR) return -EIO; - if (atomic_dec_and_test(&iclog->ic_refcnt) && - __xlog_state_release_iclog(log, iclog)) { - spin_unlock(&log->l_icloglock); - xlog_sync(log, iclog); - spin_lock(&log->l_icloglock); + if (!atomic_dec_and_test(&iclog->ic_refcnt)) + return 0; + + if (iclog->ic_state != XLOG_STATE_WANT_SYNC) { + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); + return 0; } + /* update tail before writing to iclog */ + tail_lsn = xlog_assign_tail_lsn(log->l_mp); + iclog->ic_state = XLOG_STATE_SYNCING; + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog, tail_lsn); + trace_xlog_iclog_syncing(iclog, _RET_IP_); + + spin_unlock(&log->l_icloglock); + xlog_sync(log, iclog); + spin_lock(&log->l_icloglock); return 0; } From 0dc8f7f139f07aaca1afcec0ade5718c4ebba91e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:48 -0700 Subject: [PATCH 314/502] xfs: fix ordering violation between cache flushes and tail updates There is a race between the new CIL async data device metadata IO completion cache flush and the log tail in the iclog the flush covers being updated. This can be seen by repeating generic/482 in a loop and eventually log recovery fails with a failures such as this: XFS (dm-3): Starting recovery (logdev: internal) XFS (dm-3): bad inode magic/vsn daddr 228352 #0 (magic=0) XFS (dm-3): Metadata corruption detected at xfs_inode_buf_verify+0x180/0x190, xfs_inode block 0x37c00 xfs_inode_buf_verify XFS (dm-3): Unmount and run xfs_repair XFS (dm-3): First 128 bytes of corrupted metadata buffer: 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ XFS (dm-3): metadata I/O error in "xlog_recover_items_pass2+0x55/0xc0" at daddr 0x37c00 len 32 error 117 Analysis of the logwrite replay shows that there were no writes to the data device between the FUA @ write 124 and the FUA at write @ 125, but log recovery @ 125 failed. The difference was the one log write @ 125 moved the tail of the log forwards from (1,8) to (1,32) and so the inode create intent in (1,8) was not replayed and so the inode cluster was zero on disk when replay of the first inode item in (1,32) was attempted. What this meant was that the journal write that occurred at @ 125 did not ensure that metadata completed before the iclog was written was correctly on stable storage. The tail of the log moved forward, so IO must have been completed between the two iclog writes. This means that there is a race condition between the unconditional async cache flush in the CIL push work and the tail LSN that is written to the iclog. This happens like so: CIL push work AIL push work ------------- ------------- Add to committing list start async data dev cache flush ..... xlog_write .... push inode create buffer ..... xlog_write(commit record) .... log tail moves xlog_assign_tail_lsn() start_lsn == commit_lsn xlog_state_release_iclog __xlog_state_release_iclog() xlog_sync() .... submit_bio() Essentially, this can only occur if the commit iclog is issued without a cache flush. If the iclog bio is submitted with REQ_PREFLUSH, then it will guarantee that all the completed IO is one stable storage before the iclog bio with the new tail LSN in it is written to the log. IOWs, the tail lsn that is written to the iclog needs to be sampled *before* we issue the cache flush that guarantees all IO up to that LSN has been completed. To fix this without giving up the performance advantage of the flush/FUA optimisations (e.g. g/482 runtime halves with 5.14-rc1 compared to 5.13), we need to ensure that we always issue a cache flush if the tail LSN changes between the initial async flush and the commit record being written. THis requires sampling the tail_lsn before we start the flush, and then passing the sampled tail LSN to xlog_state_release_iclog() so it can determine if the the tail LSN has changed while writing the checkpoint. If the tail LSN has changed, then it needs to set the NEED_FLUSH flag on the iclog and we'll issue another cache flush before writing the iclog. Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions") Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 36 ++++++++++++++++++++++++++---------- fs/xfs/xfs_log_cil.c | 13 +++++++++++-- fs/xfs/xfs_log_priv.h | 3 ++- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 82f5996d3889..e8c6c96d4f7c 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -489,12 +489,17 @@ out_error: /* * Flush iclog to disk if this is the last reference to the given iclog and the - * it is in the WANT_SYNC state. + * it is in the WANT_SYNC state. If the caller passes in a non-zero + * @old_tail_lsn and the current log tail does not match, there may be metadata + * on disk that must be persisted before this iclog is written. To satisfy that + * requirement, set the XLOG_ICL_NEED_FLUSH flag as a condition for writing this + * iclog with the new log tail value. */ int xlog_state_release_iclog( struct xlog *log, - struct xlog_in_core *iclog) + struct xlog_in_core *iclog, + xfs_lsn_t old_tail_lsn) { xfs_lsn_t tail_lsn; lockdep_assert_held(&log->l_icloglock); @@ -503,6 +508,19 @@ xlog_state_release_iclog( if (iclog->ic_state == XLOG_STATE_IOERROR) return -EIO; + /* + * Grabbing the current log tail needs to be atomic w.r.t. the writing + * of the tail LSN into the iclog so we guarantee that the log tail does + * not move between deciding if a cache flush is required and writing + * the LSN into the iclog below. + */ + if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) { + tail_lsn = xlog_assign_tail_lsn(log->l_mp); + + if (old_tail_lsn && tail_lsn != old_tail_lsn) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + } + if (!atomic_dec_and_test(&iclog->ic_refcnt)) return 0; @@ -511,8 +529,6 @@ xlog_state_release_iclog( return 0; } - /* update tail before writing to iclog */ - tail_lsn = xlog_assign_tail_lsn(log->l_mp); iclog->ic_state = XLOG_STATE_SYNCING; iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); xlog_verify_tail_lsn(log, iclog, tail_lsn); @@ -858,7 +874,7 @@ out_err: * iclog containing the unmount record is written. */ iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); xlog_wait_on_iclog(iclog); if (tic) { @@ -2302,7 +2318,7 @@ xlog_write_copy_finish( return 0; release_iclog: - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); return error; } @@ -2521,7 +2537,7 @@ next_lv: ASSERT(optype & XLOG_COMMIT_TRANS); *commit_iclog = iclog; } else { - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); } spin_unlock(&log->l_icloglock); @@ -2959,7 +2975,7 @@ restart: * reference to the iclog. */ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); if (error) return error; @@ -3195,7 +3211,7 @@ xfs_log_force( atomic_inc(&iclog->ic_refcnt); lsn = be64_to_cpu(iclog->ic_header.h_lsn); xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + if (xlog_state_release_iclog(log, iclog, 0)) goto out_error; if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) @@ -3275,7 +3291,7 @@ xlog_force_lsn( } atomic_inc(&iclog->ic_refcnt); xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + if (xlog_state_release_iclog(log, iclog, 0)) goto out_error; if (log_flushed) *log_flushed = 1; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b128aaa9b870..4c44bc3786c0 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -654,8 +654,9 @@ xlog_cil_push_work( struct xfs_trans_header thdr; struct xfs_log_iovec lhdr; struct xfs_log_vec lvhdr = { NULL }; + xfs_lsn_t preflush_tail_lsn; xfs_lsn_t commit_lsn; - xfs_lsn_t push_seq; + xfs_csn_t push_seq; struct bio bio; DECLARE_COMPLETION_ONSTACK(bdev_flush); @@ -730,7 +731,15 @@ xlog_cil_push_work( * because we hold the flush lock exclusively. Hence we can now issue * a cache flush to ensure all the completed metadata in the journal we * are about to overwrite is on stable storage. + * + * Because we are issuing this cache flush before we've written the + * tail lsn to the iclog, we can have metadata IO completions move the + * tail forwards between the completion of this flush and the iclog + * being written. In this case, we need to re-issue the cache flush + * before the iclog write. To detect whether the log tail moves, sample + * the tail LSN *before* we issue the flush. */ + preflush_tail_lsn = atomic64_read(&log->l_tail_lsn); xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev, &bdev_flush); @@ -941,7 +950,7 @@ restart: * storage. */ commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; - xlog_state_release_iclog(log, commit_iclog); + xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn); spin_unlock(&log->l_icloglock); return; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 4c41bbfa33b0..7cbde0b4f990 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -497,7 +497,8 @@ int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); -int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog); +int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, + xfs_lsn_t log_tail_lsn); /* * When we crack an atomic LSN, we sample it first so that the value will not From 45eddb414047c366744cc60dd6cef7c7e58c6ab9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:48 -0700 Subject: [PATCH 315/502] xfs: factor out forced iclog flushes We force iclogs in several places - we need them all to have the same cache flush semantics, so start by factoring out the iclog force into a common helper. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e8c6c96d4f7c..184c68ea62bb 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -778,6 +778,20 @@ xfs_log_mount_cancel( xfs_log_unmount(mp); } +/* + * Flush out the iclog to disk ensuring that device caches are flushed and + * the iclog hits stable storage before any completion waiters are woken. + */ +static inline int +xlog_force_iclog( + struct xlog_in_core *iclog) +{ + atomic_inc(&iclog->ic_refcnt); + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); + return xlog_state_release_iclog(iclog->ic_log, iclog, 0); +} + /* * Wait for the iclog and all prior iclogs to be written disk as required by the * log force state machine. Waiting on ic_force_wait ensures iclog completions @@ -863,18 +877,8 @@ out_err: spin_lock(&log->l_icloglock); iclog = log->l_iclog; - atomic_inc(&iclog->ic_refcnt); - if (iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(log, iclog, 0); - else - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR); - /* - * Ensure the journal is fully flushed and on stable storage once the - * iclog containing the unmount record is written. - */ iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); - error = xlog_state_release_iclog(log, iclog, 0); + error = xlog_force_iclog(iclog); xlog_wait_on_iclog(iclog); if (tic) { @@ -3201,17 +3205,9 @@ xfs_log_force( iclog = iclog->ic_prev; } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { if (atomic_read(&iclog->ic_refcnt) == 0) { - /* - * We are the only one with access to this iclog. - * - * Flush it out now. There should be a roundoff of zero - * to show that someone has already taken care of the - * roundoff from the previous sync. - */ - atomic_inc(&iclog->ic_refcnt); + /* We have exclusive access to this iclog. */ lsn = be64_to_cpu(iclog->ic_header.h_lsn); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog, 0)) + if (xlog_force_iclog(iclog)) goto out_error; if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) @@ -3289,9 +3285,7 @@ xlog_force_lsn( &log->l_icloglock); return -EAGAIN; } - atomic_inc(&iclog->ic_refcnt); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog, 0)) + if (xlog_force_iclog(iclog)) goto out_error; if (log_flushed) *log_flushed = 1; From 2bf1ec0ff067ff8f692d261b29c713f3583f7e2a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:49 -0700 Subject: [PATCH 316/502] xfs: log forces imply data device cache flushes After fixing the tail_lsn vs cache flush race, generic/482 continued to fail in a similar way where cache flushes were missing before iclog FUA writes. Tracing of iclog state changes during the fsstress workload portion of the test (via xlog_iclog* events) indicated that iclog writes were coming from two sources - CIL pushes and log forces (due to fsync/O_SYNC operations). All of the cases where a recovery problem was triggered indicated that the log force was the source of the iclog write that was not preceeded by a cache flush. This was an oversight in the modifications made in commit eef983ffeae7 ("xfs: journal IO cache flush reductions"). Log forces for fsync imply a data device cache flush has been issued if an iclog was flushed to disk and is indicated to the caller via the log_flushed parameter so they can elide the device cache flush if the journal issued one. The change in eef983ffeae7 results in iclogs only issuing a cache flush if XLOG_ICL_NEED_FLUSH is set on the iclog, but this was not added to the iclogs that the log force code flushes to disk. Hence log forces are no longer guaranteeing that a cache flush is issued, hence opening up a potential on-disk ordering failure. Log forces should also set XLOG_ICL_NEED_FUA as well to ensure that the actual iclogs it forces to the journal are also on stable storage before it returns to the caller. This patch introduces the xlog_force_iclog() helper function to encapsulate the process of taking a reference to an iclog, switching its state if WANT_SYNC and flushing it to stable storage correctly. Both xfs_log_force() and xfs_log_force_lsn() are converted to use it, as is xlog_unmount_write() which has an elaborate method of doing exactly the same "write this iclog to stable storage" operation. Further, if the log force code needs to wait on a iclog in the WANT_SYNC state, it needs to ensure that iclog also results in a cache flush being issued. This covers the case where the iclog contains the commit record of the CIL flush that the log force triggered, but it hasn't been written yet because there is still an active reference to the iclog. Note: this whole cache flush whack-a-mole patch is a result of log forces still being iclog state centric rather than being CIL sequence centric. Most of this nasty code will go away in future when log forces are converted to wait on CIL sequence push completion rather than iclog completion. With the CIL push algorithm guaranteeing that the CIL checkpoint is fully on stable storage when it completes, we no longer need to iterate iclogs and push them to ensure a CIL sequence push has completed and so all this nasty iclog iteration and flushing code will go away. Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions") Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 184c68ea62bb..160b8bb7ee60 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -787,6 +787,7 @@ xlog_force_iclog( struct xlog_in_core *iclog) { atomic_inc(&iclog->ic_refcnt); + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; if (iclog->ic_state == XLOG_STATE_ACTIVE) xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); return xlog_state_release_iclog(iclog->ic_log, iclog, 0); @@ -877,7 +878,6 @@ out_err: spin_lock(&log->l_icloglock); iclog = log->l_iclog; - iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); error = xlog_force_iclog(iclog); xlog_wait_on_iclog(iclog); @@ -3214,22 +3214,23 @@ xfs_log_force( goto out_unlock; } else { /* - * Someone else is writing to this iclog. - * - * Use its call to flush out the data. However, the - * other thread may not force out this LR, so we mark - * it WANT_SYNC. + * Someone else is still writing to this iclog, so we + * need to ensure that when they release the iclog it + * gets synced immediately as we may be waiting on it. */ xlog_state_switch_iclogs(log, iclog, 0); } - } else { - /* - * If the head iclog is not active nor dirty, we just attach - * ourselves to the head and go to sleep if necessary. - */ - ; } + /* + * The iclog we are about to wait on may contain the checkpoint pushed + * by the above xlog_cil_force() call, but it may not have been pushed + * to disk yet. Like the ACTIVE case above, we need to make sure caches + * are flushed when this iclog is written. + */ + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + if (flags & XFS_LOG_SYNC) return xlog_wait_on_iclog(iclog); out_unlock: @@ -3262,7 +3263,8 @@ xlog_force_lsn( goto out_unlock; } - if (iclog->ic_state == XLOG_STATE_ACTIVE) { + switch (iclog->ic_state) { + case XLOG_STATE_ACTIVE: /* * We sleep here if we haven't already slept (e.g. this is the * first time we've looked at the correct iclog buf) and the @@ -3289,6 +3291,25 @@ xlog_force_lsn( goto out_error; if (log_flushed) *log_flushed = 1; + break; + case XLOG_STATE_WANT_SYNC: + /* + * This iclog may contain the checkpoint pushed by the + * xlog_cil_force_seq() call, but there are other writers still + * accessing it so it hasn't been pushed to disk yet. Like the + * ACTIVE case above, we need to make sure caches are flushed + * when this iclog is written. + */ + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + break; + default: + /* + * The entire checkpoint was written by the CIL force and is on + * its way to disk already. It will be stable when it + * completes, so we don't need to manipulate caches here at all. + * We just need to wait for completion if necessary. + */ + break; } if (flags & XFS_LOG_SYNC) From 8191d8222c514c69a8e1ac46bd9812b9e0aab7d0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:49 -0700 Subject: [PATCH 317/502] xfs: avoid unnecessary waits in xfs_log_force_lsn() Before waiting on a iclog in xfs_log_force_lsn(), we don't check to see if the iclog has already been completed and the contents on stable storage. We check for completed iclogs in xfs_log_force(), so we should do the same thing for xfs_log_force_lsn(). This fixed some random up-to-30s pauses seen in unmounting filesystems in some tests. A log force ends up waiting on completed iclog, and that doesn't then get flushed (and hence the log force get completed) until the background log worker issues a log force that flushes the iclog in question. Then the unmount unblocks and continues. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 160b8bb7ee60..1c328efdca66 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3143,6 +3143,35 @@ xlog_state_switch_iclogs( log->l_iclog = iclog->ic_next; } +/* + * Force the iclog to disk and check if the iclog has been completed before + * xlog_force_iclog() returns. This can happen on synchronous (e.g. + * pmem) or fast async storage because we drop the icloglock to issue the IO. + * If completion has already occurred, tell the caller so that it can avoid an + * unnecessary wait on the iclog. + */ +static int +xlog_force_and_check_iclog( + struct xlog_in_core *iclog, + bool *completed) +{ + xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); + int error; + + *completed = false; + error = xlog_force_iclog(iclog); + if (error) + return error; + + /* + * If the iclog has already been completed and reused the header LSN + * will have been rewritten by completion + */ + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + *completed = true; + return 0; +} + /* * Write out all data in the in-core log as of this exact moment in time. * @@ -3177,7 +3206,6 @@ xfs_log_force( { struct xlog *log = mp->m_log; struct xlog_in_core *iclog; - xfs_lsn_t lsn; XFS_STATS_INC(mp, xs_log_force); trace_xfs_log_force(mp, 0, _RET_IP_); @@ -3206,11 +3234,12 @@ xfs_log_force( } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { if (atomic_read(&iclog->ic_refcnt) == 0) { /* We have exclusive access to this iclog. */ - lsn = be64_to_cpu(iclog->ic_header.h_lsn); - if (xlog_force_iclog(iclog)) + bool completed; + + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + if (completed) goto out_unlock; } else { /* @@ -3250,6 +3279,7 @@ xlog_force_lsn( bool already_slept) { struct xlog_in_core *iclog; + bool completed; spin_lock(&log->l_icloglock); iclog = log->l_iclog; @@ -3287,10 +3317,12 @@ xlog_force_lsn( &log->l_icloglock); return -EAGAIN; } - if (xlog_force_iclog(iclog)) + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; if (log_flushed) *log_flushed = 1; + if (completed) + goto out_unlock; break; case XLOG_STATE_WANT_SYNC: /* From 32baa63d82ee3f5ab3bd51bae6bf7d1c15aed8c7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:49 -0700 Subject: [PATCH 318/502] xfs: logging the on disk inode LSN can make it go backwards When we log an inode, we format the "log inode" core and set an LSN in that inode core. We do that via xfs_inode_item_format_core(), which calls: xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn); to format the log inode. It writes the LSN from the inode item into the log inode, and if recovery decides the inode item needs to be replayed, it recovers the log inode LSN field and writes it into the on disk inode LSN field. Now this might seem like a reasonable thing to do, but it is wrong on multiple levels. Firstly, if the item is not yet in the AIL, item->li_lsn is zero. i.e. the first time the inode it is logged and formatted, the LSN we write into the log inode will be zero. If we only log it once, recovery will run and can write this zero LSN into the inode. This means that the next time the inode is logged and log recovery runs, it will *always* replay changes to the inode regardless of whether the inode is newer on disk than the version in the log and that violates the entire purpose of recording the LSN in the inode at writeback time (i.e. to stop it going backwards in time on disk during recovery). Secondly, if we commit the CIL to the journal so the inode item moves to the AIL, and then relog the inode, the LSN that gets stamped into the log inode will be the LSN of the inode's current location in the AIL, not it's age on disk. And it's not the LSN that will be associated with the current change. That means when log recovery replays this inode item, the LSN that ends up on disk is the LSN for the previous changes in the log, not the current changes being replayed. IOWs, after recovery the LSN on disk is not in sync with the LSN of the modifications that were replayed into the inode. This, again, violates the recovery ordering semantics that on-disk writeback LSNs provide. Hence the inode LSN in the log dinode is -always- invalid. Thirdly, recovery actually has the LSN of the log transaction it is replaying right at hand - it uses it to determine if it should replay the inode by comparing it to the on-disk inode's LSN. But it doesn't use that LSN to stamp the LSN into the inode which will be written back when the transaction is fully replayed. It uses the one in the log dinode, which we know is always going to be incorrect. Looking back at the change history, the inode logging was broken by commit 93f958f9c41f ("xfs: cull unnecessary icdinode fields") way back in 2016 by a stupid idiot who thought he knew how this code worked. i.e. me. That commit replaced an in memory di_lsn field that was updated only at inode writeback time from the inode item.li_lsn value - and hence always contained the same LSN that appeared in the on-disk inode - with a read of the inode item LSN at inode format time. CLearly these are not the same thing. Before 93f958f9c41f, the log recovery behaviour was irrelevant, because the LSN in the log inode always matched the on-disk LSN at the time the inode was logged, hence recovery of the transaction would never make the on-disk LSN in the inode go backwards or get out of sync. A symptom of the problem is this, caught from a failure of generic/482. Before log recovery, the inode has been allocated but never used: xfs_db> inode 393388 xfs_db> p core.magic = 0x494e core.mode = 0 .... v3.crc = 0x99126961 (correct) v3.change_count = 0 v3.lsn = 0 v3.flags2 = 0 v3.cowextsize = 0 v3.crtime.sec = Thu Jan 1 10:00:00 1970 v3.crtime.nsec = 0 After log recovery: xfs_db> p core.magic = 0x494e core.mode = 020444 .... v3.crc = 0x23e68f23 (correct) v3.change_count = 2 v3.lsn = 0 v3.flags2 = 0 v3.cowextsize = 0 v3.crtime.sec = Thu Jul 22 17:03:03 2021 v3.crtime.nsec = 751000000 ... You can see that the LSN of the on-disk inode is 0, even though it clearly has been written to disk. I point out this inode, because the generic/482 failure occurred because several adjacent inodes in this specific inode cluster were not replayed correctly and still appeared to be zero on disk when all the other metadata (inobt, finobt, directories, etc) indicated they should be allocated and written back. The fix for this is two-fold. The first is that we need to either revert the LSN changes in 93f958f9c41f or stop logging the inode LSN altogether. If we do the former, log recovery does not need to change but we add 8 bytes of memory per inode to store what is largely a write-only inode field. If we do the latter, log recovery needs to stamp the on-disk inode in the same manner that inode writeback does. I prefer the latter, because we shouldn't really be trying to log and replay changes to the on disk LSN as the on-disk value is the canonical source of the on-disk version of the inode. It also matches the way we recover buffer items - we create a buf_log_item that carries the current recovery transaction LSN that gets stamped into the buffer by the write verifier when it gets written back when the transaction is fully recovered. However, this might break log recovery on older kernels even more, so I'm going to simply ignore the logged value in recovery and stamp the on-disk inode with the LSN of the transaction being recovered that will trigger writeback on transaction recovery completion. This will ensure that the on-disk inode LSN always reflects the LSN of the last change that was written to disk, regardless of whether it comes from log recovery or runtime writeback. Fixes: 93f958f9c41f ("xfs: cull unnecessary icdinode fields") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_log_format.h | 11 +++++++++- fs/xfs/xfs_inode_item_recover.c | 39 ++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index d548ea4b6aab..2c5bcbc19264 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -411,7 +411,16 @@ struct xfs_log_dinode { /* start of the extended dinode, writable fields */ uint32_t di_crc; /* CRC of the inode */ uint64_t di_changecount; /* number of attribute changes */ - xfs_lsn_t di_lsn; /* flush sequence */ + + /* + * The LSN we write to this field during formatting is not a reflection + * of the current on-disk LSN. It should never be used for recovery + * sequencing, nor should it be recovered into the on-disk inode at all. + * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk() + * for details. + */ + xfs_lsn_t di_lsn; + uint64_t di_flags2; /* more random flags */ uint32_t di_cowextsize; /* basic cow extent size for file */ uint8_t di_pad2[12]; /* more padding for future expansion */ diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 7b79518b6c20..e0072a6cd2d3 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts( STATIC void xfs_log_dinode_to_disk( struct xfs_log_dinode *from, - struct xfs_dinode *to) + struct xfs_dinode *to, + xfs_lsn_t lsn) { to->di_magic = cpu_to_be16(from->di_magic); to->di_mode = cpu_to_be16(from->di_mode); @@ -182,7 +183,7 @@ xfs_log_dinode_to_disk( to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(from->di_ino); - to->di_lsn = cpu_to_be64(from->di_lsn); + to->di_lsn = cpu_to_be64(lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &from->di_uuid); to->di_flushiter = 0; @@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2( } /* - * If the inode has an LSN in it, recover the inode only if it's less - * than the lsn of the transaction we are replaying. Note: we still - * need to replay an owner change even though the inode is more recent - * than the transaction as there is no guarantee that all the btree - * blocks are more recent than this transaction, too. + * If the inode has an LSN in it, recover the inode only if the on-disk + * inode's LSN is older than the lsn of the transaction we are + * replaying. We can have multiple checkpoints with the same start LSN, + * so the current LSN being equal to the on-disk LSN doesn't necessarily + * mean that the on-disk inode is more recent than the change being + * replayed. + * + * We must check the current_lsn against the on-disk inode + * here because the we can't trust the log dinode to contain a valid LSN + * (see comment below before replaying the log dinode for details). + * + * Note: we still need to replay an owner change even though the inode + * is more recent than the transaction as there is no guarantee that all + * the btree blocks are more recent than this transaction, too. */ if (dip->di_version >= 3) { xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) { trace_xfs_log_recover_inode_skip(log, in_f); error = 0; goto out_owner_change; @@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2( goto out_release; } - /* recover the log dinode inode into the on disk inode */ - xfs_log_dinode_to_disk(ldip, dip); + /* + * Recover the log dinode inode into the on disk inode. + * + * The LSN in the log dinode is garbage - it can be zero or reflect + * stale in-memory runtime state that isn't coherent with the changes + * logged in this transaction or the changes written to the on-disk + * inode. Hence we write the current lSN into the inode because that + * matches what xfs_iflush() would write inode the inode when flushing + * the changes in this transaction. + */ + xfs_log_dinode_to_disk(ldip, dip, current_lsn); fields = in_f->ilf_fields; if (fields & XFS_ILOG_DEV) From d8f4c2d0398fa1d92cacf854daf80d21a46bfefc Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:50 -0700 Subject: [PATCH 319/502] xfs: Enforce attr3 buffer recovery order From the department of "WTAF? How did we miss that!?"... When we are recovering a buffer, the first thing we do is check the buffer magic number and extract the LSN from the buffer. If the LSN is older than the current LSN, we replay the modification to it. If the metadata on disk is newer than the transaction in the log, we skip it. This is a fundamental v5 filesystem metadata recovery behaviour. generic/482 failed with an attribute writeback failure during log recovery. The write verifier caught the corruption before it got written to disk, and the attr buffer dump looked like: XFS (dm-3): Metadata corruption detected at xfs_attr3_leaf_verify+0x275/0x2e0, xfs_attr3_leaf block 0x19be8 XFS (dm-3): Unmount and run xfs_repair XFS (dm-3): First 128 bytes of corrupted metadata buffer: 00000000: 00 00 00 00 00 00 00 00 3b ee 00 00 4d 2a 01 e1 ........;...M*.. 00000010: 00 00 00 00 00 01 9b e8 00 00 00 01 00 00 05 38 ...............8 ^^^^^^^^^^^^^^^^^^^^^^^ 00000020: df 39 5e 51 58 ac 44 b6 8d c5 e7 10 44 09 bc 17 .9^QX.D.....D... 00000030: 00 00 00 00 00 02 00 83 00 03 00 cc 0f 24 01 00 .............$.. 00000040: 00 68 0e bc 0f c8 00 10 00 00 00 00 00 00 00 00 .h.............. 00000050: 00 00 3c 31 0f 24 01 00 00 00 3c 32 0f 88 01 00 ..<1.$....<2.... 00000060: 00 00 3c 33 0f d8 01 00 00 00 00 00 00 00 00 00 ..<3............ 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ ..... The highlighted bytes are the LSN that was replayed into the buffer: 0x100000538. This is cycle 1, block 0x538. Prior to replay, that block on disk looks like this: $ sudo xfs_db -c "fsb 0x417d" -c "type attr3" -c p /dev/mapper/thin-vol hdr.info.hdr.forw = 0 hdr.info.hdr.back = 0 hdr.info.hdr.magic = 0x3bee hdr.info.crc = 0xb5af0bc6 (correct) hdr.info.bno = 105448 hdr.info.lsn = 0x100000900 ^^^^^^^^^^^ hdr.info.uuid = df395e51-58ac-44b6-8dc5-e7104409bc17 hdr.info.owner = 131203 hdr.count = 2 hdr.usedbytes = 120 hdr.firstused = 3796 hdr.holes = 1 hdr.freemap[0-2] = [base,size] Note the LSN stamped into the buffer on disk: 1/0x900. The version on disk is much newer than the log transaction that was being replayed. That's a bug, and should -never- happen. So I immediately went to look at xlog_recover_get_buf_lsn() to check that we handled the LSN correctly. I was wondering if there was a similar "two commits with the same start LSN skips the second replay" problem with buffers. I didn't get that far, because I found a much more basic, rudimentary bug: xlog_recover_get_buf_lsn() doesn't recognise buffers with XFS_ATTR3_LEAF_MAGIC set in them!!! IOWs, attr3 leaf buffers fall through the magic number checks unrecognised, so trigger the "recover immediately" behaviour instead of undergoing an LSN check. IOWs, we incorrectly replay ATTR3 leaf buffers and that causes silent on disk corruption of inode attribute forks and potentially other things.... Git history shows this is *another* zero day bug, this time introduced in commit 50d5c8d8e938 ("xfs: check LSN ordering for v5 superblocks during recovery") which failed to handle the attr3 leaf buffers in recovery. And we've failed to handle them ever since... Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf_item_recover.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d44e8b4a3391..05fd816edf59 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn( switch (magicda) { case XFS_DIR3_LEAF1_MAGIC: case XFS_DIR3_LEAFN_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: case XFS_DA3_NODE_MAGIC: lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; From b2ae3a9ef91152931b99620c431cf3805daa1429 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 27 Jul 2021 16:23:50 -0700 Subject: [PATCH 320/502] xfs: need to see iclog flags in tracing Because I cannot tell if the NEED_FLUSH flag is being set correctly by the log force and CIL push machinery without it. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log_priv.h | 13 ++++++++++--- fs/xfs/xfs_trace.h | 5 ++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 7cbde0b4f990..f3e79a45d60a 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -59,6 +59,16 @@ enum xlog_iclog_state { { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" }, \ { XLOG_STATE_IOERROR, "XLOG_STATE_IOERROR" } +/* + * In core log flags + */ +#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ +#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ + +#define XLOG_ICL_STRINGS \ + { XLOG_ICL_NEED_FLUSH, "XLOG_ICL_NEED_FLUSH" }, \ + { XLOG_ICL_NEED_FUA, "XLOG_ICL_NEED_FUA" } + /* * Log ticket flags @@ -143,9 +153,6 @@ enum xlog_iclog_state { #define XLOG_COVER_OPS 5 -#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ -#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ - /* Ticket reservation region accounting */ #define XLOG_TIC_LEN_MAX 15 diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f9d8d605f9b1..19260291ff8b 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3944,6 +3944,7 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __field(uint32_t, state) __field(int32_t, refcount) __field(uint32_t, offset) + __field(uint32_t, flags) __field(unsigned long long, lsn) __field(unsigned long, caller_ip) ), @@ -3952,15 +3953,17 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __entry->state = iclog->ic_state; __entry->refcount = atomic_read(&iclog->ic_refcnt); __entry->offset = iclog->ic_offset; + __entry->flags = iclog->ic_flags; __entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx caller %pS", + TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->state, XLOG_STATE_STRINGS), __entry->refcount, __entry->offset, __entry->lsn, + __print_flags(__entry->flags, "|", XLOG_ICL_STRINGS), (char *)__entry->caller_ip) ); From 9d110014205cb1129fa570d8de83d486fa199354 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 28 Jul 2021 17:14:11 -0700 Subject: [PATCH 321/502] xfs: limit iclog tail updates From the department of "generic/482 keeps on giving", we bring you another tail update race condition: iclog: S1 C1 +-----------------------+-----------------------+ S2 EOIC Two checkpoints in a single iclog. One is complete, the other just contains the start record and overruns into a new iclog. Timeline: Before S1: Cache flush, log tail = X At S1: Metadata stable, write start record and checkpoint At C1: Write commit record, set NEED_FUA Single iclog checkpoint, so no need for NEED_FLUSH Log tail still = X, so no need for NEED_FLUSH After C1, Before S2: Cache flush, log tail = X At S2: Metadata stable, write start record and checkpoint After S2: Log tail moves to X+1 At EOIC: End of iclog, more journal data to write Releases iclog Not a commit iclog, so no need for NEED_FLUSH Writes log tail X+1 into iclog. At this point, the iclog has tail X+1 and NEED_FUA set. There has been no cache flush for the metadata between X and X+1, and the iclog writes the new tail permanently to the log. THis is sufficient to violate on disk metadata/journal ordering. We have two options here. The first is to detect this case in some manner and ensure that the partial checkpoint write sets NEED_FLUSH when the iclog is already marked NEED_FUA and the log tail changes. This seems somewhat fragile and quite complex to get right, and it doesn't actually make it obvious what underlying problem it is actually addressing from reading the code. The second option seems much cleaner to me, because it is derived directly from the requirements of the C1 commit record in the iclog. That is, when we write this commit record to the iclog, we've guaranteed that the metadata/data ordering is correct for tail update purposes. Hence if we only write the log tail into the iclog for the *first* commit record rather than the log tail at the last release, we guarantee that the log tail does not move past where the the first commit record in the log expects it to be. IOWs, taking the first option means that replay of C1 becomes dependent on future operations doing the right thing, not just the C1 checkpoint itself doing the right thing. This makes log recovery almost impossible to reason about because now we have to take into account what might or might not have happened in the future when looking at checkpoints in the log rather than just having to reconstruct the past... Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 50 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1c328efdca66..60ac5fd63f1e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -78,13 +78,12 @@ xlog_verify_iclog( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn); + struct xlog_in_core *iclog); #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) #define xlog_verify_iclog(a,b,c) -#define xlog_verify_tail_lsn(a,b,c) +#define xlog_verify_tail_lsn(a,b) #endif STATIC int @@ -489,12 +488,31 @@ out_error: /* * Flush iclog to disk if this is the last reference to the given iclog and the - * it is in the WANT_SYNC state. If the caller passes in a non-zero - * @old_tail_lsn and the current log tail does not match, there may be metadata - * on disk that must be persisted before this iclog is written. To satisfy that - * requirement, set the XLOG_ICL_NEED_FLUSH flag as a condition for writing this - * iclog with the new log tail value. + * it is in the WANT_SYNC state. + * + * If the caller passes in a non-zero @old_tail_lsn and the current log tail + * does not match, there may be metadata on disk that must be persisted before + * this iclog is written. To satisfy that requirement, set the + * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new + * log tail value. + * + * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the + * log tail is updated correctly. NEED_FUA indicates that the iclog will be + * written to stable storage, and implies that a commit record is contained + * within the iclog. We need to ensure that the log tail does not move beyond + * the tail that the first commit record in the iclog ordered against, otherwise + * correct recovery of that checkpoint becomes dependent on future operations + * performed on this iclog. + * + * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the + * current tail into iclog. Once the iclog tail is set, future operations must + * not modify it, otherwise they potentially violate ordering constraints for + * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in + * the iclog will get zeroed on activation of the iclog after sync, so we + * always capture the tail lsn on the iclog on the first NEED_FUA release + * regardless of the number of active reference counts on this iclog. */ + int xlog_state_release_iclog( struct xlog *log, @@ -519,6 +537,10 @@ xlog_state_release_iclog( if (old_tail_lsn && tail_lsn != old_tail_lsn) iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + + if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) && + !iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); } if (!atomic_dec_and_test(&iclog->ic_refcnt)) @@ -530,8 +552,9 @@ xlog_state_release_iclog( } iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog, tail_lsn); + if (!iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog); trace_xlog_iclog_syncing(iclog, _RET_IP_); spin_unlock(&log->l_icloglock); @@ -2579,6 +2602,7 @@ xlog_state_activate_iclog( memset(iclog->ic_header.h_cycle_data, 0, sizeof(iclog->ic_header.h_cycle_data)); iclog->ic_header.h_lsn = 0; + iclog->ic_header.h_tail_lsn = 0; } /* @@ -3614,10 +3638,10 @@ xlog_verify_grant_tail( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn) + struct xlog_in_core *iclog) { - int blocks; + xfs_lsn_t tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn); + int blocks; if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) { blocks = From 81a448d7b0668ae39c08e6f34a54cc7eafb844f1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 26 Jul 2021 16:43:17 -0700 Subject: [PATCH 322/502] xfs: prevent spoofing of rtbitmap blocks when recovering buffers While reviewing the buffer item recovery code, the thought occurred to me: in V5 filesystems we use log sequence number (LSN) tracking to avoid replaying older metadata updates against newer log items. However, we use the magic number of the ondisk buffer to find the LSN of the ondisk metadata, which means that if an attacker can control the layout of the realtime device precisely enough that the start of an rt bitmap block matches the magic and UUID of some other kind of block, they can control the purported LSN of that spoofed block and thereby break log replay. Since realtime bitmap and summary blocks don't have headers at all, we have no way to tell if a block really should be replayed. The best we can do is replay unconditionally and hope for the best. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Carlos Maiolino --- fs/xfs/xfs_buf_item_recover.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index 05fd816edf59..4775485b4062 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -698,7 +698,8 @@ xlog_recover_do_inode_buffer( static xfs_lsn_t xlog_recover_get_buf_lsn( struct xfs_mount *mp, - struct xfs_buf *bp) + struct xfs_buf *bp, + struct xfs_buf_log_format *buf_f) { uint32_t magic32; uint16_t magic16; @@ -706,11 +707,20 @@ xlog_recover_get_buf_lsn( void *blk = bp->b_addr; uuid_t *uuid; xfs_lsn_t lsn = -1; + uint16_t blft; /* v4 filesystems always recover immediately */ if (!xfs_sb_version_hascrc(&mp->m_sb)) goto recover_immediately; + /* + * realtime bitmap and summary file blocks do not have magic numbers or + * UUIDs, so we must recover them immediately. + */ + blft = xfs_blft_from_flags(buf_f); + if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF) + goto recover_immediately; + magic32 = be32_to_cpu(*(__be32 *)blk); switch (magic32) { case XFS_ABTB_CRC_MAGIC: @@ -920,7 +930,7 @@ xlog_recover_buf_commit_pass2( * the verifier will be reset to match whatever recover turns that * buffer into. */ - lsn = xlog_recover_get_buf_lsn(mp, bp); + lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { trace_xfs_log_recover_buf_skip(log, buf_f); xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); From 696e572dc85c674b31f4f13f59d8e217ee1b057f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 26 Jul 2021 15:06:22 -0500 Subject: [PATCH 323/502] ARM: riscpc: Fix fall-through warning for Clang Fix the following fallthrough warning: arch/arm/mach-rpc/riscpc.c:52:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] default: ^ arch/arm/mach-rpc/riscpc.c:52:2: note: insert 'break;' to avoid fall-through default: ^ break; Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202107260355.bF00i5bi-lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- arch/arm/mach-rpc/riscpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c index d23970bd638d..f70fb9c4b0cb 100644 --- a/arch/arm/mach-rpc/riscpc.c +++ b/arch/arm/mach-rpc/riscpc.c @@ -49,6 +49,7 @@ static int __init parse_tag_acorn(const struct tag *tag) fallthrough; /* ??? */ case 256: vram_size += PAGE_SIZE * 256; + break; default: break; } From eb4f520ca691f109f5fb1d16fc9cc26447a941e1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 26 Jul 2021 15:33:53 -0500 Subject: [PATCH 324/502] scsi: acornscsi: Fix fall-through warning for clang Fix the following fallthrough warning (on ARM): drivers/scsi/arm/acornscsi.c:2651:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] case res_success: ^ drivers/scsi/arm/acornscsi.c:2651:2: note: insert '__attribute__((fallthrough));' to silence this warning case res_success: ^ __attribute__((fallthrough)); drivers/scsi/arm/acornscsi.c:2651:2: note: insert 'break;' to avoid fall-through case res_success: ^ break; Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202107260355.bF00i5bi-lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/scsi/arm/acornscsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c index 84fc7a0c6ff4..4a84599ff491 100644 --- a/drivers/scsi/arm/acornscsi.c +++ b/drivers/scsi/arm/acornscsi.c @@ -2642,6 +2642,7 @@ int acornscsi_abort(struct scsi_cmnd *SCpnt) //#endif clear_bit(SCpnt->device->id * 8 + (u8)(SCpnt->device->lun & 0x7), host->busyluns); + fallthrough; /* * We found the command, and cleared it out. Either From cb163627e6d32dbaca4d89b2292788cee895b06d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 26 Jul 2021 15:46:47 -0500 Subject: [PATCH 325/502] scsi: fas216: Fix fall-through warning for Clang Fix the following fallthrough warning (on ARM): drivers/scsi/arm/fas216.c:1379:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] default: ^ drivers/scsi/arm/fas216.c:1379:2: note: insert 'break;' to avoid fall-through default: ^ break; Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202107260355.bF00i5bi-lkp@intel.com/ Signed-off-by: Gustavo A. R. Silva --- drivers/scsi/arm/fas216.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c index 6baa9b36367d..9c4458a99025 100644 --- a/drivers/scsi/arm/fas216.c +++ b/drivers/scsi/arm/fas216.c @@ -1375,6 +1375,7 @@ static void fas216_busservice_intr(FAS216_Info *info, unsigned int stat, unsigne case IS_COMPLETE: break; } + break; default: break; From 0aab5dce395636eddf4e5f33eba88390328a95b4 Mon Sep 17 00:00:00 2001 From: Edmund Dea Date: Tue, 25 Aug 2020 14:51:17 -0700 Subject: [PATCH 326/502] drm/kmb: Enable LCD DMA for low TVDDCV There's an undocumented dependency between LCD layer enable bits [2-5] and the AXI pipelined read enable bit [28] in the LCD_CONTROL register. The proper order of operation is: 1) Clear AXI pipelined read enable bit 2) Set LCD layers 3) Set AXI pipelined read enable bit With this update, LCD can start DMA when TVDDCV is reduced down to 700mV. Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display") Signed-off-by: Edmund Dea Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210728003126.1425028-1-anitha.chrisanthus@intel.com --- drivers/gpu/drm/kmb/kmb_drv.c | 14 ++++++++++++++ drivers/gpu/drm/kmb/kmb_plane.c | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index 96ea1a2c11dd..c0b1c6f99249 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -203,6 +203,7 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev) unsigned long status, val, val1; int plane_id, dma0_state, dma1_state; struct kmb_drm_private *kmb = to_kmb(dev); + u32 ctrl = 0; status = kmb_read_lcd(kmb, LCD_INT_STATUS); @@ -227,6 +228,19 @@ static irqreturn_t handle_lcd_irq(struct drm_device *dev) kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, kmb->plane_status[plane_id].ctrl); + ctrl = kmb_read_lcd(kmb, LCD_CONTROL); + if (!(ctrl & (LCD_CTRL_VL1_ENABLE | + LCD_CTRL_VL2_ENABLE | + LCD_CTRL_GL1_ENABLE | + LCD_CTRL_GL2_ENABLE))) { + /* If no LCD layers are using DMA, + * then disable DMA pipelined AXI read + * transactions. + */ + kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, + LCD_CTRL_PIPELINE_DMA); + } + kmb->plane_status[plane_id].disable = false; } } diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c index d5b6195856d1..ecee6782612d 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.c +++ b/drivers/gpu/drm/kmb/kmb_plane.c @@ -427,8 +427,14 @@ static void kmb_plane_atomic_update(struct drm_plane *plane, kmb_set_bitmask_lcd(kmb, LCD_CONTROL, ctrl); - /* FIXME no doc on how to set output format,these values are - * taken from the Myriadx tests + /* Enable pipeline AXI read transactions for the DMA + * after setting graphics layers. This must be done + * in a separate write cycle. + */ + kmb_set_bitmask_lcd(kmb, LCD_CONTROL, LCD_CTRL_PIPELINE_DMA); + + /* FIXME no doc on how to set output format, these values are taken + * from the Myriadx tests */ out_format |= LCD_OUTF_FORMAT_RGB888; @@ -526,6 +532,11 @@ struct kmb_plane *kmb_plane_init(struct drm_device *drm) plane->id = i; } + /* Disable pipeline AXI read transactions for the DMA + * prior to setting graphics layers + */ + kmb_clr_bitmask_lcd(kmb, LCD_CONTROL, LCD_CTRL_PIPELINE_DMA); + return primary; cleanup: drmm_kfree(drm, plane); From eb92830cdbc232a0e8166c48061ca276132646a7 Mon Sep 17 00:00:00 2001 From: Edmund Dea Date: Wed, 26 Aug 2020 13:17:29 -0700 Subject: [PATCH 327/502] drm/kmb: Define driver date and major/minor version Added macros for date and version Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display") Signed-off-by: Edmund Dea Signed-off-by: Anitha Chrisanthus Acked-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20210728003126.1425028-2-anitha.chrisanthus@intel.com --- drivers/gpu/drm/kmb/kmb_drv.c | 8 ++++---- drivers/gpu/drm/kmb/kmb_drv.h | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index c0b1c6f99249..f54392ec4fab 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -425,10 +425,10 @@ static const struct drm_driver kmb_driver = { .fops = &fops, DRM_GEM_CMA_DRIVER_OPS_VMAP, .name = "kmb-drm", - .desc = "KEEMBAY DISPLAY DRIVER ", - .date = "20201008", - .major = 1, - .minor = 0, + .desc = "KEEMBAY DISPLAY DRIVER", + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, }; static int kmb_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h index 02e806712a64..ebbaa5f422d5 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.h +++ b/drivers/gpu/drm/kmb/kmb_drv.h @@ -15,6 +15,11 @@ #define KMB_MAX_HEIGHT 1080 /*Max height in pixels */ #define KMB_MIN_WIDTH 1920 /*Max width in pixels */ #define KMB_MIN_HEIGHT 1080 /*Max height in pixels */ + +#define DRIVER_DATE "20210223" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 1 + #define KMB_LCD_DEFAULT_CLK 200000000 #define KMB_SYS_CLK_MHZ 500 From bc546c0c9abb3bb2fb46866b3d1e6ade9695a5f6 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 13 Jan 2021 14:31:03 +0800 Subject: [PATCH 328/502] scsi: scsi_dh_rdac: Avoid crash during rdac_bus_attach() The following BUG_ON() was observed during RDAC scan: [595952.944297] kernel BUG at drivers/scsi/device_handler/scsi_dh_rdac.c:427! [595952.951143] Internal error: Oops - BUG: 0 [#1] SMP ...... [595953.251065] Call trace: [595953.259054] check_ownership+0xb0/0x118 [595953.269794] rdac_bus_attach+0x1f0/0x4b0 [595953.273787] scsi_dh_handler_attach+0x3c/0xe8 [595953.278211] scsi_dh_add_device+0xc4/0xe8 [595953.282291] scsi_sysfs_add_sdev+0x8c/0x2a8 [595953.286544] scsi_probe_and_add_lun+0x9fc/0xd00 [595953.291142] __scsi_scan_target+0x598/0x630 [595953.295395] scsi_scan_target+0x120/0x130 [595953.299481] fc_user_scan+0x1a0/0x1c0 [scsi_transport_fc] [595953.304944] store_scan+0xb0/0x108 [595953.308420] dev_attr_store+0x44/0x60 [595953.312160] sysfs_kf_write+0x58/0x80 [595953.315893] kernfs_fop_write+0xe8/0x1f0 [595953.319888] __vfs_write+0x60/0x190 [595953.323448] vfs_write+0xac/0x1c0 [595953.326836] ksys_write+0x74/0xf0 [595953.330221] __arm64_sys_write+0x24/0x30 Code is in check_ownership: list_for_each_entry_rcu(tmp, &h->ctlr->dh_list, node) { /* h->sdev should always be valid */ BUG_ON(!tmp->sdev); tmp->sdev->access_state = access_state; } rdac_bus_attach initialize_controller list_add_rcu(&h->node, &h->ctlr->dh_list); h->sdev = sdev; rdac_bus_detach list_del_rcu(&h->node); h->sdev = NULL; Fix the race between rdac_bus_attach() and rdac_bus_detach() where h->sdev is NULL when processing the RDAC attach. Link: https://lore.kernel.org/r/20210113063103.2698953-1-yebin10@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: Ye Bin Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_rdac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 25f6e1ac9e7b..66652ab409cc 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -453,8 +453,8 @@ static int initialize_controller(struct scsi_device *sdev, if (!h->ctlr) err = SCSI_DH_RES_TEMP_UNAVAIL; else { - list_add_rcu(&h->node, &h->ctlr->dh_list); h->sdev = sdev; + list_add_rcu(&h->node, &h->ctlr->dh_list); } spin_unlock(&list_lock); err = SCSI_DH_OK; @@ -778,11 +778,11 @@ static void rdac_bus_detach( struct scsi_device *sdev ) spin_lock(&list_lock); if (h->ctlr) { list_del_rcu(&h->node); - h->sdev = NULL; kref_put(&h->ctlr->kref, release_controller); } spin_unlock(&list_lock); sdev->handler_data = NULL; + synchronize_rcu(); kfree(h); } From 70edd2e6f652f67d854981fd67f9ad0f1deaea92 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 26 Jul 2021 17:24:02 +0530 Subject: [PATCH 329/502] scsi: core: Avoid printing an error if target_alloc() returns -ENXIO Avoid printing a 'target allocation failed' error if the driver target_alloc() callback function returns -ENXIO. This return value indicates that the corresponding H:C:T:L entry is empty. Removing this error reduces the scan time if the user issues SCAN_WILD_CARD scan operation through sysfs parameter on a host with a lot of empty H:C:T:L entries. Avoiding the printk on -ENXIO matches the behavior of the other callback functions during scanning. Link: https://lore.kernel.org/r/20210726115402.1936-1-sreekanth.reddy@broadcom.com Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index b059bf2b61d4..5b6996a2401b 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -475,7 +475,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, error = shost->hostt->target_alloc(starget); if(error) { - dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error); + if (error != -ENXIO) + dev_err(dev, "target allocation failed, error %d\n", error); /* don't want scsi_target_reap to do the final * put because it will be under the host lock */ scsi_target_destroy(starget); From a264cf5e81c78e2b9918b8b9ef2ace9dde1850df Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Fri, 16 Jul 2021 14:52:20 -0600 Subject: [PATCH 330/502] scsi: ibmvfc: Fix command state accounting and stale response detection Prior to commit 1f4a4a19508d ("scsi: ibmvfc: Complete commands outside the host/queue lock") responses to commands were completed sequentially with the host lock held such that a command had a basic binary state of active or free. It was therefore a simple affair of ensuring the assocaiated ibmvfc_event to a VIOS response was valid by testing that it was not already free. The lock relexation work to complete commands outside the lock inadverdently made it a trinary command state such that a command is either in flight, received and being completed, or completed and now free. This breaks the stale command detection logic as a command may be still marked active and been placed on the delayed completion list when a second stale response for the same command arrives. This can lead to double completions and list corruption. This issue was exposed by a recent VIOS regression were a missing memory barrier could occasionally result in the ibmvfc client receiving a duplicate response for the same command. Fix the issue by introducing the atomic ibmvfc_event.active to track the trinary state of a command. The state is explicitly set to 1 when a command is successfully sent. The CRQ response handlers use atomic_dec_if_positive() to test for stale responses and correctly transition to the completion state when a active command is received. Finally, atomic_dec_and_test() is used to sanity check transistions when commands are freed as a result of a completion, or moved to the purge list as a result of error handling or adapter reset. Link: https://lore.kernel.org/r/20210716205220.1101150-1-tyreld@linux.ibm.com Fixes: 1f4a4a19508d ("scsi: ibmvfc: Complete commands outside the host/queue lock") Cc: stable@vger.kernel.org Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 19 +++++++++++++++++-- drivers/scsi/ibmvscsi/ibmvfc.h | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index bee1bec49c09..935b01ee44b7 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost, for (i = 0; i < size; ++i) { struct ibmvfc_event *evt = &pool->events[i]; + /* + * evt->active states + * 1 = in flight + * 0 = being completed + * -1 = free/freed + */ + atomic_set(&evt->active, -1); atomic_set(&evt->free, 1); evt->crq.valid = 0x80; evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i)); @@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt) BUG_ON(!ibmvfc_valid_event(pool, evt)); BUG_ON(atomic_inc_return(&evt->free) != 1); + BUG_ON(atomic_dec_and_test(&evt->active)); spin_lock_irqsave(&evt->queue->l_lock, flags); list_add_tail(&evt->queue_list, &evt->queue->free); @@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list) **/ static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code) { + /* + * Anything we are failing should still be active. Otherwise, it + * implies we already got a response for the command and are doing + * something bad like double completing it. + */ + BUG_ON(!atomic_dec_and_test(&evt->active)); if (evt->cmnd) { evt->cmnd->result = (error_code << 16); evt->done = ibmvfc_scsi_eh_done; @@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, evt->done(evt); } else { + atomic_set(&evt->active, 1); spin_unlock_irqrestore(&evt->queue->l_lock, flags); ibmvfc_trc_start(evt); } @@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost, return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; @@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 4f0f3baefae4..92fb889d7eb0 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -745,6 +745,7 @@ struct ibmvfc_event { struct ibmvfc_target *tgt; struct scsi_cmnd *cmnd; atomic_t free; + atomic_t active; union ibmvfc_iu *xfer_iu; void (*done)(struct ibmvfc_event *evt); void (*_done)(struct ibmvfc_event *evt); From 5c04243a56a7977185b00400e59ca7e108004faf Mon Sep 17 00:00:00 2001 From: Li Manyi Date: Mon, 26 Jul 2021 19:49:13 +0800 Subject: [PATCH 331/502] scsi: sr: Return correct event when media event code is 3 Media event code 3 is defined in the MMC-6 spec as follows: "MediaRemoval: The media has been removed from the specified slot, and the Drive is unable to access the media without user intervention. This applies to media changers only." This indicated that treating the condition as an EJECT_REQUEST was appropriate. However, doing so had the unfortunate side-effect of causing the drive tray to be physically ejected on resume. Instead treat the event as a MEDIA_CHANGE request. Fixes: 7dd753ca59d6 ("scsi: sr: Return appropriate error code when disk is ejected") Link: https://bugzilla.kernel.org/show_bug.cgi?id=213759 Link: https://lore.kernel.org/r/20210726114913.6760-1-limanyi@uniontech.com Signed-off-by: Li Manyi Signed-off-by: Martin K. Petersen --- drivers/scsi/sr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 94c254e9012e..a6d3ac0a6cbc 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -221,7 +221,7 @@ static unsigned int sr_get_events(struct scsi_device *sdev) else if (med->media_event_code == 2) return DISK_EVENT_MEDIA_CHANGE; else if (med->media_event_code == 3) - return DISK_EVENT_EJECT_REQUEST; + return DISK_EVENT_MEDIA_CHANGE; return 0; } From f0f82e2476f6adb9c7a0135cfab8091456990c99 Mon Sep 17 00:00:00 2001 From: lijinlin Date: Tue, 27 Jul 2021 11:44:55 +0800 Subject: [PATCH 332/502] scsi: core: Fix capacity set to zero after offlinining device After adding physical volumes to a volume group through vgextend, the kernel will rescan the partitions. This in turn will cause the device capacity to be queried. If the device status is set to offline through sysfs at this time, READ CAPACITY command will return a result which the host byte is DID_NO_CONNECT, and the capacity of the device will be set to zero in read_capacity_error(). After setting device status back to running, the capacity of the device will remain stuck at zero. Fix this issue by rescanning device when the device state changes to SDEV_RUNNING. Link: https://lore.kernel.org/r/20210727034455.1494960-1-lijinlin3@huawei.com Reviewed-by: Bart Van Assche Signed-off-by: lijinlin Signed-off-by: Wu Bo Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 32489d25158f..ae9bfc658203 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -807,11 +807,14 @@ store_state_field(struct device *dev, struct device_attribute *attr, mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, state); /* - * If the device state changes to SDEV_RUNNING, we need to run - * the queue to avoid I/O hang. + * If the device state changes to SDEV_RUNNING, we need to + * rescan the device to revalidate it, and run the queue to + * avoid I/O hang. */ - if (ret == 0 && state == SDEV_RUNNING) + if (ret == 0 && state == SDEV_RUNNING) { + scsi_rescan_device(dev); blk_mq_run_hw_queues(sdev->request_queue, true); + } mutex_unlock(&sdev->state_mutex); return ret == 0 ? count : -EINVAL; From 8a7b46fa7902a3d36ce44a64f4d66586d66206ea Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 26 Jul 2021 11:26:44 +0200 Subject: [PATCH 333/502] MAINTAINERS: add Yasushi SHOJI as reviewer for the Microchip CAN BUS Analyzer Tool driver This patch adds Yasushi SHOJI as a reviewer for the Microchip CAN BUS Analyzer Tool driver. Link: https://lore.kernel.org/r/20210726111619.1023991-1-mkl@pengutronix.de Acked-by: Yasushi SHOJI Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 58afeb12d3b3..42ea3183e87c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11327,6 +11327,12 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/radio/radio-maxiradio* +MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER +R: Yasushi SHOJI +L: linux-can@vger.kernel.org +S: Maintained +F: drivers/net/can/usb/mcba_usb.c + MCAN MMIO DEVICE DRIVER M: Chandrasekar Ramakrishnan L: linux-can@vger.kernel.org From f6b3c7848e66e9046c8a79a5b88fd03461cc252b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Jul 2021 17:12:46 +0300 Subject: [PATCH 334/502] can: hi311x: fix a signedness bug in hi3110_cmd() The hi3110_cmd() is supposed to return zero on success and negative error codes on failure, but it was accidentally declared as a u8 when it needs to be an int type. Fixes: 57e83fb9b746 ("can: hi311x: Add Holt HI-311x CAN driver") Link: https://lore.kernel.org/r/20210729141246.GA1267@kili Signed-off-by: Dan Carpenter Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/hi311x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index dd17b8c53e1c..89d9c986a229 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -218,7 +218,7 @@ static int hi3110_spi_trans(struct spi_device *spi, int len) return ret; } -static u8 hi3110_cmd(struct spi_device *spi, u8 command) +static int hi3110_cmd(struct spi_device *spi, u8 command) { struct hi3110_priv *priv = spi_get_drvdata(spi); From fc43fb69a7af92839551f99c1a96a37b77b3ae7a Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 25 Jul 2021 13:36:30 +0300 Subject: [PATCH 335/502] can: mcba_usb_start(): add missing urb->transfer_dma initialization Yasushi reported, that his Microchip CAN Analyzer stopped working since commit 91c02557174b ("can: mcba_usb: fix memory leak in mcba_usb"). The problem was in missing urb->transfer_dma initialization. In my previous patch to this driver I refactored mcba_usb_start() code to avoid leaking usb coherent buffers. To archive it, I passed local stack variable to usb_alloc_coherent() and then saved it to private array to correctly free all coherent buffers on ->close() call. But I forgot to initialize urb->transfer_dma with variable passed to usb_alloc_coherent(). All of this was causing device to not work, since dma addr 0 is not valid and following log can be found on bug report page, which points exactly to problem described above. | DMAR: [DMA Write] Request device [00:14.0] PASID ffffffff fault addr 0 [fault reason 05] PTE Write access is not set Fixes: 91c02557174b ("can: mcba_usb: fix memory leak in mcba_usb") Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=990850 Link: https://lore.kernel.org/r/20210725103630.23864-1-paskripkin@gmail.com Cc: linux-stable Reported-by: Yasushi SHOJI Signed-off-by: Pavel Skripkin Tested-by: Yasushi SHOJI [mkl: fixed typos in commit message - thanks Yasushi SHOJI] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index a45865bd7254..a1a154c08b7f 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -653,6 +653,8 @@ static int mcba_usb_start(struct mcba_priv *priv) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, priv->udev, usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN), buf, MCBA_USB_RX_BUFF_SIZE, From 8dde723fcde4479f256441da03793e37181d9f21 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 29 Jul 2021 20:51:26 +0200 Subject: [PATCH 336/502] ALSA: usb-audio: Avoid unnecessary or invalid connector selection at resume The recent fix for the resume on Lenovo machines seems causing a regression on others. It's because the change always triggers the connector selection no matter which widget node type is. This patch addresses the regression by setting the resume callback selectively only for the connector widget. Fixes: 44609fc01f28 ("ALSA: usb-audio: Check connector value on resume") Cc: BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213897 Link: https://lore.kernel.org/r/20210729185126.24432-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index f4cdaf1ba44a..9b713b4a5ec4 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1816,6 +1816,15 @@ static void get_connector_control_name(struct usb_mixer_interface *mixer, strlcat(name, " - Output Jack", name_size); } +/* get connector value to "wake up" the USB audio */ +static int connector_mixer_resume(struct usb_mixer_elem_list *list) +{ + struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); + + get_connector_value(cval, NULL, NULL); + return 0; +} + /* Build a mixer control for a UAC connector control (jack-detect) */ static void build_connector_control(struct usb_mixer_interface *mixer, const struct usbmix_name_map *imap, @@ -1833,6 +1842,10 @@ static void build_connector_control(struct usb_mixer_interface *mixer, if (!cval) return; snd_usb_mixer_elem_init_std(&cval->head, mixer, term->id); + + /* set up a specific resume callback */ + cval->head.resume = connector_mixer_resume; + /* * UAC2: The first byte from reading the UAC2_TE_CONNECTOR control returns the * number of channels connected. @@ -3642,23 +3655,15 @@ static int restore_mixer_value(struct usb_mixer_elem_list *list) return 0; } -static int default_mixer_resume(struct usb_mixer_elem_list *list) -{ - struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); - - /* get connector value to "wake up" the USB audio */ - if (cval->val_type == USB_MIXER_BOOLEAN && cval->channels == 1) - get_connector_value(cval, NULL, NULL); - - return 0; -} - static int default_mixer_reset_resume(struct usb_mixer_elem_list *list) { - int err = default_mixer_resume(list); + int err; - if (err < 0) - return err; + if (list->resume) { + err = list->resume(list); + if (err < 0) + return err; + } return restore_mixer_value(list); } @@ -3697,7 +3702,7 @@ void snd_usb_mixer_elem_init_std(struct usb_mixer_elem_list *list, list->id = unitid; list->dump = snd_usb_mixer_dump_cval; #ifdef CONFIG_PM - list->resume = default_mixer_resume; + list->resume = NULL; list->reset_resume = default_mixer_reset_resume; #endif } From 0e865f0c31928d6a313269ef624907eec55287c4 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 27 Jul 2021 19:59:57 +0300 Subject: [PATCH 337/502] can: usb_8dev: fix memory leak In usb_8dev_start() MAX_RX_URBS coherent buffers are allocated and there is nothing, that frees them: 1) In callback function the urb is resubmitted and that's all 2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER is not set (see usb_8dev_start) and this flag cannot be used with coherent buffers. So, all allocated buffers should be freed with usb_free_coherent() explicitly. Side note: This code looks like a copy-paste of other can drivers. The same patch was applied to mcba_usb driver and it works nice with real hardware. There is no change in functionality, only clean-up code for coherent buffers. Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices") Link: https://lore.kernel.org/r/d39b458cd425a1cf7f512f340224e6e9563b07bd.1627404470.git.paskripkin@gmail.com Cc: linux-stable Signed-off-by: Pavel Skripkin Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index b6e7ef0d5bc6..d1b83bd1b3cb 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -137,7 +137,8 @@ struct usb_8dev_priv { u8 *cmd_msg_buffer; struct mutex usb_8dev_cmd_lock; - + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; /* tx frame */ @@ -733,6 +734,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -742,7 +744,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) } buf = usb_alloc_coherent(priv->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); usb_free_urb(urb); @@ -750,6 +752,8 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, priv->udev, usb_rcvbulkpipe(priv->udev, USB_8DEV_ENDP_DATA_RX), @@ -767,6 +771,9 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) break; } + priv->rxbuf[i] = buf; + priv->rxbuf_dma[i] = buf_dma; + /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); } @@ -836,6 +843,10 @@ static void unlink_all_urbs(struct usb_8dev_priv *priv) usb_kill_anchored_urbs(&priv->rx_submitted); + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(priv->udev, RX_BUFFER_SIZE, + priv->rxbuf[i], priv->rxbuf_dma[i]); + usb_kill_anchored_urbs(&priv->tx_submitted); atomic_set(&priv->active_tx_urbs, 0); From 9969e3c5f40c166e3396acc36c34f9de502929f6 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 27 Jul 2021 20:00:33 +0300 Subject: [PATCH 338/502] can: ems_usb: fix memory leak In ems_usb_start() MAX_RX_URBS coherent buffers are allocated and there is nothing, that frees them: 1) In callback function the urb is resubmitted and that's all 2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER is not set (see ems_usb_start) and this flag cannot be used with coherent buffers. So, all allocated buffers should be freed with usb_free_coherent() explicitly. Side note: This code looks like a copy-paste of other can drivers. The same patch was applied to mcba_usb driver and it works nice with real hardware. There is no change in functionality, only clean-up code for coherent buffers. Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface") Link: https://lore.kernel.org/r/59aa9fbc9a8cbf9af2bbd2f61a659c480b415800.1627404470.git.paskripkin@gmail.com Cc: linux-stable Signed-off-by: Pavel Skripkin Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 0a37af4a3fa4..2b5302e72435 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -255,6 +255,8 @@ struct ems_usb { unsigned int free_slots; /* remember number of available slots */ struct ems_cpc_msg active_params; /* active controller parameters */ + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; static void ems_usb_read_interrupt_callback(struct urb *urb) @@ -587,6 +589,7 @@ static int ems_usb_start(struct ems_usb *dev) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -596,7 +599,7 @@ static int ems_usb_start(struct ems_usb *dev) } buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); usb_free_urb(urb); @@ -604,6 +607,8 @@ static int ems_usb_start(struct ems_usb *dev) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2), buf, RX_BUFFER_SIZE, ems_usb_read_bulk_callback, dev); @@ -619,6 +624,9 @@ static int ems_usb_start(struct ems_usb *dev) break; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); } @@ -684,6 +692,10 @@ static void unlink_all_urbs(struct ems_usb *dev) usb_kill_anchored_urbs(&dev->rx_submitted); + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(dev->udev, RX_BUFFER_SIZE, + dev->rxbuf[i], dev->rxbuf_dma[i]); + usb_kill_anchored_urbs(&dev->tx_submitted); atomic_set(&dev->active_tx_urbs, 0); From 928150fad41ba16df7fcc9f7f945747d0f56cbb6 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 27 Jul 2021 20:00:46 +0300 Subject: [PATCH 339/502] can: esd_usb2: fix memory leak In esd_usb2_setup_rx_urbs() MAX_RX_URBS coherent buffers are allocated and there is nothing, that frees them: 1) In callback function the urb is resubmitted and that's all 2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER is not set (see esd_usb2_setup_rx_urbs) and this flag cannot be used with coherent buffers. So, all allocated buffers should be freed with usb_free_coherent() explicitly. Side note: This code looks like a copy-paste of other can drivers. The same patch was applied to mcba_usb driver and it works nice with real hardware. There is no change in functionality, only clean-up code for coherent buffers. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Link: https://lore.kernel.org/r/b31b096926dcb35998ad0271aac4b51770ca7cc8.1627404470.git.paskripkin@gmail.com Cc: linux-stable Signed-off-by: Pavel Skripkin Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb2.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 65b58f8fc328..66fa8b07c2e6 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -195,6 +195,8 @@ struct esd_usb2 { int net_count; u32 version; int rxinitdone; + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; struct esd_usb2_net_priv { @@ -545,6 +547,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -554,7 +557,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) } buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { dev_warn(dev->udev->dev.parent, "No memory left for USB buffer\n"); @@ -562,6 +565,8 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) goto freeurb; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), buf, RX_BUFFER_SIZE, @@ -574,8 +579,12 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) usb_unanchor_urb(urb); usb_free_coherent(dev->udev, RX_BUFFER_SIZE, buf, urb->transfer_dma); + goto freeurb; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + freeurb: /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); @@ -663,6 +672,11 @@ static void unlink_all_urbs(struct esd_usb2 *dev) int i, j; usb_kill_anchored_urbs(&dev->rx_submitted); + + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(dev->udev, RX_BUFFER_SIZE, + dev->rxbuf[i], dev->rxbuf_dma[i]); + for (i = 0; i < dev->net_count; i++) { priv = dev->nets[i]; if (priv) { From fe911792eae32f03d27d8f3de2f0271862d435ac Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 23 Jun 2021 10:45:20 +0200 Subject: [PATCH 340/502] media: Revert "media: rtl28xxu: fix zero-length control request" This reverts commit 25d5ce3a606a1eb23a9265d615a92a876ff9cb5f. The patch in question causes a regression and was superseded by a second version. Unfortunately, the first revision ended up being applied instead of the correct one. Link: https://lore.kernel.org/r/YL3MCGY5wTsW2kEF@hovoldconsulting.com Signed-off-by: Johan Hovold Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/rtl28xxu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 83705730e37e..0cbdb95f8d35 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -612,9 +612,8 @@ static int rtl28xxu_read_config(struct dvb_usb_device *d) static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name) { struct rtl28xxu_dev *dev = d_to_priv(d); - u8 buf[1]; int ret; - struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf}; + struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL}; dev_dbg(&d->intf->dev, "\n"); From 76f22c93b209c811bd489950f17f8839adb31901 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 23 Jun 2021 10:45:21 +0200 Subject: [PATCH 341/502] media: rtl28xxu: fix zero-length control request The direction of the pipe argument must match the request-type direction bit or control requests may fail depending on the host-controller-driver implementation. Control transfers without a data stage are treated as OUT requests by the USB stack and should be using usb_sndctrlpipe(). Failing to do so will now trigger a warning. The driver uses a zero-length i2c-read request for type detection so update the control-request code to use usb_sndctrlpipe() in this case. Note that actually trying to read the i2c register in question does not work as the register might not exist (e.g. depending on the demodulator) as reported by Eero Lehtinen . Reported-by: syzbot+faf11bbadc5a372564da@syzkaller.appspotmail.com Reported-by: Eero Lehtinen Tested-by: Eero Lehtinen Fixes: d0f232e823af ("[media] rtl28xxu: add heuristic to detect chip type") Cc: stable@vger.kernel.org # 4.0 Cc: Antti Palosaari Signed-off-by: Johan Hovold Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/rtl28xxu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 0cbdb95f8d35..795a012d4020 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -37,7 +37,16 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req) } else { /* read */ requesttype = (USB_TYPE_VENDOR | USB_DIR_IN); - pipe = usb_rcvctrlpipe(d->udev, 0); + + /* + * Zero-length transfers must use usb_sndctrlpipe() and + * rtl28xxu_identify_state() uses a zero-length i2c read + * command to determine the chip type. + */ + if (req->size) + pipe = usb_rcvctrlpipe(d->udev, 0); + else + pipe = usb_sndctrlpipe(d->udev, 0); } ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value, From c592b46907adbeb81243f7eb7a468c36692658b8 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 30 Jun 2021 09:58:23 +0200 Subject: [PATCH 342/502] media: videobuf2-core: dequeue if start_streaming fails If a vb2_queue sets q->min_buffers_needed then when the number of queued buffers reaches q->min_buffers_needed, vb2_core_qbuf() will call the start_streaming() callback. If start_streaming() returns an error, then that error was just returned by vb2_core_qbuf(), but the buffer was still queued. However, userspace expects that if VIDIOC_QBUF fails, the buffer is returned dequeued. So if start_streaming() fails, then remove the buffer from the queue, thus avoiding this unwanted side-effect. Signed-off-by: Hans Verkuil Reviewed-by: Laurent Pinchart Tested-by: Kieran Bingham Fixes: b3379c6201bb ("[media] vb2: only call start_streaming if sufficient buffers are queued") Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/videobuf2/videobuf2-core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 02281d13505f..508ac295eb06 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -1573,6 +1573,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, struct media_request *req) { struct vb2_buffer *vb; + enum vb2_buffer_state orig_state; int ret; if (q->error) { @@ -1673,6 +1674,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, * Add to the queued buffers list, a buffer will stay on it until * dequeued in dqbuf. */ + orig_state = vb->state; list_add_tail(&vb->queued_entry, &q->queued_list); q->queued_count++; q->waiting_for_buffers = false; @@ -1703,8 +1705,17 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, if (q->streaming && !q->start_streaming_called && q->queued_count >= q->min_buffers_needed) { ret = vb2_start_streaming(q); - if (ret) + if (ret) { + /* + * Since vb2_core_qbuf will return with an error, + * we should return it to state DEQUEUED since + * the error indicates that the buffer wasn't queued. + */ + list_del(&vb->queued_entry); + q->queued_count--; + vb->state = orig_state; return ret; + } } dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index); From f1de1c7803595e937ce9b922807f499851225021 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 5 Jul 2021 14:57:08 +0200 Subject: [PATCH 343/502] media: atmel: fix build when ISC=m and XISC=y Building VIDEO_ATMEL_ISC as module and VIDEO_ATMEL_XISC as built-in (or viceversa) causes build errors: or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o: in function `isc_async_complete': atmel-isc-base.c:(.text+0x40d0): undefined reference to `__this_module' or1k-linux-ld: atmel-isc-base.c:(.text+0x40f0): undefined reference to `__this_module' or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o:(.rodata+0x390): undefined reference to `__this_module' or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o:(__param+0x4): undefined reference to `__this_module' or1k-linux-ld: drivers/media/platform/atmel/atmel-isc-base.o:(__param+0x18): undefined reference to `__this_module' This is caused by the file atmel-isc-base.c which is common code between the two drivers. The solution is to create another Kconfig symbol that is automatically selected and generates the module atmel-isc-base.ko. This module can be loaded when both drivers are modules, or built-in when at least one of them is built-in. Reported-by: kernel test robot Fixes: c9aa973884a1 ("media: atmel: atmel-isc: add microchip-xisc driver") Signed-off-by: Eugen Hristev Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/atmel/Kconfig | 8 ++++++++ drivers/media/platform/atmel/Makefile | 5 +++-- drivers/media/platform/atmel/atmel-isc-base.c | 11 +++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/atmel/Kconfig b/drivers/media/platform/atmel/Kconfig index 99b51213f871..dda2f27da317 100644 --- a/drivers/media/platform/atmel/Kconfig +++ b/drivers/media/platform/atmel/Kconfig @@ -8,6 +8,7 @@ config VIDEO_ATMEL_ISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL Image Sensor Controller available as a v4l2 device. @@ -19,10 +20,17 @@ config VIDEO_ATMEL_XISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL eXtended Image Sensor Controller available as a v4l2 device. +config VIDEO_ATMEL_ISC_BASE + tristate + default n + help + ATMEL ISC and XISC common code base. + config VIDEO_ATMEL_ISI tristate "ATMEL Image Sensor Interface (ISI) support" depends on VIDEO_V4L2 && OF diff --git a/drivers/media/platform/atmel/Makefile b/drivers/media/platform/atmel/Makefile index c5c01556c653..46d264ab7948 100644 --- a/drivers/media/platform/atmel/Makefile +++ b/drivers/media/platform/atmel/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only -atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o -atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o +atmel-isc-objs = atmel-sama5d2-isc.o +atmel-xisc-objs = atmel-sama7g5-isc.o obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o +obj-$(CONFIG_VIDEO_ATMEL_ISC_BASE) += atmel-isc-base.o obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c index 19daa49bf604..136ab7cf36ed 100644 --- a/drivers/media/platform/atmel/atmel-isc-base.c +++ b/drivers/media/platform/atmel/atmel-isc-base.c @@ -378,6 +378,7 @@ int isc_clk_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_clk_init); void isc_clk_cleanup(struct isc_device *isc) { @@ -392,6 +393,7 @@ void isc_clk_cleanup(struct isc_device *isc) clk_unregister(isc_clk->clk); } } +EXPORT_SYMBOL_GPL(isc_clk_cleanup); static int isc_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, @@ -1578,6 +1580,7 @@ irqreturn_t isc_interrupt(int irq, void *dev_id) return ret; } +EXPORT_SYMBOL_GPL(isc_interrupt); static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max) { @@ -2212,6 +2215,7 @@ const struct v4l2_async_notifier_operations isc_async_ops = { .unbind = isc_async_unbind, .complete = isc_async_complete, }; +EXPORT_SYMBOL_GPL(isc_async_ops); void isc_subdev_cleanup(struct isc_device *isc) { @@ -2224,6 +2228,7 @@ void isc_subdev_cleanup(struct isc_device *isc) INIT_LIST_HEAD(&isc->subdev_entities); } +EXPORT_SYMBOL_GPL(isc_subdev_cleanup); int isc_pipeline_init(struct isc_device *isc) { @@ -2264,6 +2269,7 @@ int isc_pipeline_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_pipeline_init); /* regmap configuration */ #define ATMEL_ISC_REG_MAX 0xd5c @@ -2273,4 +2279,9 @@ const struct regmap_config isc_regmap_config = { .val_bits = 32, .max_register = ATMEL_ISC_REG_MAX, }; +EXPORT_SYMBOL_GPL(isc_regmap_config); +MODULE_AUTHOR("Songjun Wu"); +MODULE_AUTHOR("Eugen Hristev"); +MODULE_DESCRIPTION("Atmel ISC common code base"); +MODULE_LICENSE("GPL v2"); From 341abd693d10e5f337a51f140ae3e7a1ae0febf6 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 29 Jul 2021 06:33:06 +0200 Subject: [PATCH 344/502] serial: 8250_pci: Avoid irq sharing for MSI(-X) interrupts. This attempts to fix a bug found with a serial port card which uses an MCS9922 chip, one of the 4 models for which MSI-X interrupts are currently supported. I don't possess such a card, and i'm not experienced with the serial subsystem, so this patch is based on what i think i found as a likely reason for failure, based on walking the user who actually owns the card through some diagnostic. The user who reported the problem finds the following in his dmesg output for the relevant ttyS4 and ttyS5: [ 0.580425] serial 0000:02:00.0: enabling device (0000 -> 0003) [ 0.601448] 0000:02:00.0: ttyS4 at I/O 0x3010 (irq = 125, base_baud = 115200) is a ST16650V2 [ 0.603089] serial 0000:02:00.1: enabling device (0000 -> 0003) [ 0.624119] 0000:02:00.1: ttyS5 at I/O 0x3000 (irq = 126, base_baud = 115200) is a ST16650V2 ... [ 6.323784] genirq: Flags mismatch irq 128. 00000080 (ttyS5) vs. 00000000 (xhci_hcd) [ 6.324128] genirq: Flags mismatch irq 128. 00000080 (ttyS5) vs. 00000000 (xhci_hcd) ... Output of setserial -a: /dev/ttyS4, Line 4, UART: 16650V2, Port: 0x3010, IRQ: 127 Baud_base: 115200, close_delay: 50, divisor: 0 closing_wait: 3000 Flags: spd_normal skip_test This suggests to me that the serial driver wants to register and share a MSI/MSI-X irq 128 with the xhci_hcd driver, whereas the xhci driver does not want to share the irq, as flags 0x00000080 (== IRQF_SHARED) from the serial port driver means to share the irq, and this mismatch ends in some failed irq init? With this setup, data reception works very unreliable, with dropped data, already at a transmission rate of only a 16 Bytes chunk every 1/120th of a second, ie. 1920 Bytes/sec, presumably due to rx fifo overflow due to mishandled or not used at all rx irq's? See full discussion thread with attempted diagnosis at: https://psychtoolbox.discourse.group/t/issues-with-iscan-serial-port-recording/3886 Disabling the use of MSI interrupts for the serial port pci card did fix the reliability problems. The user executed the following sequence of commands to achieve this: echo 0000:02:00.0 | sudo tee /sys/bus/pci/drivers/serial/unbind echo 0000:02:00.1 | sudo tee /sys/bus/pci/drivers/serial/unbind echo 0 | sudo tee /sys/bus/pci/devices/0000:02:00.0/msi_bus echo 0 | sudo tee /sys/bus/pci/devices/0000:02:00.1/msi_bus echo 0000:02:00.0 | sudo tee /sys/bus/pci/drivers/serial/bind echo 0000:02:00.1 | sudo tee /sys/bus/pci/drivers/serial/bind This resulted in the following log output: [ 82.179021] pci 0000:02:00.0: MSI/MSI-X disallowed for future drivers [ 87.003031] pci 0000:02:00.1: MSI/MSI-X disallowed for future drivers [ 98.537010] 0000:02:00.0: ttyS4 at I/O 0x3010 (irq = 17, base_baud = 115200) is a ST16650V2 [ 103.648124] 0000:02:00.1: ttyS5 at I/O 0x3000 (irq = 18, base_baud = 115200) is a ST16650V2 This patch attempts to fix the problem by disabling irq sharing when using MSI irq's. Note that all i know for sure is that disabling MSI irq's fixed the problem for the user, so this patch could be wrong and is untested. Please review with caution, keeping this in mind. Fixes: 8428413b1d14 ("serial: 8250_pci: Implement MSI(-X) support") Cc: Ralf Ramsauer Cc: stable Reviewed-by: Andy Shevchenko Signed-off-by: Mario Kleiner Link: https://lore.kernel.org/r/20210729043306.18528-1-mario.kleiner.de@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 02985cf90ef2..a808c283883e 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -4002,6 +4002,7 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) if (pci_match_id(pci_use_msi, dev)) { dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n"); pci_set_master(dev); + uart.port.flags &= ~UPF_SHARE_IRQ; rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES); } else { dev_dbg(&dev->dev, "Using legacy interrupts\n"); From fa7a549d321a4189677b0cea86e58d9db7977f7b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Jul 2021 17:37:49 -0400 Subject: [PATCH 345/502] KVM: x86: accept userspace interrupt only if no event is injected Once an exception has been injected, any side effects related to the exception (such as setting CR2 or DR6) have been taked place. Therefore, once KVM sets the VM-entry interruption information field or the AMD EVENTINJ field, the next VM-entry must deliver that exception. Pending interrupts are processed after injected exceptions, so in theory it would not be a problem to use KVM_INTERRUPT when an injected exception is present. However, DOSEMU is using run->ready_for_interrupt_injection to detect interrupt windows and then using KVM_SET_SREGS/KVM_SET_REGS to inject the interrupt manually. For this to work, the interrupt window must be delayed after the completion of the previous event injection. Cc: stable@vger.kernel.org Reported-by: Stas Sergeev Tested-by: Stas Sergeev Fixes: 71cc849b7093 ("KVM: x86: Fix split-irqchip vs interrupt injection window request") Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4116567f3d44..e5d5c5ed7dd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { - return kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_accept_dm_intr(vcpu); + /* + * Do not cause an interrupt window exit if an exception + * is pending or an event needs reinjection; userspace + * might want to inject the interrupt manually using KVM_SET_REGS + * or KVM_SET_SREGS. For that to work, we must be at an + * instruction boundary and with no events half-injected. + */ + return (kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_accept_dm_intr(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + !vcpu->arch.exception.pending); } static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, From 3a0670824979a986a2314c921aa092e60730eeae Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 20 Jun 2021 20:54:21 +0200 Subject: [PATCH 346/502] ARM: dts: stm32: Prefer HW RTC on DHCOM SoM The DHCOM SoM has two RTC, one is the STM32 RTC built into the SoC and another is Microcrystal RV RTC. By default, only the later has battery backup, the former does not. The order in which the RTCs are probed on boot is random, which means the kernel might pick up system time from the STM32 RTC which has no battery backup. This then leads to incorrect initial system time setup, even though the HW RTC has correct time configured in it. Add DT alias entries, so that the RTCs get assigned fixed IDs and the HW RTC is always picked by the kernel as the default RTC, thus resulting in correct system time in early userspace. Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index 2af0a6752674..8349c9099e30 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -12,6 +12,8 @@ aliases { ethernet0 = ðernet0; ethernet1 = &ksz8851; + rtc0 = &hwrtc; + rtc1 = &rtc; }; memory@c0000000 { @@ -248,7 +250,7 @@ /delete-property/dmas; /delete-property/dma-names; - rtc@32 { + hwrtc: rtc@32 { compatible = "microcrystal,rv8803"; reg = <0x32>; }; From 36862c1ebc92a7e6fcc55002965c44b8ad17d4ca Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 21 Jul 2021 20:12:53 +0200 Subject: [PATCH 347/502] ARM: dts: stm32: Disable LAN8710 EDPD on DHCOM The LAN8710 Energy Detect Power Down (EDPD) functionality might cause unreliable cable detection. There are multiple accounts of this in the SMSC PHY driver patches which attempted to make EDPD reliable, however it seems there is always some sort of corner case left. Unfortunatelly, there is no errata documented which would confirm this to be a silicon bug on the LAN87xx series of PHYs (LAN8700, LAN8710, LAN8720 at least). Disable EDPD on the DHCOM SoM, just like multiple other boards already do as well, to make the cable detection reliable. Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index 8349c9099e30..8c41f819f776 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -140,6 +140,7 @@ reset-gpios = <&gpioh 3 GPIO_ACTIVE_LOW>; reset-assert-us = <500>; reset-deassert-us = <500>; + smsc,disable-energy-detect; interrupt-parent = <&gpioi>; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; }; From 15f68f027ebd961b99a1c420f96ff3838c5e4450 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 21 Jul 2021 20:10:40 +0200 Subject: [PATCH 348/502] ARM: dts: stm32: Fix touchscreen IRQ line assignment on DHCOM While 7e5f3155dcbb4 ("ARM: dts: stm32: Fix LED5 on STM32MP1 DHCOM PDK2") fixed the LED0 assignment on the PDK2 board, the same commit did not update the touchscreen IRQ line assignment, which is the same GPIO line, shared between the LED0 output and touchscreen IRQ input. To make this more convoluted, the same EXTI input (not the same GPIO line) is shared between Button B which is Active-Low IRQ, and touchscreen IRQ which is Edge-Falling IRQ, which cannot be used at the same time. In case the LCD board with touchscreen is in use, which is the case here, LED0 must be disabled, Button B must be polled, so the touchscreen interrupt works as it should. Update the touchscreen IRQ line assignment, disable LED0 and use polled GPIO button driver for Button B, since the DT here describes baseboard with LCD board. Fixes: 7e5f3155dcbb4 ("ARM: dts: stm32: Fix LED5 on STM32MP1 DHCOM PDK2") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index c5ea08fec535..6cf1c8b4c6e2 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -37,7 +37,7 @@ poll-interval = <20>; /* - * The EXTi IRQ line 3 is shared with touchscreen and ethernet, + * The EXTi IRQ line 3 is shared with ethernet, * so mark this as polled GPIO key. */ button-0 { @@ -46,6 +46,16 @@ gpios = <&gpiof 3 GPIO_ACTIVE_LOW>; }; + /* + * The EXTi IRQ line 6 is shared with touchscreen, + * so mark this as polled GPIO key. + */ + button-1 { + label = "TA2-GPIO-B"; + linux,code = ; + gpios = <&gpiod 6 GPIO_ACTIVE_LOW>; + }; + /* * The EXTi IRQ line 0 is shared with PMIC, * so mark this as polled GPIO key. @@ -60,13 +70,6 @@ gpio-keys { compatible = "gpio-keys"; - button-1 { - label = "TA2-GPIO-B"; - linux,code = ; - gpios = <&gpiod 6 GPIO_ACTIVE_LOW>; - wakeup-source; - }; - button-3 { label = "TA4-GPIO-D"; linux,code = ; @@ -82,6 +85,7 @@ label = "green:led5"; gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>; default-state = "off"; + status = "disabled"; }; led-1 { @@ -185,8 +189,8 @@ touchscreen@38 { compatible = "edt,edt-ft5406"; reg = <0x38>; - interrupt-parent = <&gpiog>; - interrupts = <2 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */ + interrupt-parent = <&gpioc>; + interrupts = <6 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */ }; }; From ce5a595744126be4f1327e29e3c5ae9aac6b38d5 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:31 -0700 Subject: [PATCH 349/502] interconnect: qcom: icc-rpmh: Ensure floor BW is enforced for all nodes We currently only enforce BW floors for a subset of nodes in a path. All BCMs that need updating are queued in the pre_aggregate/aggregate phase. The first set() commits all queued BCMs and subsequent set() calls short-circuit without committing anything. Since the floor BW isn't set in sum_avg/max_peak until set(), then some BCMs are committed before their associated nodes reflect the floor. Set the floor as each node is being aggregated. This ensures that all all relevant floors are set before the BCMs are committed. Fixes: 266cd33b5913 ("interconnect: qcom: Ensure that the floor bandwidth value is enforced") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-4-mdtipton@codeaurora.org [georgi: Removed unused variable] Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpmh.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index bf01d09dba6c..f6fae64861ce 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -57,6 +57,11 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, qn->sum_avg[i] += avg_bw; qn->max_peak[i] = max_t(u32, qn->max_peak[i], peak_bw); } + + if (node->init_avg || node->init_peak) { + qn->sum_avg[i] = max_t(u64, qn->sum_avg[i], node->init_avg); + qn->max_peak[i] = max_t(u64, qn->max_peak[i], node->init_peak); + } } *agg_avg += avg_bw; @@ -79,7 +84,6 @@ EXPORT_SYMBOL_GPL(qcom_icc_aggregate); int qcom_icc_set(struct icc_node *src, struct icc_node *dst) { struct qcom_icc_provider *qp; - struct qcom_icc_node *qn; struct icc_node *node; if (!src) @@ -88,12 +92,6 @@ int qcom_icc_set(struct icc_node *src, struct icc_node *dst) node = src; qp = to_qcom_provider(node->provider); - qn = node->data; - - qn->sum_avg[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->sum_avg[QCOM_ICC_BUCKET_AMC], - node->avg_bw); - qn->max_peak[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->max_peak[QCOM_ICC_BUCKET_AMC], - node->peak_bw); qcom_icc_bcm_voter_commit(qp->voter); From f84f5b6f72e68bbaeb850b58ac167e4a3a47532a Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:32 -0700 Subject: [PATCH 350/502] interconnect: qcom: icc-rpmh: Add BCMs to commit list in pre_aggregate We're only adding BCMs to the commit list in aggregate(), but there are cases where pre_aggregate() is called without subsequently calling aggregate(). In particular, in icc_sync_state() when a node with initial BW has zero requests. Since BCMs aren't added to the commit list in these cases, we don't actually send the zero BW request to HW. So the resources remain on unnecessarily. Add BCMs to the commit list in pre_aggregate() instead, which is always called even when there are no requests. Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-5-mdtipton@codeaurora.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpmh.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index f6fae64861ce..27cc5f03611c 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -20,13 +20,18 @@ void qcom_icc_pre_aggregate(struct icc_node *node) { size_t i; struct qcom_icc_node *qn; + struct qcom_icc_provider *qp; qn = node->data; + qp = to_qcom_provider(node->provider); for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) { qn->sum_avg[i] = 0; qn->max_peak[i] = 0; } + + for (i = 0; i < qn->num_bcms; i++) + qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); } EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate); @@ -44,10 +49,8 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, { size_t i; struct qcom_icc_node *qn; - struct qcom_icc_provider *qp; qn = node->data; - qp = to_qcom_provider(node->provider); if (!tag) tag = QCOM_ICC_TAG_ALWAYS; @@ -67,9 +70,6 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, *agg_avg += avg_bw; *agg_peak = max_t(u32, *agg_peak, peak_bw); - for (i = 0; i < qn->num_bcms; i++) - qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); - return 0; } EXPORT_SYMBOL_GPL(qcom_icc_aggregate); From 094121ef815f29d9e6a01fafca365831454ce293 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 28 Jul 2021 20:21:15 +0200 Subject: [PATCH 351/502] arch: Kconfig: clean up obsolete use of HAVE_IDE The arch-specific Kconfig files use HAVE_IDE to indicate if IDE is supported. As IDE support and the HAVE_IDE config vanishes with commit b7fb14d3ac63 ("ide: remove the legacy ide driver"), there is no need to mention HAVE_IDE in all those arch-specific Kconfig files. The issue was identified with ./scripts/checkkconfigsymbols.py. Fixes: b7fb14d3ac63 ("ide: remove the legacy ide driver") Suggested-by: Randy Dunlap Signed-off-by: Lukas Bulwahn Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210728182115.4401-1-lukas.bulwahn@gmail.com Reviewed-by: Christoph Hellwig Acked-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- arch/alpha/Kconfig | 1 - arch/arm/Kconfig | 6 ------ arch/arm/mach-davinci/Kconfig | 1 - arch/h8300/Kconfig.cpu | 1 - arch/ia64/Kconfig | 1 - arch/m68k/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/parisc/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/x86/Kconfig | 1 - arch/xtensa/Kconfig | 1 - 13 files changed, 18 deletions(-) diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 77d3280dc678..a6d4c2f744e3 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -14,7 +14,6 @@ config ALPHA select PCI_SYSCALL if PCI select HAVE_AOUT select HAVE_ASM_MODVERSIONS - select HAVE_IDE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select NEED_DMA_MAP_STATE diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 06b6187b67af..f2ce83e643e7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -95,7 +95,6 @@ config ARM select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) - select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -361,7 +360,6 @@ config ARCH_FOOTBRIDGE bool "FootBridge" select CPU_SA110 select FOOTBRIDGE - select HAVE_IDE select NEED_MACH_IO_H if !MMU select NEED_MACH_MEMORY_H help @@ -429,7 +427,6 @@ config ARCH_PXA select GENERIC_IRQ_MULTI_HANDLER select GPIO_PXA select GPIOLIB - select HAVE_IDE select IRQ_DOMAIN select PLAT_PXA select SPARSE_IRQ @@ -445,7 +442,6 @@ config ARCH_RPC select ARM_HAS_SG_CHAIN select CPU_SA110 select FIQ - select HAVE_IDE select HAVE_PATA_PLATFORM select ISA_DMA_API select LEGACY_TIMER_TICK @@ -468,7 +464,6 @@ config ARCH_SA1100 select CPU_SA1100 select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB - select HAVE_IDE select IRQ_DOMAIN select ISA select NEED_MACH_MEMORY_H @@ -504,7 +499,6 @@ config ARCH_OMAP1 select GENERIC_IRQ_CHIP select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB - select HAVE_IDE select HAVE_LEGACY_CLK select IRQ_DOMAIN select NEED_MACH_IO_H if PCCARD diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index de11030748d0..1d3aef84287d 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -9,7 +9,6 @@ menuconfig ARCH_DAVINCI select PM_GENERIC_DOMAINS_OF if PM && OF select REGMAP_MMIO select RESET_CONTROLLER - select HAVE_IDE select PINCTRL_SINGLE if ARCH_DAVINCI diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index b5e14d513e62..c30baa0499fc 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -44,7 +44,6 @@ config H8300_H8MAX bool "H8MAX" select H83069 select RAMKERNEL - select HAVE_IDE help H8MAX Evaluation Board Support More Information. (Japanese Only) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index cf425c2c63af..4993c7ac7ff6 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -25,7 +25,6 @@ config IA64 select HAVE_ASM_MODVERSIONS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_EXIT_THREAD - select HAVE_IDE select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 96989ad46f66..d632a1d576f9 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,7 +23,6 @@ config M68K select HAVE_DEBUG_BUGVERBOSE select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED select HAVE_FUTEX_CMPXCHG if MMU && FUTEX - select HAVE_IDE select HAVE_MOD_ARCH_SPECIFIC select HAVE_UID16 select MMU_GATHER_NO_RANGE if MMU diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cee6087cd686..6dfb27d531dd 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -71,7 +71,6 @@ config MIPS select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index bde9907bc5b2..4f8c1fbf8f2f 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -3,7 +3,6 @@ config PARISC def_bool y select ARCH_32BIT_OFF_T if !64BIT select ARCH_MIGHT_HAVE_PC_PARPORT - select HAVE_IDE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d01e3401581d..663766fbf505 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -220,7 +220,6 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 45a0549421cd..b683b69a4556 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -39,7 +39,6 @@ config SUPERH select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_FTRACE_MCOUNT_RECORD select HAVE_HW_BREAKPOINT - select HAVE_IDE if HAS_IOPORT_MAP select HAVE_IOREMAP_PROT if MMU && !X2TLB select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c5fa7932b550..f0c0f955e169 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -19,7 +19,6 @@ config SPARC select OF select OF_PROMTREE select HAVE_ASM_MODVERSIONS - select HAVE_IDE select HAVE_ARCH_KGDB if !SMP || SPARC64 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_SECCOMP if SPARC64 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 49270655e827..88fb922c23a0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -202,7 +202,6 @@ config X86 select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 2332b2156993..3878880469d1 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -327,7 +327,6 @@ config XTENSA_PLATFORM_ISS config XTENSA_PLATFORM_XT2000 bool "XT2000" - select HAVE_IDE help XT2000 is the name of Tensilica's feature-rich emulation platform. This hardware is capable of running a full Linux distribution. From 7561c14d8a4d1a24a40b1839d927d488e2d6345a Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Wed, 28 Jul 2021 13:24:53 +0200 Subject: [PATCH 352/502] s390/vdso: add .got.plt in vdso linker script KCFLAGS="-mno-pic-data-is-text-relative" make leads to bfd assertion error in s390_got_pointer(): LD arch/s390/kernel/vdso64/vdso64.so.dbg ld: BFD version 2.35-18.fc33 assertion fail elf-s390-common.c:74 readelf -Wr vdso64_generic.o | grep GOT 0000000000000032 000000110000001a R_390_GOTENT 0000000000000000 _vdso_data + 2 (...) Add .got.plt in linker script to avoid this. Suggested-by: Ilya Leoshkevich Signed-off-by: Sumanth Korikkar Signed-off-by: Heiko Carstens --- arch/s390/kernel/vdso32/vdso32.lds.S | 1 + arch/s390/kernel/vdso64/vdso64.lds.S | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index bff50b6acd6d..edf5ff1debe1 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index d4fb336d747b..4461ea151e49 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); From 88731c8f3636b133e27df88febcd7cd2fdece0a7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 29 Jul 2021 15:29:19 +0200 Subject: [PATCH 353/502] s390/boot: fix zstd build for -march=z900 zstd decompression uses __builtin_clz() which fails back to __clzdi2() when the kernel is built for older hardware like z900. This leads to build failures like the following: s390x-11.1.0-ld: /devel/src/kernel/arch/s390/boot/compressed/../../../../lib/zstd/bitstream.h:148: undefined reference to `__clzdi2' Fix that by optionally including lib/clz_ctz.c into the decompressor. Reported-by: kernel test robot Fixes: 7b034d9c1b08 ("s390/boot: add zstd support") Signed-off-by: Vasily Gorbik Link: https://lore.kernel.org/r/patch-1.thread-f0f589.git-f0f58936888f.your-ad-here.call-01627564869-ext-2765@work.hours Signed-off-by: Heiko Carstens --- arch/s390/boot/compressed/Makefile | 1 + arch/s390/boot/compressed/clz_ctz.c | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 arch/s390/boot/compressed/clz_ctz.c diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 660c799d875d..e30d3fdbbc78 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -11,6 +11,7 @@ UBSAN_SANITIZE := n KASAN_SANITIZE := n obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/compressed/clz_ctz.c new file mode 100644 index 000000000000..c3ebf248596b --- /dev/null +++ b/arch/s390/boot/compressed/clz_ctz.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/clz_ctz.c" From 1e9faef4d26de33bd6b5018695996e7394119e5b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 30 Jul 2021 14:21:56 +0200 Subject: [PATCH 354/502] USB: serial: pl2303: fix HX type detection The device release number for HX-type devices is configurable in EEPROM/OTPROM and cannot be used reliably for type detection. Assume all (non-H) devices with bcdUSB 1.1 and unknown bcdDevice to be of HX type while adding a bcdDevice check for HXD and TB (1.1 and 2.0, respectively). Reported-by: Chris Fixes: 8a7bf7510d1f ("USB: serial: pl2303: amend and tighten type detection") Cc: stable@vger.kernel.org # 5.13 Link: https://lore.kernel.org/r/20210730122156.718-1-johan@kernel.org Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 41 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 2f2f5047452b..17601e32083e 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -418,24 +418,33 @@ static int pl2303_detect_type(struct usb_serial *serial) bcdDevice = le16_to_cpu(desc->bcdDevice); bcdUSB = le16_to_cpu(desc->bcdUSB); - switch (bcdDevice) { - case 0x100: - /* - * Assume it's an HXN-type if the device doesn't support the old read - * request value. - */ - if (bcdUSB == 0x200 && !pl2303_supports_hx_status(serial)) - return TYPE_HXN; + switch (bcdUSB) { + case 0x110: + switch (bcdDevice) { + case 0x300: + return TYPE_HX; + case 0x400: + return TYPE_HXD; + default: + return TYPE_HX; + } break; - case 0x300: - if (bcdUSB == 0x200) + case 0x200: + switch (bcdDevice) { + case 0x100: + /* + * Assume it's an HXN-type if the device doesn't + * support the old read request value. + */ + if (!pl2303_supports_hx_status(serial)) + return TYPE_HXN; + break; + case 0x300: return TYPE_TA; - - return TYPE_HX; - case 0x400: - return TYPE_HXD; - case 0x500: - return TYPE_TB; + case 0x500: + return TYPE_TB; + } + break; } dev_err(&serial->interface->dev, From 4d77f36f2c8c62b230f4a5eb264c169fa04c4a5a Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Tue, 27 Jul 2021 17:43:37 +0800 Subject: [PATCH 355/502] drm/amdgpu: Fix out-of-bounds read when update mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If one GTT BO has been evicted/swapped out, it should sit in CPU domain. TTM only alloc struct ttm_resource instead of struct ttm_range_mgr_node for sysMem. Now when we update mapping for such invalidated BOs, we might walk out of bounds of struct ttm_resource. Three possible fix: 1) Let sysMem manager alloc struct ttm_range_mgr_node, like ttm_range_manager does. 2) Pass pages_addr to update_mapping function too, but need memset pages_addr[] to zero when unpopulate. 3) Init amdgpu_res_cursor directly. bug is detected by kfence. ================================================================== BUG: KFENCE: out-of-bounds read in amdgpu_vm_bo_update_mapping+0x564/0x6e0 Out-of-bounds read at 0x000000008ea93fe9 (64B right of kfence-#167): amdgpu_vm_bo_update_mapping+0x564/0x6e0 [amdgpu] amdgpu_vm_bo_update+0x282/0xa40 [amdgpu] amdgpu_vm_handle_moved+0x19e/0x1f0 [amdgpu] amdgpu_cs_vm_handling+0x4e4/0x640 [amdgpu] amdgpu_cs_ioctl+0x19e7/0x23c0 [amdgpu] drm_ioctl_kernel+0xf3/0x180 [drm] drm_ioctl+0x2cb/0x550 [drm] amdgpu_drm_ioctl+0x5e/0xb0 [amdgpu] kfence-#167 [0x000000008e11c055-0x000000001f676b3e ttm_sys_man_alloc+0x35/0x80 [ttm] ttm_resource_alloc+0x39/0x50 [ttm] ttm_bo_swapout+0x252/0x5a0 [ttm] ttm_device_swapout+0x107/0x180 [ttm] ttm_global_swapout+0x6f/0x130 [ttm] ttm_tt_populate+0xb1/0x2a0 [ttm] ttm_bo_handle_move_mem+0x17e/0x1d0 [ttm] ttm_mem_evict_first+0x59d/0x9c0 [ttm] ttm_bo_mem_space+0x39f/0x400 [ttm] ttm_bo_validate+0x13c/0x340 [ttm] ttm_bo_init_reserved+0x269/0x540 [ttm] amdgpu_bo_create+0x1d1/0xa30 [amdgpu] amdgpu_bo_create_user+0x40/0x80 [amdgpu] amdgpu_gem_object_create+0x71/0xc0 [amdgpu] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x2f2/0xcd0 [amdgpu] kfd_ioctl_alloc_memory_of_gpu+0xe2/0x330 [amdgpu] kfd_ioctl+0x461/0x690 [amdgpu] Signed-off-by: xinhui pan Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 59e0fefb15aa..acfa207cf970 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -54,11 +54,12 @@ static inline void amdgpu_res_first(struct ttm_resource *res, { struct drm_mm_node *node; - if (!res) { + if (!res || res->mem_type == TTM_PL_SYSTEM) { cur->start = start; cur->size = size; cur->remaining = size; cur->node = NULL; + WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); return; } From 1c0539a6fc8a4a4b77278e35d763073890de96b9 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 10 Jun 2021 09:55:01 +0800 Subject: [PATCH 356/502] drm/amdgpu: fix the doorbell missing when in CGPG issue for renoir. If GC has entered CGPG, ringing doorbell > first page doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround this issue. Signed-off-by: Yifan Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 044076ec1d03..6a23c6826e12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1295,6 +1295,16 @@ static bool is_raven_kicker(struct amdgpu_device *adev) return false; } +static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) +{ + if ((adev->asic_type == CHIP_RENOIR) && + (adev->gfx.me_fw_version >= 0x000000a5) && + (adev->gfx.me_feature_version >= 52)) + return true; + else + return false; +} + static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) { if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) @@ -3675,7 +3685,16 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + /* If GC has entered CGPG, ringing doorbell > first page + * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to + * workaround this issue. And this change has to align with firmware + * update. + */ + if (check_if_enlarge_doorbell_range(adev)) + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + (adev->doorbell.size - 4)); + else + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, (adev->doorbell_index.userqueue_end * 2) << 2); } From 028a71775f811e9d60664ba2c248ff95c6cf57cb Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Thu, 29 Jul 2021 08:52:58 -0700 Subject: [PATCH 357/502] gve: Update MAINTAINERS list The team maintaining the gve driver has undergone some changes, this updates the MAINTAINERS file accordingly. Signed-off-by: Catherine Sullivan Signed-off-by: Jon Olson Signed-off-by: David Awogbemila Signed-off-by: Jeroen de Borst Link: https://lore.kernel.org/r/20210729155258.442650-1-csully@google.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 58afeb12d3b3..17a873153eba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7858,9 +7858,9 @@ S: Maintained F: drivers/input/touchscreen/goodix.c GOOGLE ETHERNET DRIVERS -M: Catherine Sullivan -R: Sagi Shahar -R: Jon Olson +M: Jeroen de Borst +R: Catherine Sullivan +R: David Awogbemila L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/google/gve.rst From b2ff70a01a7a8083e749e01e5d3ffda706fe3305 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 29 Jul 2021 14:53:35 -0700 Subject: [PATCH 358/502] lib/test_string.c: move string selftest in the Runtime Testing menu STRING_SELFTEST is presented in the "Library routines" menu. Move it in Kernel hacking > Kernel Testing and Coverage > Runtime Testing together with other similar tests found in lib/ --- Runtime Testing <*> Test functions located in the hexdump module at runtime <*> Test string functions (NEW) <*> Test functions located in the string_helpers module at runtime <*> Test strscpy*() family of functions at runtime <*> Test kstrto*() family of functions at runtime <*> Test printf() family of functions at runtime <*> Test scanf() family of functions at runtime Link: https://lkml.kernel.org/r/20210719185158.190371-1-mcroce@linux.microsoft.com Signed-off-by: Matteo Croce Cc: Peter Rosin Cc: Geert Uytterhoeven Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 3 --- lib/Kconfig.debug | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig b/lib/Kconfig index d241fe476fda..5c9c0687f76d 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -683,9 +683,6 @@ config PARMAN config OBJAGG tristate "objagg" if COMPILE_TEST -config STRING_SELFTEST - tristate "Test string functions" - endmenu config GENERIC_IOREMAP diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 831212722924..5ddd575159fb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2180,6 +2180,9 @@ config ASYNC_RAID6_TEST config TEST_HEXDUMP tristate "Test functions located in the hexdump module at runtime" +config STRING_SELFTEST + tristate "Test string functions at runtime" + config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" From f267aeb6dea5e468793e5b8eb6a9c72c0020d418 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 29 Jul 2021 14:53:38 -0700 Subject: [PATCH 359/502] ocfs2: fix zero out valid data If append-dio feature is enabled, direct-io write and fallocate could run in parallel to extend file size, fallocate used "orig_isize" to record i_size before taking "ip_alloc_sem", when ocfs2_zeroout_partial_cluster() zeroout EOF blocks, i_size maybe already extended by ocfs2_dio_end_io_write(), that will cause valid data zeroed out. Link: https://lkml.kernel.org/r/20210722054923.24389-1-junxiao.bi@oracle.com Fixes: 6bba4471f0cc ("ocfs2: fix data corruption by fallocate") Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Joel Becker Cc: Jun Piao Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 775657943057..53bb46ce3cbb 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1935,7 +1935,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, goto out_inode_unlock; } - orig_isize = i_size_read(inode); switch (sr->l_whence) { case 0: /*SEEK_SET*/ break; @@ -1943,7 +1942,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, sr->l_start += f_pos; break; case 2: /*SEEK_END*/ - sr->l_start += orig_isize; + sr->l_start += i_size_read(inode); break; default: ret = -EINVAL; @@ -1998,6 +1997,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, ret = -EINVAL; } + orig_isize = i_size_read(inode); /* zeroout eof blocks in the cluster. */ if (!ret && change_size && orig_isize < size) { ret = ocfs2_zeroout_partial_cluster(inode, orig_isize, From 9449ad33be8480f538b11a593e2dda2fb33ca06d Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 29 Jul 2021 14:53:41 -0700 Subject: [PATCH 360/502] ocfs2: issue zeroout to EOF blocks For punch holes in EOF blocks, fallocate used buffer write to zero the EOF blocks in last cluster. But since ->writepage will ignore EOF pages, those zeros will not be flushed. This "looks" ok as commit 6bba4471f0cc ("ocfs2: fix data corruption by fallocate") will zero the EOF blocks when extend the file size, but it isn't. The problem happened on those EOF pages, before writeback, those pages had DIRTY flag set and all buffer_head in them also had DIRTY flag set, when writeback run by write_cache_pages(), DIRTY flag on the page was cleared, but DIRTY flag on the buffer_head not. When next write happened to those EOF pages, since buffer_head already had DIRTY flag set, it would not mark page DIRTY again. That made writeback ignore them forever. That will cause data corruption. Even directio write can't work because it will fail when trying to drop pages caches before direct io, as it found the buffer_head for those pages still had DIRTY flag set, then it will fall back to buffer io mode. To make a summary of the issue, as writeback ingores EOF pages, once any EOF page is generated, any write to it will only go to the page cache, it will never be flushed to disk even file size extends and that page is not EOF page any more. The fix is to avoid zero EOF blocks with buffer write. The following code snippet from qemu-img could trigger the corruption. 656 open("6b3711ae-3306-4bdd-823c-cf1c0060a095.conv.2", O_RDWR|O_DIRECT|O_CLOEXEC) = 11 ... 660 fallocate(11, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 2275868672, 327680 660 fallocate(11, 0, 2275868672, 327680) = 0 658 pwrite64(11, " Link: https://lkml.kernel.org/r/20210722054923.24389-2-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 99 ++++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 53bb46ce3cbb..54d7843c0211 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1529,6 +1529,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, } } +/* + * zero out partial blocks of one cluster. + * + * start: file offset where zero starts, will be made upper block aligned. + * len: it will be trimmed to the end of current cluster if "start + len" + * is bigger than it. + */ +static int ocfs2_zeroout_partial_cluster(struct inode *inode, + u64 start, u64 len) +{ + int ret; + u64 start_block, end_block, nr_blocks; + u64 p_block, offset; + u32 cluster, p_cluster, nr_clusters; + struct super_block *sb = inode->i_sb; + u64 end = ocfs2_align_bytes_to_clusters(sb, start); + + if (start + len < end) + end = start + len; + + start_block = ocfs2_blocks_for_bytes(sb, start); + end_block = ocfs2_blocks_for_bytes(sb, end); + nr_blocks = end_block - start_block; + if (!nr_blocks) + return 0; + + cluster = ocfs2_bytes_to_clusters(sb, start); + ret = ocfs2_get_clusters(inode, cluster, &p_cluster, + &nr_clusters, NULL); + if (ret) + return ret; + if (!p_cluster) + return 0; + + offset = start_block - ocfs2_clusters_to_blocks(sb, cluster); + p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset; + return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS); +} + static int ocfs2_zero_partial_clusters(struct inode *inode, u64 start, u64 len) { @@ -1538,6 +1577,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); unsigned int csize = osb->s_clustersize; handle_t *handle; + loff_t isize = i_size_read(inode); /* * The "start" and "end" values are NOT necessarily part of @@ -1558,6 +1598,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) goto out; + /* No page cache for EOF blocks, issue zero out to disk. */ + if (end > isize) { + /* + * zeroout eof blocks in last cluster starting from + * "isize" even "start" > "isize" because it is + * complicated to zeroout just at "start" as "start" + * may be not aligned with block size, buffer write + * would be required to do that, but out of eof buffer + * write is not supported. + */ + ret = ocfs2_zeroout_partial_cluster(inode, isize, + end - isize); + if (ret) { + mlog_errno(ret); + goto out; + } + if (start >= isize) + goto out; + end = isize; + } handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -1855,45 +1915,6 @@ out: return ret; } -/* - * zero out partial blocks of one cluster. - * - * start: file offset where zero starts, will be made upper block aligned. - * len: it will be trimmed to the end of current cluster if "start + len" - * is bigger than it. - */ -static int ocfs2_zeroout_partial_cluster(struct inode *inode, - u64 start, u64 len) -{ - int ret; - u64 start_block, end_block, nr_blocks; - u64 p_block, offset; - u32 cluster, p_cluster, nr_clusters; - struct super_block *sb = inode->i_sb; - u64 end = ocfs2_align_bytes_to_clusters(sb, start); - - if (start + len < end) - end = start + len; - - start_block = ocfs2_blocks_for_bytes(sb, start); - end_block = ocfs2_blocks_for_bytes(sb, end); - nr_blocks = end_block - start_block; - if (!nr_blocks) - return 0; - - cluster = ocfs2_bytes_to_clusters(sb, start); - ret = ocfs2_get_clusters(inode, cluster, &p_cluster, - &nr_clusters, NULL); - if (ret) - return ret; - if (!p_cluster) - return 0; - - offset = start_block - ocfs2_clusters_to_blocks(sb, cluster); - p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset; - return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS); -} - /* * Parts of this function taken from xfs_change_file_space() */ From 30def93565e5ba08676aa2b9083f253fc586dbed Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 29 Jul 2021 14:53:44 -0700 Subject: [PATCH 361/502] mm: memcontrol: fix blocking rstat function called from atomic cgroup1 thresholding code Dan Carpenter reports: The patch 2d146aa3aa84: "mm: memcontrol: switch to rstat" from Apr 29, 2021, leads to the following static checker warning: kernel/cgroup/rstat.c:200 cgroup_rstat_flush() warn: sleeping in atomic context mm/memcontrol.c 3572 static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) 3573 { 3574 unsigned long val; 3575 3576 if (mem_cgroup_is_root(memcg)) { 3577 cgroup_rstat_flush(memcg->css.cgroup); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is from static analysis and potentially a false positive. The problem is that mem_cgroup_usage() is called from __mem_cgroup_threshold() which holds an rcu_read_lock(). And the cgroup_rstat_flush() function can sleep. 3578 val = memcg_page_state(memcg, NR_FILE_PAGES) + 3579 memcg_page_state(memcg, NR_ANON_MAPPED); 3580 if (swap) 3581 val += memcg_page_state(memcg, MEMCG_SWAP); 3582 } else { 3583 if (!swap) 3584 val = page_counter_read(&memcg->memory); 3585 else 3586 val = page_counter_read(&memcg->memsw); 3587 } 3588 return val; 3589 } __mem_cgroup_threshold() indeed holds the rcu lock. In addition, the thresholding code is invoked during stat changes, and those contexts have irqs disabled as well. If the lock breaking occurs inside the flush function, it will result in a sleep from an atomic context. Use the irqsafe flushing variant in mem_cgroup_usage() to fix this. Link: https://lkml.kernel.org/r/20210726150019.251820-1-hannes@cmpxchg.org Fixes: 2d146aa3aa84 ("mm: memcontrol: switch to rstat") Signed-off-by: Johannes Weiner Reported-by: Dan Carpenter Acked-by: Chris Down Reviewed-by: Rik van Riel Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ae1f5d0cb581..eb8e87c4833f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3574,7 +3574,8 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) unsigned long val; if (mem_cgroup_is_root(memcg)) { - cgroup_rstat_flush(memcg->css.cgroup); + /* mem_cgroup_threshold() calls here from irqsafe context */ + cgroup_rstat_flush_irqsafe(memcg->css.cgroup); val = memcg_page_state(memcg, NR_FILE_PAGES) + memcg_page_state(memcg, NR_ANON_MAPPED); if (swap) From b5916c025432b7c776b6bb13617485fbc0bd3ebd Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 29 Jul 2021 14:53:47 -0700 Subject: [PATCH 362/502] mm/migrate: fix NR_ISOLATED corruption on 64-bit Similar to commit 2da9f6305f30 ("mm/vmscan: fix NR_ISOLATED_FILE corruption on 64-bit") avoid using unsigned int for nr_pages. With unsigned int type the large unsigned int converts to a large positive signed long. Symptoms include CMA allocations hanging forever due to alloc_contig_range->...->isolate_migratepages_block waiting forever in "while (unlikely(too_many_isolated(pgdat)))". Link: https://lkml.kernel.org/r/20210728042531.359409-1-aneesh.kumar@linux.ibm.com Fixes: c5fc5c3ae0c8 ("mm: migrate: account THP NUMA migration counters correctly") Signed-off-by: Aneesh Kumar K.V Reported-by: Michael Ellerman Reported-by: Alexey Kardashevskiy Reviewed-by: Yang Shi Cc: Mel Gorman Cc: Nicholas Piggin Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 34a9ad3e0a4f..7e240437e7d9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2068,7 +2068,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, LIST_HEAD(migratepages); new_page_t *new; bool compound; - unsigned int nr_pages = thp_nr_pages(page); + int nr_pages = thp_nr_pages(page); /* * PTE mapped THP or HugeTLB page can't reach here so the page could From f227f0faf63b46a113c4d1aca633c80195622dd2 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 29 Jul 2021 14:53:50 -0700 Subject: [PATCH 363/502] slub: fix unreclaimable slab stat for bulk free SLUB uses page allocator for higher order allocations and update unreclaimable slab stat for such allocations. At the moment, the bulk free for SLUB does not share code with normal free code path for these type of allocations and have missed the stat update. So, fix the stat update by common code. The user visible impact of the bug is the potential of inconsistent unreclaimable slab stat visible through meminfo and vmstat. Link: https://lkml.kernel.org/r/20210728155354.3440560-1-shakeelb@google.com Fixes: 6a486c0ad4dc ("mm, sl[ou]b: improve memory accounting") Signed-off-by: Shakeel Butt Acked-by: Michal Hocko Acked-by: Roman Gushchin Reviewed-by: Muchun Song Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 090fa14628f9..af984e4990e8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3236,6 +3236,16 @@ struct detached_freelist { struct kmem_cache *s; }; +static inline void free_nonslab_page(struct page *page) +{ + unsigned int order = compound_order(page); + + VM_BUG_ON_PAGE(!PageCompound(page), page); + kfree_hook(page_address(page)); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); + __free_pages(page, order); +} + /* * This function progressively scans the array with free objects (with * a limited look ahead) and extract objects belonging to the same @@ -3272,9 +3282,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!s) { /* Handle kalloc'ed objects */ if (unlikely(!PageSlab(page))) { - BUG_ON(!PageCompound(page)); - kfree_hook(object); - __free_pages(page, compound_order(page)); + free_nonslab_page(page); p[size] = NULL; /* mark object processed */ return size; } @@ -4250,13 +4258,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { - unsigned int order = compound_order(page); - - BUG_ON(!PageCompound(page)); - kfree_hook(object); - mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, - -(PAGE_SIZE << order)); - __free_pages(page, order); + free_nonslab_page(page); return; } slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); From 121dffe20b141c9b27f39d49b15882469cbebae7 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 29 Jul 2021 14:53:54 -0700 Subject: [PATCH 364/502] mm/memcg: fix NULL pointer dereference in memcg_slab_free_hook() When I use kfree_rcu() to free a large memory allocated by kmalloc_node(), the following dump occurs. BUG: kernel NULL pointer dereference, address: 0000000000000020 [...] Oops: 0000 [#1] SMP [...] Workqueue: events kfree_rcu_work RIP: 0010:__obj_to_index include/linux/slub_def.h:182 [inline] RIP: 0010:obj_to_index include/linux/slub_def.h:191 [inline] RIP: 0010:memcg_slab_free_hook+0x120/0x260 mm/slab.h:363 [...] Call Trace: kmem_cache_free_bulk+0x58/0x630 mm/slub.c:3293 kfree_bulk include/linux/slab.h:413 [inline] kfree_rcu_work+0x1ab/0x200 kernel/rcu/tree.c:3300 process_one_work+0x207/0x530 kernel/workqueue.c:2276 worker_thread+0x320/0x610 kernel/workqueue.c:2422 kthread+0x13d/0x160 kernel/kthread.c:313 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 When kmalloc_node() a large memory, page is allocated, not slab, so when freeing memory via kfree_rcu(), this large memory should not be used by memcg_slab_free_hook(), because memcg_slab_free_hook() is is used for slab. Using page_objcgs_check() instead of page_objcgs() in memcg_slab_free_hook() to fix this bug. Link: https://lkml.kernel.org/r/20210728145655.274476-1-wanghai38@huawei.com Fixes: 270c6a71460e ("mm: memcontrol/slab: Use helpers to access slab page's memcg_data") Signed-off-by: Wang Hai Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Acked-by: Roman Gushchin Reviewed-by: Kefeng Wang Reviewed-by: Muchun Song Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Johannes Weiner Cc: Alexei Starovoitov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.h b/mm/slab.h index f997fd5e42c8..58c01a34e5b8 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -346,7 +346,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, continue; page = virt_to_head_page(p[i]); - objcgs = page_objcgs(page); + objcgs = page_objcgs_check(page); if (!objcgs) continue; From 852a8a97776a153be2e6c803218eced45f37a19c Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 30 Jul 2021 11:02:54 +0200 Subject: [PATCH 365/502] ALSA: pcm - fix mmap capability check for the snd-dummy driver The snd-dummy driver (fake_buffer configuration) uses the ops->page callback for the mmap operations. Allow mmap for this case, too. Cc: Fixes: c4824ae7db41 ("ALSA: pcm: Fix mmap capability check") Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20210730090254.612478-1-perex@perex.cz Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 6a2971a7e6a1..09c0e2a6489c 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -246,7 +246,7 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream) if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) return false; - if (substream->ops->mmap) + if (substream->ops->mmap || substream->ops->page) return true; switch (substream->dma_buffer.dev.type) { From 9bac1bd6e6d36459087a728a968e79e37ebcea1a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 Jul 2021 18:26:22 -0300 Subject: [PATCH 366/502] Revert "perf map: Fix dso->nsinfo refcounting" This makes 'perf top' abort in some cases, and the right fix will involve surgery that is too much to do at this stage, so revert for now and fix it in the next merge window. This reverts commit 2d6b74baa7147251c30a46c4996e8cc224aa2dc5. Cc: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Krister Johansen Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 72e7f3616157..8af693d9678c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -192,8 +192,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (!(prot & PROT_EXEC)) dso__set_loaded(dso); } - - nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; if (build_id__is_defined(bid)) From 3a34b13a88caeb2800ab44a4918f230041b37dd9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Jul 2021 15:42:34 -0700 Subject: [PATCH 367/502] pipe: make pipe writes always wake up readers Since commit 1b6b26ae7053 ("pipe: fix and clarify pipe write wakeup logic") we have sanitized the pipe write logic, and would only try to wake up readers if they needed it. In particular, if the pipe already had data in it before the write, there was no point in trying to wake up a reader, since any existing readers must have been aware of the pre-existing data already. Doing extraneous wakeups will only cause potential thundering herd problems. However, it turns out that some Android libraries have misused the EPOLL interface, and expected "edge triggered" be to "any new write will trigger it". Even if there was no edge in sight. Quoting Sandeep Patil: "The commit 1b6b26ae7053 ('pipe: fix and clarify pipe write wakeup logic') changed pipe write logic to wakeup readers only if the pipe was empty at the time of write. However, there are libraries that relied upon the older behavior for notification scheme similar to what's described in [1] One such library 'realm-core'[2] is used by numerous Android applications. The library uses a similar notification mechanism as GNU Make but it never drains the pipe until it is full. When Android moved to v5.10 kernel, all applications using this library stopped working. The library has since been fixed[3] but it will be a while before all applications incorporate the updated library" Our regression rule for the kernel is that if applications break from new behavior, it's a regression, even if it was because the application did something patently wrong. Also note the original report [4] by Michal Kerrisk about a test for this epoll behavior - but at that point we didn't know of any actual broken use case. So add the extraneous wakeup, to approximate the old behavior. [ I say "approximate", because the exact old behavior was to do a wakeup not for each write(), but for each pipe buffer chunk that was filled in. The behavior introduced by this change is not that - this is just "every write will cause a wakeup, whether necessary or not", which seems to be sufficient for the broken library use. ] It's worth noting that this adds the extraneous wakeup only for the write side, while the read side still considers the "edge" to be purely about reading enough from the pipe to allow further writes. See commit f467a6a66419 ("pipe: fix and clarify pipe read wakeup logic") for the pipe read case, which remains that "only wake up if the pipe was full, and we read something from it". Link: https://lore.kernel.org/lkml/CAHk-=wjeG0q1vgzu4iJhW5juPkTsjTYmiqiMUYAebWW+0bam6w@mail.gmail.com/ [1] Link: https://github.com/realm/realm-core [2] Link: https://github.com/realm/realm-core/issues/4666 [3] Link: https://lore.kernel.org/lkml/CAKgNAkjMBGeAwF=2MKK758BhxvW58wYTgYKB2V-gY1PwXxrH+Q@mail.gmail.com/ [4] Link: https://lore.kernel.org/lkml/20210729222635.2937453-1-sspatil@android.com/ Reported-by: Sandeep Patil Cc: Michael Kerrisk Signed-off-by: Linus Torvalds --- fs/pipe.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index bfd946a9ad01..9ef4231cce61 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -429,20 +429,20 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) #endif /* - * Only wake up if the pipe started out empty, since - * otherwise there should be no readers waiting. + * Epoll nonsensically wants a wakeup whether the pipe + * was already empty or not. * * If it wasn't empty we try to merge new data into * the last buffer. * * That naturally merges small writes, but it also - * page-aligs the rest of the writes for large writes + * page-aligns the rest of the writes for large writes * spanning multiple pages. */ head = pipe->head; - was_empty = pipe_empty(head, pipe->tail); + was_empty = true; chars = total_len & (PAGE_SIZE-1); - if (chars && !was_empty) { + if (chars && !pipe_empty(head, pipe->tail)) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; int offset = buf->offset + buf->len; From ff41c28c4b54052942180d8b3f49e75f1445135a Mon Sep 17 00:00:00 2001 From: Kamal Agrawal Date: Fri, 30 Jul 2021 18:53:06 +0530 Subject: [PATCH 368/502] tracing: Fix NULL pointer dereference in start_creating The event_trace_add_tracer() can fail. In this case, it leads to a crash in start_creating with below call stack. Handle the error scenario properly in trace_array_create_dir. Call trace: down_write+0x7c/0x204 start_creating.25017+0x6c/0x194 tracefs_create_file+0xc4/0x2b4 init_tracer_tracefs+0x5c/0x940 trace_array_create_dir+0x58/0xb4 trace_array_create+0x1bc/0x2b8 trace_array_get_by_name+0xdc/0x18c Link: https://lkml.kernel.org/r/1627651386-21315-1-git-send-email-kamaagra@codeaurora.org Cc: stable@vger.kernel.org Fixes: 4114fbfd02f1 ("tracing: Enable creating new instance early boot") Signed-off-by: Kamal Agrawal Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c59dd35a6da5..33899a71fdc1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9135,8 +9135,10 @@ static int trace_array_create_dir(struct trace_array *tr) return -EINVAL; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { tracefs_remove(tr->dir); + return ret; + } init_tracer_tracefs(tr, tr->dir); __update_tracer_options(tr); From f828b0bcacef189edbd247e9f48864fc36bfbe33 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 30 Jul 2021 19:59:50 -0700 Subject: [PATCH 369/502] clk: fix leak on devm_clk_bulk_get_all() unwind clk_bulk_get_all() allocates an array of struct clk_bulk data for us (unlike clk_bulk_get()), so we need to free it. Let's use the clk_bulk_put_all() helper. kmemleak complains, on an RK3399 Gru/Kevin system: unreferenced object 0xffffff80045def00 (size 128): comm "swapper/0", pid 1, jiffies 4294667682 (age 86.394s) hex dump (first 32 bytes): 44 32 60 fe fe ff ff ff 00 00 00 00 00 00 00 00 D2`............. 48 32 60 fe fe ff ff ff 00 00 00 00 00 00 00 00 H2`............. backtrace: [<00000000742860d6>] __kmalloc+0x22c/0x39c [<00000000b0493f2c>] clk_bulk_get_all+0x64/0x188 [<00000000325f5900>] devm_clk_bulk_get_all+0x58/0xa8 [<00000000175b9bc5>] dwc3_probe+0x8ac/0xb5c [<000000009169e2f9>] platform_drv_probe+0x9c/0xbc [<000000005c51e2ee>] really_probe+0x13c/0x378 [<00000000c47b1f24>] driver_probe_device+0x84/0xc0 [<00000000f870fcfb>] __device_attach_driver+0x94/0xb0 [<000000004d1b92ae>] bus_for_each_drv+0x8c/0xd8 [<00000000481d60c3>] __device_attach+0xc4/0x150 [<00000000a163bd36>] device_initial_probe+0x1c/0x28 [<00000000accb6bad>] bus_probe_device+0x3c/0x9c [<000000001a199f89>] device_add+0x218/0x3cc [<000000001bd84952>] of_device_add+0x40/0x50 [<000000009c658c29>] of_platform_device_create_pdata+0xac/0x100 [<0000000021c69ba4>] of_platform_bus_create+0x190/0x224 Fixes: f08c2e2865f6 ("clk: add managed version of clk_bulk_get_all") Cc: Dong Aisheng Cc: stable@vger.kernel.org Signed-off-by: Brian Norris Link: https://lore.kernel.org/r/20210731025950.2238582-1-briannorris@chromium.org Signed-off-by: Stephen Boyd --- drivers/clk/clk-devres.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index be160764911b..f9d5b7334341 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -92,13 +92,20 @@ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks, } EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional); +static void devm_clk_bulk_release_all(struct device *dev, void *res) +{ + struct clk_bulk_devres *devres = res; + + clk_bulk_put_all(devres->num_clks, devres->clks); +} + int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { struct clk_bulk_devres *devres; int ret; - devres = devres_alloc(devm_clk_bulk_release, + devres = devres_alloc(devm_clk_bulk_release_all, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; From c500bee1c5b2f1d59b1081ac879d73268ab0ff17 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Aug 2021 17:04:17 -0700 Subject: [PATCH 370/502] Linux 5.14-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6b555f64df06..27a072cffcb9 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Opossums on Parade # *DOCUMENTATION* From 7199ddede9f0f2f68d41e6928e1c6c4bca9c39c0 Mon Sep 17 00:00:00 2001 From: Juergen Borleis Date: Thu, 29 Jul 2021 09:18:21 +0200 Subject: [PATCH 371/502] dmaengine: imx-dma: configure the generic DMA type to make it work Commit dea7a9fbb009 ("dmaengine: imx-dma: remove dma_slave_config direction usage") changes the method from a "configuration when called" to an "configuration when used". Due to this, only the cyclic DMA type gets configured correctly, while the generic DMA type is left non-configured. Without this additional call, the struct imxdma_channel::word_size member is stuck at DMA_SLAVE_BUSWIDTH_UNDEFINED and imxdma_prep_slave_sg() always returns NULL. Signed-off-by: Juergen Borleis Fixes: dea7a9fbb009 ("dmaengine: imx-dma: remove dma_slave_config direction usage") Link: https://lore.kernel.org/r/20210729071821.9857-1-jbe@pengutronix.de Signed-off-by: Vinod Koul --- drivers/dma/imx-dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 7f116bbcfad2..2ddc31e64db0 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -812,6 +812,8 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg( dma_length += sg_dma_len(sg); } + imxdma_config_write(chan, &imxdmac->config, direction); + switch (imxdmac->word_size) { case DMA_SLAVE_BUSWIDTH_4_BYTES: if (sg_dma_len(sgl) & 3 || sgl->dma_address & 3) From eda80d7c9c4db0f55f130e38c682e19b58d5add7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 1 Aug 2021 13:38:01 +0200 Subject: [PATCH 372/502] ALSA: memalloc: Fix regression with SNDRV_DMA_TYPE_CONTINUOUS The recent code refactoring made the mmap of continuous pages to be done via the own helper snd_dma_continuous_mmap() with remap_pfn_range(). There I overlooked that dmab->addr isn't set for the allocation with SNDRV_DMA_TYPE_CONTINUOUS. This resulted always in an error at mmap with this buffer type on the system such as Intel SST Baytrail driver. This patch fixes the regression by passing the correct address. Fixes: 30b7ba6972d5 ("ALSA: core: Add continuous and vmalloc mmap ops") Reported-by: Hans de Goede Link: https://lore.kernel.org/r/8d6674da-7d7b-803e-acc9-7de6cb1223fa@redhat.com Link: https://lore.kernel.org/r/20210801113801.31290-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/memalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 83b79edfa52d..439a358ecfe9 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -215,7 +215,7 @@ static int snd_dma_continuous_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { return remap_pfn_range(area, area->vm_start, - dmab->addr >> PAGE_SHIFT, + page_to_pfn(virt_to_page(dmab->area)), area->vm_end - area->vm_start, area->vm_page_prot); } From 1159e25c137422bdc48ee96e3fb014bd942092c6 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Thu, 29 Jul 2021 14:43:06 +0300 Subject: [PATCH 373/502] qede: fix crash in rmmod qede while automatic debug collection A crash has been observed if rmmod is done while automatic debug collection in progress. It is due to a race condition between both of them. To fix stop the sp_task during unload to avoid running qede_sp_task even if they are schedule during removal process. Signed-off-by: Alok Prasad Signed-off-by: Shai Malin Signed-off-by: Ariel Elior Signed-off-by: Prabhakar Kushwaha Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 1 + drivers/net/ethernet/qlogic/qede/qede_main.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 2e62a2c4eb63..5630008f38b7 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -501,6 +501,7 @@ struct qede_fastpath { #define QEDE_SP_HW_ERR 4 #define QEDE_SP_ARFS_CONFIG 5 #define QEDE_SP_AER 7 +#define QEDE_SP_DISABLE 8 #ifdef CONFIG_RFS_ACCEL int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 01ac1e93d27a..7c6064baeba2 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1009,6 +1009,13 @@ static void qede_sp_task(struct work_struct *work) struct qede_dev *edev = container_of(work, struct qede_dev, sp_task.work); + /* Disable execution of this deferred work once + * qede removal is in progress, this stop any future + * scheduling of sp_task. + */ + if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags)) + return; + /* The locking scheme depends on the specific flag: * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to * ensure that ongoing flows are ended and new ones are not started. @@ -1300,6 +1307,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY)); if (mode != QEDE_REMOVE_RECOVERY) { + set_bit(QEDE_SP_DISABLE, &edev->sp_flags); unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); From d51c5907e9809a803b276883d203f45849abd4d6 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 29 Jul 2021 15:48:20 +0200 Subject: [PATCH 374/502] net, gro: Set inner transport header offset in tcp/udp GRO hook GSO expects inner transport header offset to be valid when skb->encapsulation flag is set. GSO uses this value to calculate the length of an individual segment of a GSO packet in skb_gso_transport_seglen(). However, tcp/udp gro_complete callbacks don't update the skb->inner_transport_header when processing an encapsulated TCP/UDP segment. As a result a GRO skb has ->inner_transport_header set to a value carried over from earlier skb processing. This can have mild to tragic consequences. From miscalculating the GSO segment length to triggering a page fault [1], when trying to read TCP/UDP header at an address past the skb->data page. The latter scenario leads to an oops report like so: BUG: unable to handle page fault for address: ffff9fa7ec00d008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 123f201067 P4D 123f201067 PUD 123f209067 PMD 0 Oops: 0000 [#1] SMP NOPTI CPU: 44 PID: 0 Comm: swapper/44 Not tainted 5.4.53-cloudflare-2020.7.21 #1 Hardware name: HYVE EDGE-METAL-GEN10/HS-1811DLite1, BIOS V2.15 02/21/2020 RIP: 0010:skb_gso_transport_seglen+0x44/0xa0 Code: c0 41 83 e0 11 f6 87 81 00 00 00 20 74 30 0f b7 87 aa 00 00 00 0f [...] RSP: 0018:ffffad8640bacbb8 EFLAGS: 00010202 RAX: 000000000000feda RBX: ffff9fcc8d31bc00 RCX: ffff9fa7ec00cffc RDX: ffff9fa7ebffdec0 RSI: 000000000000feda RDI: 0000000000000122 RBP: 00000000000005c4 R08: 0000000000000001 R09: 0000000000000000 R10: ffff9fe588ae3800 R11: ffff9fe011fc92f0 R12: ffff9fcc8d31bc00 R13: ffff9fe0119d4300 R14: 00000000000005c4 R15: ffff9fba57d70900 FS: 0000000000000000(0000) GS:ffff9fe68df00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff9fa7ec00d008 CR3: 0000003e99b1c000 CR4: 0000000000340ee0 Call Trace: skb_gso_validate_network_len+0x11/0x70 __ip_finish_output+0x109/0x1c0 ip_sublist_rcv_finish+0x57/0x70 ip_sublist_rcv+0x2aa/0x2d0 ? ip_rcv_finish_core.constprop.0+0x390/0x390 ip_list_rcv+0x12b/0x14f __netif_receive_skb_list_core+0x2a9/0x2d0 netif_receive_skb_list_internal+0x1b5/0x2e0 napi_complete_done+0x93/0x140 veth_poll+0xc0/0x19f [veth] ? mlx5e_napi_poll+0x221/0x610 [mlx5_core] net_rx_action+0x1f8/0x790 __do_softirq+0xe1/0x2bf irq_exit+0x8e/0xc0 do_IRQ+0x58/0xe0 common_interrupt+0xf/0xf The bug can be observed in a simple setup where we send IP/GRE/IP/TCP packets into a netns over a veth pair. Inside the netns, packets are forwarded to dummy device: trafgen -> [veth A]--[veth B] -forward-> [dummy] For veth B to GRO aggregate packets on receive, it needs to have an XDP program attached (for example, a trivial XDP_PASS). Additionally, for UDP, we need to enable GSO_UDP_L4 feature on the device: ip netns exec A ethtool -K AB rx-udp-gro-forwarding on The last component is an artificial delay to increase the chances of GRO batching happening: ip netns exec A tc qdisc add dev AB root \ netem delay 200us slot 5ms 10ms packets 2 bytes 64k With such a setup in place, the bug can be observed by tracing the skb outer and inner offsets when GSO skb is transmitted from the dummy device: tcp: FUNC DEV SKB_LEN NH TH ENC INH ITH GSO_SIZE GSO_TYPE ip_finish_output dumB 2830 270 290 1 294 254 1383 (tcpv4,gre,) ^^^ udp: FUNC DEV SKB_LEN NH TH ENC INH ITH GSO_SIZE GSO_TYPE ip_finish_output dumB 2818 270 290 1 294 254 1383 (gre,udp_l4,) ^^^ Fix it by updating the inner transport header offset in tcp/udp gro_complete callbacks, similar to how {inet,ipv6}_gro_complete callbacks update the inner network header offset, when skb->encapsulation flag is set. [1] https://lore.kernel.org/netdev/CAKxSbF01cLpZem2GFaUaifh0S-5WYViZemTicAg7FCHOnh6kug@mail.gmail.com/ Fixes: bf296b125b21 ("tcp: Add GRO support") Fixes: f993bc25e519 ("net: core: handle encapsulation offloads when computing segment lengths") Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Reported-by: Alex Forster Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 3 +++ net/ipv4/udp_offload.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index e09147ac9a99..fc61cd3fea65 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -298,6 +298,9 @@ int tcp_gro_complete(struct sk_buff *skb) if (th->cwr) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } EXPORT_SYMBOL(tcp_gro_complete); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 9dde1e5fb449..1380a6b6f4ff 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -624,6 +624,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb) skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; + + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } From 85b1ebfea2b0d8797266bcc6f04b6cc87e38290a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 30 Jul 2021 08:54:08 +0100 Subject: [PATCH 375/502] interconnect: Fix undersized devress_alloc allocation The expression sizeof(**ptr) for the void **ptr is just 1 rather than the size of a pointer. Fix this by using sizeof(*ptr). Addresses-Coverity: ("Wrong sizeof argument") Fixes: e145d9a184f2 ("interconnect: Add devm_of_icc_get() as exported API for users") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210730075408.19945-1-colin.king@canonical.com Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 1b2c564eaa99..7887941730db 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -403,7 +403,7 @@ struct icc_path *devm_of_icc_get(struct device *dev, const char *name) { struct icc_path **ptr, *path; - ptr = devres_alloc(devm_icc_release, sizeof(**ptr), GFP_KERNEL); + ptr = devres_alloc(devm_icc_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return ERR_PTR(-ENOMEM); From ebca25ead0711729e0aeeec45062e7ac4df3e158 Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Fri, 30 Jul 2021 18:53:21 +0200 Subject: [PATCH 376/502] net/sched: taprio: Fix init procedure Commit 13511704f8d759 ("net: taprio offload: enforce qdisc to netdev queue mapping") resulted in duplicate entries in the qdisc hash. While this did not impact the overall operation of the qdisc and taprio code paths, it did result in an infinite loop when dumping the qdisc properties, at least on one target (NXP LS1028 ARDB). Removing the duplicate call to qdisc_hash_add() solves the problem. Fixes: 13511704f8d759 ("net: taprio offload: enforce qdisc to netdev queue mapping") Signed-off-by: Yannick Vignon Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 07b30d0601d7..9c79374457a0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1739,8 +1739,6 @@ static void taprio_attach(struct Qdisc *sch) if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc, false); } else { old = dev_graft_qdisc(qdisc->dev_queue, sch); qdisc_refcount_inc(sch); From 0d5c3954b35eddff0da0436c31e8d721eceb7dc2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Aug 2021 20:00:23 -0700 Subject: [PATCH 377/502] spi: mediatek: Fix fifo transfer Commit 3a70dd2d0503 ("spi: mediatek: fix fifo rx mode") claims that fifo RX mode was never handled, and adds the presumably missing code to the FIFO transfer function. However, the claim that receive data was not handled is incorrect. It was handled as part of interrupt handling after the transfer was complete. The code added with the above mentioned commit reads data from the receive FIFO before the transfer is started, which is wrong. This results in an actual transfer error on a Hayato Chromebook. Remove the code trying to handle receive data before the transfer is started to fix the problem. Fixes: 3a70dd2d0503 ("spi: mediatek: fix fifo rx mode") Cc: Peter Hess Cc: Frank Wunderlich Cc: Tzung-Bi Shih Cc: Hsin-Yi Wang Signed-off-by: Guenter Roeck Tested-by: Hsin-Yi Wang Tested-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20210802030023.1748777-1-linux@roeck-us.net Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 68dca8ceb3ad..7914255521c3 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -426,24 +426,15 @@ static int mtk_spi_fifo_transfer(struct spi_master *master, mtk_spi_prepare_transfer(master, xfer); mtk_spi_setup_packet(master); - cnt = xfer->len / 4; - if (xfer->tx_buf) + if (xfer->tx_buf) { + cnt = xfer->len / 4; iowrite32_rep(mdata->base + SPI_TX_DATA_REG, xfer->tx_buf, cnt); - - if (xfer->rx_buf) - ioread32_rep(mdata->base + SPI_RX_DATA_REG, xfer->rx_buf, cnt); - - remainder = xfer->len % 4; - if (remainder > 0) { - reg_val = 0; - if (xfer->tx_buf) { + remainder = xfer->len % 4; + if (remainder > 0) { + reg_val = 0; memcpy(®_val, xfer->tx_buf + (cnt * 4), remainder); writel(reg_val, mdata->base + SPI_TX_DATA_REG); } - if (xfer->rx_buf) { - reg_val = readl(mdata->base + SPI_RX_DATA_REG); - memcpy(xfer->rx_buf + (cnt * 4), ®_val, remainder); - } } mtk_spi_enable_transfer(master); From 40e159403896f7d55c98f858d0b20fee1d941fa4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Aug 2021 12:21:30 +0100 Subject: [PATCH 378/502] mhi: Fix networking tree build. Signed-off-by: David S. Miller --- include/linux/mhi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 944aa3aa3035..5e08468854db 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -719,8 +719,13 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, + unsigned int flags); + +/* Automatically allocate and queue inbound buffers */ +#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. From 47091f473b364c98207c4def197a0ae386fc9af1 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Sat, 26 Jun 2021 02:01:03 +0200 Subject: [PATCH 379/502] ARM: dts: nomadik: Fix up interrupt controller node names Once the new schema interrupt-controller/arm,vic.yaml is added, we get the below warnings: arch/arm/boot/dts/ste-nomadik-nhk15.dt.yaml: intc@10140000: $nodename:0: 'intc@10140000' does not match '^interrupt-controller(@[0-9a-f,]+)*$' Fix the node names for the interrupt controller to conform to the standard node name interrupt-controller@.. Signed-off-by: Sudeep Holla Signed-off-by: Linus Walleij Cc: Linus Walleij Link: https://lore.kernel.org/r/20210617210825.3064367-2-sudeep.holla@arm.com Link: https://lore.kernel.org/r/20210626000103.830184-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index c9b906432341..1815361fe73c 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -755,14 +755,14 @@ status = "disabled"; }; - vica: intc@10140000 { + vica: interrupt-controller@10140000 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; reg = <0x10140000 0x20>; }; - vicb: intc@10140020 { + vicb: interrupt-controller@10140020 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; From a4282f66d90e93aacfe1b19509fd5851bf95be68 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 22 Jul 2021 02:26:16 +0300 Subject: [PATCH 380/502] soc/tegra: Make regulator couplers depend on CONFIG_REGULATOR The regulator coupler drivers now use regulator-driver API function that isn't available during compile-testing. Make regulator coupler drivers dependent on CONFIG_REGULATOR in Kconfig. Fixes: 03978d42ed0d ("soc/tegra: regulators: Bump voltages on system reboot") Reported-by: kernel test robot Signed-off-by: Dmitry Osipenko Acked-by: Jon Hunter Signed-off-by: Arnd Bergmann --- drivers/soc/tegra/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig index 20ace654553a..8b53ed1cc67e 100644 --- a/drivers/soc/tegra/Kconfig +++ b/drivers/soc/tegra/Kconfig @@ -15,7 +15,7 @@ config ARCH_TEGRA_2x_SOC select PL310_ERRATA_769419 if CACHE_L2X0 select SOC_TEGRA_FLOWCTRL select SOC_TEGRA_PMC - select SOC_TEGRA20_VOLTAGE_COUPLER + select SOC_TEGRA20_VOLTAGE_COUPLER if REGULATOR select TEGRA_TIMER help Support for NVIDIA Tegra AP20 and T20 processors, based on the @@ -29,7 +29,7 @@ config ARCH_TEGRA_3x_SOC select PL310_ERRATA_769419 if CACHE_L2X0 select SOC_TEGRA_FLOWCTRL select SOC_TEGRA_PMC - select SOC_TEGRA30_VOLTAGE_COUPLER + select SOC_TEGRA30_VOLTAGE_COUPLER if REGULATOR select TEGRA_TIMER help Support for NVIDIA Tegra T30 processor family, based on the @@ -155,7 +155,9 @@ config SOC_TEGRA_POWERGATE_BPMP config SOC_TEGRA20_VOLTAGE_COUPLER bool "Voltage scaling support for Tegra20 SoCs" depends on ARCH_TEGRA_2x_SOC || COMPILE_TEST + depends on REGULATOR config SOC_TEGRA30_VOLTAGE_COUPLER bool "Voltage scaling support for Tegra30 SoCs" depends on ARCH_TEGRA_3x_SOC || COMPILE_TEST + depends on REGULATOR From 7f94b69ece515ac82defa60ef7cba2cf26180216 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 23:13:43 +0200 Subject: [PATCH 381/502] ARM: ixp4xx: fix compile-testing soc drivers Randconfig builds on the ixp4xx ethernet driver showed that the qmgr and npe drivers are not actually built even when compile testing is enabled: ERROR: modpost: "qmgr_stat_empty" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "qmgr_enable_irq" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "qmgr_set_irq" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "__qmgr_request_queue" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "npe_send_recv_message" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "npe_recv_message" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "npe_load_firmware" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "npe_running" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "qmgr_disable_irq" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! ERROR: modpost: "qmgr_stat_below_low_watermark" [drivers/net/ethernet/xscale/ixp4xx_eth.ko] undefined! Fix it by always entering the drivers/soc/ixp4xx/ directory, and fix the resulting compile test failures by removing the #include statements that prevent building on most other platforms. Fixes: 7a6c9dbb36a4 ("soc: ixp4xx: Protect IXP4xx SoC drivers by ARCH_IXP4XX || COMPILE_TEST") Fixes: fcf2d8978cd5 ("ARM: ixp4xx: Move NPE and QMGR to drivers/soc") Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20210721211412.3537004-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann --- drivers/soc/Makefile | 2 +- drivers/soc/ixp4xx/ixp4xx-npe.c | 1 - drivers/soc/ixp4xx/ixp4xx-qmgr.c | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile index f678e4d9e585..a05e9fbcd3e0 100644 --- a/drivers/soc/Makefile +++ b/drivers/soc/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MACH_DOVE) += dove/ obj-y += fsl/ obj-$(CONFIG_ARCH_GEMINI) += gemini/ obj-y += imx/ -obj-$(CONFIG_ARCH_IXP4XX) += ixp4xx/ +obj-y += ixp4xx/ obj-$(CONFIG_SOC_XWAY) += lantiq/ obj-$(CONFIG_LITEX_SOC_CONTROLLER) += litex/ obj-y += mediatek/ diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index 7bd19354982a..fea50e04d5a1 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++ b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #define DEBUG_MSG 0 diff --git a/drivers/soc/ixp4xx/ixp4xx-qmgr.c b/drivers/soc/ixp4xx/ixp4xx-qmgr.c index 7149510b307e..c6bf6ef257c0 100644 --- a/drivers/soc/ixp4xx/ixp4xx-qmgr.c +++ b/drivers/soc/ixp4xx/ixp4xx-qmgr.c @@ -12,7 +12,6 @@ #include #include #include -#include #include static struct qmgr_regs __iomem *qmgr_regs; From 796a8c85b1216618258e08b463d3bef0d7123760 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:16:04 +0200 Subject: [PATCH 382/502] ARM: ixp4xx: goramo_mlr depends on old PCI driver When this driver is disabled, the board file fails to build, so add a dependency: arch/arm/mach-ixp4xx/goramo_mlr.c: In function 'gmlr_pci_preinit': arch/arm/mach-ixp4xx/goramo_mlr.c:472:9: error: implicit declaration of function 'ixp4xx_pci_preinit'; did you mean 'iop3xx_pci_preinit'? [-Werror=implicit-function-declaration] 472 | ixp4xx_pci_preinit(); | ^~~~~~~~~~~~~~~~~~ | iop3xx_pci_preinit arch/arm/mach-ixp4xx/goramo_mlr.c: In function 'gmlr_pci_postinit': arch/arm/mach-ixp4xx/goramo_mlr.c:481:22: error: implicit declaration of function 'ixp4xx_pci_read' [-Werror=implicit-function-declaration] 481 | if (!ixp4xx_pci_read(addr, NP_CMD_CONFIGREAD, &value)) { | ^~~~~~~~~~~~~~~ arch/arm/mach-ixp4xx/goramo_mlr.c:231:35: error: 'IXP4XX_UART1_BASE_PHYS' undeclared here (not in a function) 231 | .start = IXP4XX_UART1_BASE_PHYS, | ^~~~~~~~~~~~~~~~~~~~~~ arch/arm/mach-ixp4xx/goramo_mlr.c: In function 'gmlr_init': arch/arm/mach-ixp4xx/goramo_mlr.c:376:9: error: implicit declaration of function 'ixp4xx_sys_init' [-Werror=implicit-function-declaration] 376 | ixp4xx_sys_init(); | ^~~~~~~~~~~~~~~ Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij Cc: Linus Walleij Cc: soc@kernel.org Link: https://lore.kernel.org/r/20210721151620.2373500-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/mach-ixp4xx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index bf14d65120b9..34a1c7742088 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -91,6 +91,7 @@ config MACH_IXDP465 config MACH_GORAMO_MLR bool "GORAMO Multi Link Router" + depends on IXP4XX_PCI_LEGACY help Say 'Y' here if you want your kernel to support GORAMO MultiLink router. From cb81698fddbcc9a3ee75857e99dfc29caa96135b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:10 +0300 Subject: [PATCH 383/502] net: dsa: sja1105: fix static FDB writes for SJA1110 The blamed commit made FDB access on SJA1110 functional only as far as dumping the existing entries goes, but anything having to do with an entry's index (adding, deleting) is still broken. There are in fact 2 problems, all caused by improperly inheriting the code from SJA1105P/Q/R/S: - An entry size is SJA1110_SIZE_L2_LOOKUP_ENTRY (24) bytes and not SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY (20) bytes - The "index" field within an FDB entry is at bits 10:1 for SJA1110 and not 15:6 as in SJA1105P/Q/R/S This patch moves the packing function for the cmd->index outside of sja1105pqrs_common_l2_lookup_cmd_packing() and into the device specific functions sja1105pqrs_l2_lookup_cmd_packing and sja1110_l2_lookup_cmd_packing. Fixes: 74e7feff0e22 ("net: dsa: sja1105: fix dynamic access to L2 Address Lookup table for SJA1110") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../net/dsa/sja1105/sja1105_dynamic_config.c | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index 56fead68ea9f..147709131c13 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -304,6 +304,15 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, hostcmd = SJA1105_HOSTCMD_INVALIDATE; } sja1105_packing(p, &hostcmd, 25, 23, size, op); +} + +static void +sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + int entry_size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; + + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); /* Hack - The hardware takes the 'index' field within * struct sja1105_l2_lookup_entry as the index on which this command @@ -313,26 +322,18 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, * such that our API doesn't need to ask for a full-blown entry * structure when e.g. a delete is requested. */ - sja1105_packing(buf, &cmd->index, 15, 6, - SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op); -} - -static void -sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, - enum packing_op op) -{ - int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; - - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105_packing(buf, &cmd->index, 15, 6, entry_size, op); } static void sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, enum packing_op op) { - int size = SJA1110_SIZE_L2_LOOKUP_ENTRY; + int entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY; - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); + + sja1105_packing(buf, &cmd->index, 10, 1, entry_size, op); } /* The switch is so retarded that it makes our command/entry abstraction From e11e865bf84e3c6ea91563ff3e858cfe0e184bd2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:11 +0300 Subject: [PATCH 384/502] net: dsa: sja1105: overwrite dynamic FDB entries with static ones in .port_fdb_add The SJA1105 switch family leaves it up to software to decide where within the FDB to install a static entry, and to concatenate destination ports for already existing entries (the FDB is also used for multicast entries), it is not as simple as just saying "please add this entry". This means we first need to search for an existing FDB entry before adding a new one. The driver currently manages to fool itself into thinking that if an FDB entry already exists, there is nothing to be done. But that FDB entry might be dynamically learned, case in which it should be replaced with a static entry, but instead it is left alone. This patch checks the LOCKEDS ("locked/static") bit from found FDB entries, and lets the code "goto skip_finding_an_index;" if the FDB entry was not static. So we also need to move the place where we set LOCKEDS = true, to cover the new case where a dynamic FDB entry existed but was dynamic. Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management") Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index e2dc997580a8..cc4a22ee1474 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1333,7 +1333,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, * mask? If yes, we need to do nothing. If not, we need * to rewrite the entry by adding this port to it. */ - if (l2_lookup.destports & BIT(port)) + if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds) return 0; l2_lookup.destports |= BIT(port); } else { @@ -1364,6 +1364,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, index, NULL, false); } } + l2_lookup.lockeds = true; l2_lookup.index = sja1105et_fdb_index(bin, way); rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, @@ -1434,10 +1435,10 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, SJA1105_SEARCH, &l2_lookup); if (rc == 0) { - /* Found and this port is already in the entry's + /* Found a static entry and this port is already in the entry's * port mask => job done */ - if (l2_lookup.destports & BIT(port)) + if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds) return 0; /* l2_lookup.index is populated by the switch in case it * found something. @@ -1460,10 +1461,11 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, dev_err(ds->dev, "FDB is full, cannot add entry.\n"); return -EINVAL; } - l2_lookup.lockeds = true; l2_lookup.index = i; skip_finding_an_index: + l2_lookup.lockeds = true; + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, l2_lookup.index, &l2_lookup, true); From 6c5fc159e0927531707895709eee1f8bfa04289f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:12 +0300 Subject: [PATCH 385/502] net: dsa: sja1105: invalidate dynamic FDB entries learned concurrently with statically added ones The procedure to add a static FDB entry in sja1105 is concurrent with dynamic learning performed on all bridge ports and the CPU port. The switch looks up the FDB from left to right, and also learns dynamically from left to right, so it is possible that between the moment when we pick up a free slot to install an FDB entry, another slot to the left of that one becomes free due to an address ageing out, and that other slot is then immediately used by the switch to learn dynamically the same address as we're trying to add statically. The result is that we succeeded to add our static FDB entry, but it is being shadowed by a dynamic FDB entry to its left, and the switch will behave as if our static FDB entry did not exist. We cannot really prevent this from happening unless we make the entire process to add a static FDB entry a huge critical section where address learning is temporarily disabled on _all_ ports, and then re-enabled according to the configuration done by sja1105_port_set_learning. However, that is kind of disruptive for the operation of the network. What we can do alternatively is to simply read back the FDB for dynamic entries located before our newly added static one, and delete them. This will guarantee that our static FDB entry is now operational. It will still not guarantee that there aren't dynamic FDB entries to the _right_ of that static FDB entry, but at least those entries will age out by themselves since they aren't hit, and won't bother anyone. Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management") Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 57 +++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index cc4a22ee1474..5a4c7789ca43 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1318,10 +1318,11 @@ static int sja1105et_is_fdb_entry_in_bin(struct sja1105_private *priv, int bin, int sja1105et_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; struct device *dev = ds->dev; int last_unused = -1; + int start, end, i; int bin, way, rc; bin = sja1105et_fdb_hash(priv, addr, vid); @@ -1373,6 +1374,29 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, if (rc < 0) return rc; + /* Invalidate a dynamically learned entry if that exists */ + start = sja1105et_fdb_index(bin, 0); + end = sja1105et_fdb_index(bin, way); + + for (i = start; i < end; i++) { + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + i, &tmp); + if (rc == -ENOENT) + continue; + if (rc) + return rc; + + if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid) + continue; + + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + i, NULL, false); + if (rc) + return rc; + + break; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } @@ -1414,7 +1438,7 @@ int sja1105et_fdb_del(struct dsa_switch *ds, int port, int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; int rc, i; @@ -1472,6 +1496,35 @@ skip_finding_an_index: if (rc < 0) return rc; + /* The switch learns dynamic entries and looks up the FDB left to + * right. It is possible that our addition was concurrent with the + * dynamic learning of the same address, so now that the static entry + * has been installed, we are certain that address learning for this + * particular address has been turned off, so the dynamic entry either + * is in the FDB at an index smaller than the static one, or isn't (it + * can also be at a larger index, but in that case it is inactive + * because the static FDB entry will match first, and the dynamic one + * will eventually age out). Search for a dynamically learned address + * prior to our static one and invalidate it. + */ + tmp = l2_lookup; + + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + SJA1105_SEARCH, &tmp); + if (rc < 0) { + dev_err(ds->dev, + "port %d failed to read back entry for %pM vid %d: %pe\n", + port, addr, vid, ERR_PTR(rc)); + return rc; + } + + if (tmp.index < l2_lookup.index) { + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + tmp.index, NULL, false); + if (rc < 0) + return rc; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } From 728db843df88753aeb7224314807a203afa8eb32 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:13 +0300 Subject: [PATCH 386/502] net: dsa: sja1105: ignore the FDB entry for unknown multicast when adding a new address Currently, when sja1105pqrs_fdb_add() is called for a host-joined IPv6 MDB entry such as 33:33:00:00:00:6a, the search for that address will return the FDB entry for SJA1105_UNKNOWN_MULTICAST, which has a destination MAC of 01:00:00:00:00:00 and a mask of 01:00:00:00:00:00. It returns that entry because, well, it matches, in the sense that unknown multicast is supposed by design to match it... But the issue is that we then proceed to overwrite this entry with the one for our precise host-joined multicast address, and the unknown multicast entry is no longer there - unknown multicast is now flooded to the same group of ports as broadcast, which does not look up the FDB. To solve this problem, we should ignore searches that return the unknown multicast address as the match, and treat them as "no match" which will result in the entry being installed to hardware. For this to work properly, we need to put the result of the FDB search in a temporary variable in order to avoid overwriting the l2_lookup entry we want to program. The l2_lookup entry returned by the search might not have the same set of DESTPORTS and not even the same MACADDR as the entry we're trying to add. Fixes: 4d9423549501 ("net: dsa: sja1105: offload bridge port flags to device") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 5a4c7789ca43..5d8739b30d8c 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1456,14 +1456,19 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, } l2_lookup.destports = BIT(port); + tmp = l2_lookup; + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, - SJA1105_SEARCH, &l2_lookup); - if (rc == 0) { + SJA1105_SEARCH, &tmp); + if (rc == 0 && tmp.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) { /* Found a static entry and this port is already in the entry's * port mask => job done */ - if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds) + if ((tmp.destports & BIT(port)) && tmp.lockeds) return 0; + + l2_lookup = tmp; + /* l2_lookup.index is populated by the switch in case it * found something. */ From 589918df93226a1e5f104306c185b6dcf2bd8051 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:14 +0300 Subject: [PATCH 387/502] net: dsa: sja1105: be stateless with FDB entries on SJA1105P/Q/R/S/SJA1110 too Similar but not quite the same with what was done in commit b11f0a4c0c81 ("net: dsa: sja1105: be stateless when installing FDB entries") for SJA1105E/T, it is desirable to drop the priv->vlan_aware check and simply go ahead and install FDB entries in the VLAN that was given by the bridge. As opposed to SJA1105E/T, in SJA1105P/Q/R/S and SJA1110, the FDB is a maskable TCAM, and we are installing VLAN-unaware FDB entries with the VLAN ID masked off. However, such FDB entries might completely obscure VLAN-aware entries where the VLAN ID is included in the search mask, because the switch looks up the FDB from left to right and picks the first entry which results in a masked match. So it depends on whether the bridge installs first the VLAN-unaware or the VLAN-aware FDB entries. Anyway, if we had a VLAN-unaware FDB entry towards one set of DESTPORTS and a VLAN-aware one towards other set of DESTPORTS, the result is that the packets in VLAN-aware mode will be forwarded towards the DESTPORTS specified by the VLAN-unaware entry. To solve this, simply do not use the masked matching ability of the FDB for VLAN ID, and always match precisely on it. In VLAN-unaware mode, we configure the switch for shared VLAN learning, so the VLAN ID will be ignored anyway during lookup, so it is redundant to mask it off in the TCAM. This patch conflicts with net-next commit 0fac6aa098ed ("net: dsa: sja1105: delete the best_effort_vlan_filtering mode") which changed this line: if (priv->vlan_state != SJA1105_VLAN_UNAWARE) { into: if (priv->vlan_aware) { When merging with net-next, the lines added by this patch should take precedence in the conflict resolution (i.e. the "if" condition should be deleted in both cases). Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 5d8739b30d8c..335b608bad11 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1447,13 +1447,8 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, l2_lookup.vlanid = vid; l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_state != SJA1105_VLAN_UNAWARE) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; + l2_lookup.mask_iotag = BIT(0); l2_lookup.destports = BIT(port); tmp = l2_lookup; @@ -1545,13 +1540,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, l2_lookup.vlanid = vid; l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_state != SJA1105_VLAN_UNAWARE) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; + l2_lookup.mask_iotag = BIT(0); l2_lookup.destports = BIT(port); rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, From 47c2c0c2312118a478f738503781de1d1a6020d2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jul 2021 20:18:15 +0300 Subject: [PATCH 388/502] net: dsa: sja1105: match FDB entries regardless of inner/outer VLAN tag On SJA1105P/Q/R/S and SJA1110, the L2 Lookup Table entries contain a maskable "inner/outer tag" bit which means: - when set to 1: match single-outer and double tagged frames - when set to 0: match untagged and single-inner tagged frames - when masked off: match all frames regardless of the type of tag This driver does not make any meaningful distinction between inner tags (matches on TPID) and outer tags (matches on TPID2). In fact, all VLAN table entries are installed as SJA1110_VLAN_D_TAG, which means that they match on both inner and outer tags. So it does not make sense that we install FDB entries with the IOTAG bit set to 1. In VLAN-unaware mode, we set both TPID and TPID2 to 0xdadb, so the switch will see frames as outer-tagged or double-tagged (never inner). So the FDB entries will match if IOTAG is set to 1. In VLAN-aware mode, we set TPID to 0x8100 and TPID2 to 0x88a8. So the switch will see untagged and 802.1Q-tagged packets as inner-tagged, and 802.1ad-tagged packets as outer-tagged. So untagged and 802.1Q-tagged packets will not match FDB entries if IOTAG is set to 1, but 802.1ad tagged packets will. Strange. To fix this, simply mask off the IOTAG bit from FDB entries, and make them match regardless of whether the VLAN tag is inner or outer. Fixes: 1da73821343c ("net: dsa: sja1105: Add FDB operations for P/Q/R/S series") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 335b608bad11..8667c9754330 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1445,10 +1445,8 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, /* Search for an existing entry in the FDB table */ l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); l2_lookup.destports = BIT(port); tmp = l2_lookup; @@ -1538,10 +1536,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); l2_lookup.destports = BIT(port); rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, From 4c156084daa8ee70978e4b150b5eb5fc7b1f15be Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Thu, 29 Jul 2021 11:16:44 +0800 Subject: [PATCH 389/502] selinux: correct the return value when loads initial sids It should not return 0 when SID 0 is assigned to isids. This patch fixes it. Cc: stable@vger.kernel.org Fixes: e3e0b582c321a ("selinux: remove unused initial SIDs and improve handling") Signed-off-by: Xiu Jianfeng [PM: remove changelog from description] Signed-off-by: Paul Moore --- security/selinux/ss/policydb.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index defc5ef35c66..0ae1b718194a 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -874,7 +874,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) rc = sidtab_init(s); if (rc) { pr_err("SELinux: out of memory on SID table init\n"); - goto out; + return rc; } head = p->ocontexts[OCON_ISID]; @@ -885,7 +885,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) if (sid == SECSID_NULL) { pr_err("SELinux: SID 0 was assigned a context.\n"); sidtab_destroy(s); - goto out; + return -EINVAL; } /* Ignore initial SIDs unused by this kernel. */ @@ -897,12 +897,10 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) pr_err("SELinux: unable to load initial SID %s.\n", name); sidtab_destroy(s); - goto out; + return rc; } } - rc = 0; -out: - return rc; + return 0; } int policydb_class_isvalid(struct policydb *p, unsigned int class) From a5e63c7d38d548b8dab6c6205e0b6af76899dbf5 Mon Sep 17 00:00:00 2001 From: Steve Bennett Date: Sat, 31 Jul 2021 08:57:50 +1000 Subject: [PATCH 390/502] net: phy: micrel: Fix detection of ksz87xx switch The logic for discerning between KSZ8051 and KSZ87XX PHYs is incorrect such that the that KSZ87XX switch is not identified correctly. ksz8051_ksz8795_match_phy_device() uses the parameter ksz_phy_id to discriminate whether it was called from ksz8051_match_phy_device() or from ksz8795_match_phy_device() but since PHY_ID_KSZ87XX is the same value as PHY_ID_KSZ8051, this doesn't work. Instead use a bool to discriminate the caller. Without this patch, the KSZ8795 switch port identifies as: ksz8795-switch spi3.1 ade1 (uninitialized): PHY [dsa-0.1:03] driver [Generic PHY] With the patch, it identifies correctly: ksz8795-switch spi3.1 ade1 (uninitialized): PHY [dsa-0.1:03] driver [Micrel KSZ87XX Switch] Fixes: 8b95599c55ed24b36cf4 ("net: phy: micrel: Discern KSZ8051 and KSZ8795 PHYs") Signed-off-by: Steve Bennett Reviewed-by: Marek Vasut Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 4d53886f7d51..53bdd673ae56 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -401,11 +401,11 @@ static int ksz8041_config_aneg(struct phy_device *phydev) } static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, - const u32 ksz_phy_id) + const bool ksz_8051) { int ret; - if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id) + if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051) return 0; ret = phy_read(phydev, MII_BMSR); @@ -418,7 +418,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, * the switch does not. */ ret &= BMSR_ERCAP; - if (ksz_phy_id == PHY_ID_KSZ8051) + if (ksz_8051) return ret; else return !ret; @@ -426,7 +426,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, static int ksz8051_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051); + return ksz8051_ksz8795_match_phy_device(phydev, true); } static int ksz8081_config_init(struct phy_device *phydev) @@ -535,7 +535,7 @@ static int ksz8061_config_init(struct phy_device *phydev) static int ksz8795_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX); + return ksz8051_ksz8795_match_phy_device(phydev, false); } static int ksz9021_load_values_from_of(struct phy_device *phydev, From 7fe74dfd41c428afb24e2e615470832fa997ff14 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Sat, 31 Jul 2021 14:38:01 +0800 Subject: [PATCH 391/502] net: natsemi: Fix missing pci_disable_device() in probe and remove Replace pci_enable_device() with pcim_enable_device(), pci_disable_device() and pci_release_regions() will be called in release automatically. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/natsemi.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 51b4b25d15ad..84f7dbe9edff 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -819,7 +819,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) printk(version); #endif - i = pci_enable_device(pdev); + i = pcim_enable_device(pdev); if (i) return i; /* natsemi has a non-standard PM control register @@ -852,7 +852,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) ioaddr = ioremap(iostart, iosize); if (!ioaddr) { i = -ENOMEM; - goto err_ioremap; + goto err_pci_request_regions; } /* Work around the dropped serial bit. */ @@ -974,9 +974,6 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) err_register_netdev: iounmap(ioaddr); - err_ioremap: - pci_release_regions(pdev); - err_pci_request_regions: free_netdev(dev); return i; @@ -3241,7 +3238,6 @@ static void natsemi_remove1(struct pci_dev *pdev) NATSEMI_REMOVE_FILE(pdev, dspcfg_workaround); unregister_netdev (dev); - pci_release_regions (pdev); iounmap(ioaddr); free_netdev (dev); } From 6387f65e2acb9a63044bd64464401771b8cf1acc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 31 Jul 2021 07:39:17 -0700 Subject: [PATCH 392/502] net: sparx5: fix compiletime_assert for GCC 4.9 Stephen reports sparx5 broke GCC 4.9 build. Move the compiletime_assert() out of the static function. Compile-tested only, no object code changes. Reported-by: Stephen Rothwell Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../ethernet/microchip/sparx5/sparx5_netdev.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 9d485a9d1f1f..1a240e6bddd0 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -13,7 +13,19 @@ */ #define VSTAX 73 -static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) +#define ifh_encode_bitfield(ifh, value, pos, _width) \ + ({ \ + u32 width = (_width); \ + \ + /* Max width is 5 bytes - 40 bits. In worst case this will + * spread over 6 bytes - 48 bits + */ \ + compiletime_assert(width <= 40, \ + "Unsupported width, must be <= 40"); \ + __ifh_encode_bitfield((ifh), (value), (pos), width); \ + }) + +static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) { u8 *ifh_hdr = ifh; /* Calculate the Start IFH byte position of this IFH bit position */ @@ -22,11 +34,6 @@ static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) u32 bit = (pos % 8); u64 encode = GENMASK(bit + width - 1, bit) & (value << bit); - /* Max width is 5 bytes - 40 bits. In worst case this will - * spread over 6 bytes - 48 bits - */ - compiletime_assert(width <= 40, "Unsupported width, must be <= 40"); - /* The b0-b7 goes into the start IFH byte */ if (encode & 0xFF) ifh_hdr[byte] |= (u8)((encode & 0xFF)); From 66e0da21728343bd3e75230a53d909e045fb9dd7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 31 Jul 2021 07:40:07 -0700 Subject: [PATCH 393/502] docs: operstates: fix typo TVL -> TLV Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/operstates.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst index 9c918f7cb0e8..f6b9cce5b201 100644 --- a/Documentation/networking/operstates.rst +++ b/Documentation/networking/operstates.rst @@ -111,7 +111,7 @@ it as lower layer. Note that for certain kind of soft-devices, which are not managing any real hardware, it is possible to set this bit from userspace. One -should use TVL IFLA_CARRIER to do so. +should use TLV IFLA_CARRIER to do so. netif_carrier_ok() can be used to query that bit. From 7a7b8635b622add64d98cff84bf3ee71eac36237 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 31 Jul 2021 07:40:52 -0700 Subject: [PATCH 394/502] docs: operstates: document IF_OPER_TESTING IF_OPER_TESTING is in fact used today. Signed-off-by: Jakub Kicinski Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/networking/operstates.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst index f6b9cce5b201..1ee2141e8ef1 100644 --- a/Documentation/networking/operstates.rst +++ b/Documentation/networking/operstates.rst @@ -73,7 +73,9 @@ IF_OPER_LOWERLAYERDOWN (3): state (f.e. VLAN). IF_OPER_TESTING (4): - Unused in current kernel. + Interface is in testing mode, for example executing driver self-tests + or media (cable) test. It can't be used for normal traffic until tests + complete. IF_OPER_DORMANT (5): Interface is L1 up, but waiting for an external event, f.e. for a From 1c69d7cf4a8b6b6cfd920a1e809f1cd33ae4369c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Aug 2021 07:30:29 -0700 Subject: [PATCH 395/502] Revert "mhi: Fix networking tree build." This reverts commit 40e159403896f7d55c98f858d0b20fee1d941fa4. Looks like this commit breaks the build for me. Signed-off-by: Jakub Kicinski --- include/linux/mhi.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 5e08468854db..944aa3aa3035 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -719,13 +719,8 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels - * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, - unsigned int flags); - -/* Automatically allocate and queue inbound buffers */ -#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. From 9b87f43537acfa24b95c236beba0f45901356eb2 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 16 Jul 2021 12:00:47 +0200 Subject: [PATCH 396/502] gpio: tqmx86: really make IRQ optional The tqmx86 MFD driver was passing IRQ 0 for "no IRQ" in the past. This causes warnings with newer kernels. Prepare the gpio-tqmx86 driver for the fixed MFD driver by handling a missing IRQ properly. Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller") Signed-off-by: Matthias Schiffer Reviewed-by: Andrew Lunn Acked-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tqmx86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 5022e0ad0fae..0f5d17f343f1 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -238,8 +238,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) struct resource *res; int ret, irq; - irq = platform_get_irq(pdev, 0); - if (irq < 0) + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) return irq; res = platform_get_resource(pdev, IORESOURCE_IO, 0); @@ -278,7 +278,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - if (irq) { + if (irq > 0) { struct irq_chip *irq_chip = &gpio->irq_chip; u8 irq_status; From d6793ca97b76642b77629dd0783ec64782a50bdb Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 27 Jul 2021 10:16:06 +0300 Subject: [PATCH 397/502] RDMA/mlx5: Delay emptying a cache entry when a new MR is added to it recently Fixing a typo that causes a cache entry to shrink immediately after adding to it new MRs if the entry size exceeds the high limit. In doing so, the cache misses its purpose to prevent the creation of new mkeys on the runtime by using the cached ones. Fixes: b9358bdbc713 ("RDMA/mlx5: Fix locking in MR cache work queue") Link: https://lore.kernel.org/r/fcb546986be346684a016f5ca23a0567399145fa.1627370131.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3263851ea574..3f1c5a4f158b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -531,8 +531,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) */ spin_unlock_irq(&ent->lock); need_delay = need_resched() || someone_adding(cache) || - time_after(jiffies, - READ_ONCE(cache->last_add) + 300 * HZ); + !time_after(jiffies, + READ_ONCE(cache->last_add) + 300 * HZ); spin_lock_irq(&ent->lock); if (ent->disabled) goto out; From db4657afd10e45855ac1d8437fcc9a86bd3d741d Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 29 Jul 2021 14:26:22 -0400 Subject: [PATCH 398/502] RDMA/cma: Revert INIT-INIT patch The net/sunrpc/xprtrdma module creates its QP using rdma_create_qp() and immediately post receives, implicitly assuming the QP is in the INIT state and thus valid for ib_post_recv(). The patch noted in Fixes: removed the RESET->INIT modifiy from rdma_create_qp(), breaking NFS rdma for verbs providers that fail the ib_post_recv() for a bad state. This situation was proven using kprobes in rvt_post_recv() and rvt_modify_qp(). The traces showed that the rvt_post_recv() failed before ANY modify QP and that the current state was RESET. Fix by reverting the patch below. Fixes: dc70f7c3ed34 ("RDMA/cma: Remove unnecessary INIT->INIT transition") Link: https://lore.kernel.org/r/1627583182-81330-1-git-send-email-mike.marciniszyn@cornelisnetworks.com Cc: Haakon Bugge Cc: Chuck Lever III Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 515a7e95a421..5d3b8b8d163d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -926,12 +926,25 @@ static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) return ret; } +static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(qp, &qp_attr, qp_attr_mask); +} + int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { struct rdma_id_private *id_priv; struct ib_qp *qp; - int ret = 0; + int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id->device != pd->device) { @@ -948,6 +961,8 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); + else + ret = cma_init_conn_qp(id_priv, qp); if (ret) goto out_destroy; From e2a05339fa1188b6b37540f4611893ac4c534fa2 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 29 Jul 2021 17:00:38 -0500 Subject: [PATCH 399/502] RDMA/rxe: Use the correct size of wqe when processing SRQ The memcpy() that copies a WQE from a SRQ the QP uses an incorrect size. The size should have been the size of the rxe_send_wqe struct not the size of a pointer to it. The result is that IO operations using a SRQ on the responder side will fail. Fixes: ec0fa2445c18 ("RDMA/rxe: Fix over copying in get_srq_wqe") Link: https://lore.kernel.org/r/20210729220039.18549-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 3743dc39b60c..360ec67cb9e1 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -318,7 +318,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); return RESPST_ERR_MALFORMED_WQE; } - size = sizeof(wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); + size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); memcpy(&qp->resp.srq_wqe, wqe, size); qp->resp.wqe = &qp->resp.srq_wqe.wqe; From ef4b96a5773d7f6568363b3d0c3c3f371fb690bd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 29 Jul 2021 17:00:39 -0500 Subject: [PATCH 400/502] RDMA/rxe: Restore setting tot_len in the IPv4 header An earlier patch removed setting of tot_len in IPv4 headers because it was also set in ip_local_out. However, this change resulted in an incorrect ICRC being computed because the tot_len field is not masked out. This patch restores that line. This fixes the bug reported by Zhu Yanjun. This bug affects anyone using rxe which is currently broken. Fixes: 230bb836ee88 ("RDMA/rxe: Fix redundant call to ip_send_check") Link: https://lore.kernel.org/r/20210729220039.18549-3-rpearsonhpe@gmail.com Reported-by: Zhu Yanjun Signed-off-by: Bob Pearson Reviewed-and-tested-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index dec92928a1cd..5ac27f28ace1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -259,6 +259,7 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, iph->version = IPVERSION; iph->ihl = sizeof(struct iphdr) >> 2; + iph->tot_len = htons(skb->len); iph->frag_off = df; iph->protocol = proto; iph->tos = tos; From e89afb51f97ae03ee246c1fd0b47e3e491266aef Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 15 Jun 2021 14:23:34 -0400 Subject: [PATCH 401/502] drm/vmwgfx: Fix a 64bit regression on svga3 Register accesses are always 4bytes, accidently this was changed to a void pointer whwqich badly breaks 64bit archs when running on top of svga3. Fixes: 2cd80dbd3551 ("drm/vmwgfx: Add basic support for SVGA3") Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20210615182336.995192-3-zackr@vmware.com (cherry picked from commit 87360168759879d68550b0c052bbcc2a0339ff74) Signed-off-by: Thomas Zimmermann --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d1cef3b69e9d..5652d982b1ce 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -492,7 +492,7 @@ struct vmw_private { resource_size_t vram_start; resource_size_t vram_size; resource_size_t prim_bb_mem; - void __iomem *rmmio; + u32 __iomem *rmmio; u32 *fifo_mem; resource_size_t fifo_mem_size; uint32_t fb_max_width; From 0541a6293298fb52789de389dfb27ef54df81f73 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 2 Aug 2021 02:17:30 +0300 Subject: [PATCH 402/502] net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry Currently it is possible to add broken extern_learn FDB entries to the bridge in two ways: 1. Entries pointing towards the bridge device that are not local/permanent: ip link add br0 type bridge bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static 2. Entries pointing towards the bridge device or towards a port that are marked as local/permanent, however the bridge does not process the 'permanent' bit in any way, therefore they are recorded as though they aren't permanent: ip link add br0 type bridge bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the same as entries towards the bridge"), these incorrect FDB entries can even trigger NULL pointer dereferences inside the kernel. This is because that commit made the assumption that all FDB entries that are not local/permanent have a valid destination port. For context, local / permanent FDB entries either have fdb->dst == NULL, and these point towards the bridge device and are therefore local and not to be used for forwarding, or have fdb->dst == a net_bridge_port structure (but are to be treated in the same way, i.e. not for forwarding). That assumption _is_ correct as long as things are working correctly in the bridge driver, i.e. we cannot logically have fdb->dst == NULL under any circumstance for FDB entries that are not local. However, the extern_learn code path where FDB entries are managed by a user space controller show that it is possible for the bridge kernel driver to misinterpret the NUD flags of an entry transmitted by user space, and end up having fdb->dst == NULL while not being a local entry. This is invalid and should be rejected. Before, the two commands listed above both crashed the kernel in this check from br_switchdev_fdb_notify: struct net_device *dev = info.is_local ? br->dev : dst->dev; info.is_local == false, dst == NULL. After this patch, the invalid entry added by the first command is rejected: ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0 Error: bridge: FDB entry towards bridge must be permanent. and the valid entry added by the second command is properly treated as a local address and does not crash br_switchdev_fdb_notify anymore: ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0 Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space") Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com Signed-off-by: Vladimir Oltean Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/bridge/br.c | 3 ++- net/bridge/br_fdb.c | 30 ++++++++++++++++++++++++------ net/bridge/br_private.h | 2 +- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/net/bridge/br.c b/net/bridge/br.c index ef743f94254d..bbab9984f24e 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -166,7 +166,8 @@ static int br_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, - fdb_info->vid, false); + fdb_info->vid, + fdb_info->is_local, false); if (err) { err = notifier_from_errno(err); break; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a16191dcaed1..835cec1e5a03 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1019,7 +1019,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, - u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[]) + u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[], + struct netlink_ext_ack *extack) { int err = 0; @@ -1038,7 +1039,15 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, rcu_read_unlock(); local_bh_enable(); } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { - err = br_fdb_external_learn_add(br, p, addr, vid, true); + if (!p && !(ndm->ndm_state & NUD_PERMANENT)) { + NL_SET_ERR_MSG_MOD(extack, + "FDB entry towards bridge must be permanent"); + return -EINVAL; + } + + err = br_fdb_external_learn_add(br, p, addr, vid, + ndm->ndm_state & NUD_PERMANENT, + true); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb); @@ -1110,9 +1119,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* VID was specified, so use it. */ - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb, + extack); } else { - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb, + extack); if (err || !vg || !vg->num_vlans) goto out; @@ -1124,7 +1135,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (!br_vlan_should_use(v)) continue; err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, - nfea_tb); + nfea_tb, extack); if (err) goto out; } @@ -1264,7 +1275,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool is_local, bool swdev_notify) { struct net_bridge_fdb_entry *fdb; @@ -1281,6 +1292,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) flags |= BIT(BR_FDB_ADDED_BY_USER); + + if (is_local) + flags |= BIT(BR_FDB_LOCAL); + fdb = fdb_create(br, p, addr, vid, flags); if (!fdb) { err = -ENOMEM; @@ -1307,6 +1322,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (is_local) + set_bit(BR_FDB_LOCAL, &fdb->flags); + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2b48b204205e..aa64d8d63ca3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -711,7 +711,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool is_local, bool swdev_notify); int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, From 8861452b2097bb0b5d0081a1c137fb3870b0a31f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Nov 2019 09:43:06 +0100 Subject: [PATCH 403/502] soc: ixp4xx: fix printing resources When compile-testing with 64-bit resource_size_t, gcc reports an invalid printk format string: In file included from include/linux/dma-mapping.h:7, from drivers/soc/ixp4xx/ixp4xx-npe.c:15: drivers/soc/ixp4xx/ixp4xx-npe.c: In function 'ixp4xx_npe_probe': drivers/soc/ixp4xx/ixp4xx-npe.c:694:18: error: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t' {aka 'long long unsigned int'} [-Werror=format=] dev_info(dev, "NPE%d at 0x%08x-0x%08x not available\n", Use the special %pR format string to print the resources. Fixes: 0b458d7b10f8 ("soc: ixp4xx: npe: Pass addresses as resources") Signed-off-by: Arnd Bergmann --- drivers/soc/ixp4xx/ixp4xx-npe.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index fea50e04d5a1..f490c4ca51f5 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c +++ b/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -693,8 +693,8 @@ static int ixp4xx_npe_probe(struct platform_device *pdev) if (!(ixp4xx_read_feature_bits() & (IXP4XX_FEATURE_RESET_NPEA << i))) { - dev_info(dev, "NPE%d at 0x%08x-0x%08x not available\n", - i, res->start, res->end); + dev_info(dev, "NPE%d at %pR not available\n", + i, res); continue; /* NPE already disabled or not present */ } npe->regs = devm_ioremap_resource(dev, res); @@ -702,13 +702,12 @@ static int ixp4xx_npe_probe(struct platform_device *pdev) return PTR_ERR(npe->regs); if (npe_reset(npe)) { - dev_info(dev, "NPE%d at 0x%08x-0x%08x does not reset\n", - i, res->start, res->end); + dev_info(dev, "NPE%d at %pR does not reset\n", + i, res); continue; } npe->valid = 1; - dev_info(dev, "NPE%d at 0x%08x-0x%08x registered\n", - i, res->start, res->end); + dev_info(dev, "NPE%d at %pR registered\n", i, res); found++; } From a8eee86317f11e97990d755d4615c1c0db203d08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Aug 2021 10:12:34 +0200 Subject: [PATCH 404/502] soc: ixp4xx/qmgr: fix invalid __iomem access Sparse reports a compile time warning when dereferencing an __iomem pointer: drivers/soc/ixp4xx/ixp4xx-qmgr.c:149:37: warning: dereference of noderef expression drivers/soc/ixp4xx/ixp4xx-qmgr.c:153:40: warning: dereference of noderef expression drivers/soc/ixp4xx/ixp4xx-qmgr.c:154:40: warning: dereference of noderef expression drivers/soc/ixp4xx/ixp4xx-qmgr.c:174:38: warning: dereference of noderef expression drivers/soc/ixp4xx/ixp4xx-qmgr.c:174:44: warning: dereference of noderef expression Use __raw_readl() here for consistency with the rest of the file. This should really get converted to some proper accessor, as the __raw functions are not meant to be used in drivers, but the driver has used these since the start, so for the moment, let's only fix the warning. Reported-by: kernel test robot Fixes: d4c9e9fc9751 ("IXP42x: Add QMgr support for IXP425 rev. A0 processors.") Signed-off-by: Arnd Bergmann --- drivers/soc/ixp4xx/ixp4xx-qmgr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/soc/ixp4xx/ixp4xx-qmgr.c b/drivers/soc/ixp4xx/ixp4xx-qmgr.c index c6bf6ef257c0..9154c7029b05 100644 --- a/drivers/soc/ixp4xx/ixp4xx-qmgr.c +++ b/drivers/soc/ixp4xx/ixp4xx-qmgr.c @@ -146,12 +146,12 @@ static irqreturn_t qmgr_irq1_a0(int irq, void *pdev) /* ACK - it may clear any bits so don't rely on it */ __raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[0]); - en_bitmap = qmgr_regs->irqen[0]; + en_bitmap = __raw_readl(&qmgr_regs->irqen[0]); while (en_bitmap) { i = __fls(en_bitmap); /* number of the last "low" queue */ en_bitmap &= ~BIT(i); - src = qmgr_regs->irqsrc[i >> 3]; - stat = qmgr_regs->stat1[i >> 3]; + src = __raw_readl(&qmgr_regs->irqsrc[i >> 3]); + stat = __raw_readl(&qmgr_regs->stat1[i >> 3]); if (src & 4) /* the IRQ condition is inverted */ stat = ~stat; if (stat & BIT(src & 3)) { @@ -171,7 +171,8 @@ static irqreturn_t qmgr_irq2_a0(int irq, void *pdev) /* ACK - it may clear any bits so don't rely on it */ __raw_writel(0xFFFFFFFF, &qmgr_regs->irqstat[1]); - req_bitmap = qmgr_regs->irqen[1] & qmgr_regs->statne_h; + req_bitmap = __raw_readl(&qmgr_regs->irqen[1]) & + __raw_readl(&qmgr_regs->statne_h); while (req_bitmap) { i = __fls(req_bitmap); /* number of the last "high" queue */ req_bitmap &= ~BIT(i); From e30e8d46cf605d216a799a28c77b8a41c328613a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Aug 2021 11:42:00 +0100 Subject: [PATCH 405/502] arm64: fix compat syscall return truncation Due to inconsistencies in the way we manipulate compat GPRs, we have a few issues today: * For audit and tracing, where error codes are handled as a (native) long, negative error codes are expected to be sign-extended to the native 64-bits, or they may fail to be matched correctly. Thus a syscall which fails with an error may erroneously be identified as failing. * For ptrace, *all* compat return values should be sign-extended for consistency with 32-bit arm, but we currently only do this for negative return codes. * As we may transiently set the upper 32 bits of some compat GPRs while in the kernel, these can be sampled by perf, which is somewhat confusing. This means that where a syscall returns a pointer above 2G, this will be sign-extended, but will not be mistaken for an error as error codes are constrained to the inclusive range [-4096, -1] where no user pointer can exist. To fix all of these, we must consistently use helpers to get/set the compat GPRs, ensuring that we never write the upper 32 bits of the return code, and always sign-extend when reading the return code. This patch does so, with the following changes: * We re-organise syscall_get_return_value() to always sign-extend for compat tasks, and reimplement syscall_get_error() atop. We update syscall_trace_exit() to use syscall_get_return_value(). * We consistently use syscall_set_return_value() to set the return value, ensureing the upper 32 bits are never set unexpectedly. * As the core audit code currently uses regs_return_value() rather than syscall_get_return_value(), we special-case this for compat_user_mode(regs) such that this will do the right thing. Going forward, we should try to move the core audit code over to syscall_get_return_value(). Cc: Reported-by: He Zhe Reported-by: weiyuchen Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210802104200.21390-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 12 +++++++++++- arch/arm64/include/asm/syscall.h | 21 +++++++++++---------- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 3 ++- arch/arm64/kernel/syscall.c | 9 +++------ 5 files changed, 28 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index e58bca832dff..41b332c054ab 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -320,7 +320,17 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) static inline unsigned long regs_return_value(struct pt_regs *regs) { - return regs->regs[0]; + unsigned long val = regs->regs[0]; + + /* + * Audit currently uses regs_return_value() instead of + * syscall_get_return_value(). Apply the same sign-extension here until + * audit is updated to use syscall_get_return_value(). + */ + if (compat_user_mode(regs)) + val = sign_extend64(val, 31); + + return val; } static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index cfc0672013f6..03e20895453a 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -29,24 +29,25 @@ static inline void syscall_rollback(struct task_struct *task, regs->regs[0] = regs->orig_x0; } +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long val = regs->regs[0]; + + if (is_compat_thread(task_thread_info(task))) + val = sign_extend64(val, 31); + + return val; +} static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - unsigned long error = regs->regs[0]; - - if (is_compat_thread(task_thread_info(task))) - error = sign_extend64(error, 31); + unsigned long error = syscall_get_return_value(task, regs); return IS_ERR_VALUE(error) ? error : 0; } -static inline long syscall_get_return_value(struct task_struct *task, - struct pt_regs *regs) -{ - return regs->regs[0]; -} - static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 499b6b2f9757..b381a1ee9ea7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1862,7 +1862,7 @@ void syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(regs); if (flags & _TIF_SYSCALL_TRACEPOINT) - trace_sys_exit(regs, regs_return_value(regs)); + trace_sys_exit(regs, syscall_get_return_value(current, regs)); if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f8192f4ae0b8..23036334f4dc 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -890,7 +891,7 @@ static void do_signal(struct pt_regs *regs) retval == -ERESTART_RESTARTBLOCK || (retval == -ERESTARTSYS && !(ksig.ka.sa.sa_flags & SA_RESTART)))) { - regs->regs[0] = -EINTR; + syscall_set_return_value(current, regs, -EINTR, 0); regs->pc = continue_addr; } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 263d6c1a525f..50a0f1a38e84 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -54,10 +54,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, ret = do_ni_syscall(regs, scno); } - if (is_compat_task()) - ret = lower_32_bits(ret); - - regs->regs[0] = ret; + syscall_set_return_value(current, regs, 0, ret); /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), @@ -115,7 +112,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * syscall. do_notify_resume() will send a signal to userspace * before the syscall is restarted. */ - regs->regs[0] = -ERESTARTNOINTR; + syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0); return; } @@ -136,7 +133,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * anyway. */ if (scno == NO_SYSCALL) - regs->regs[0] = -ENOSYS; + syscall_set_return_value(current, regs, -ENOSYS, 0); scno = syscall_trace_enter(regs); if (scno == NO_SYSCALL) goto trace_exit; From 64ee84c75b5f75132eec97f2c7a201a056d53698 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 1 Aug 2021 14:35:25 +0900 Subject: [PATCH 406/502] arm64: move warning about toolchains to archprepare Commit 987fdfec2410 ("arm64: move --fix-cortex-a53-843419 linker test to Kconfig") fixed the false-positive warning in the installation step. Yet, there are some cases where this false-positive is shown. For example, you can see it when you cross 987fdfec2410 during git-bisect. $ git checkout 987fdfec2410^ [ snip ] $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- defconfig all [ snip ] $ git checkout v5.13 [ snip] $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- defconfig all [ snip ] arch/arm64/Makefile:25: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum In the stale include/config/auto.config, CONFIG_ARM64_ERRATUM_843419=y is set without CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419, so the warning is displayed while parsing the Makefiles. Make will restart with the updated include/config/auto.config, hence CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419 will be set eventually, but this warning is a surprise for users. Commit 25896d073d8a ("x86/build: Fix compiler support check for CONFIG_RETPOLINE") addressed a similar issue. Move $(warning ...) out of the parse stage of Makefiles. The same applies to CONFIG_ARM64_USE_LSE_ATOMICS. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210801053525.105235-1-masahiroy@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7bc37d0a1b68..7b668db43261 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -21,19 +21,11 @@ LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ endif ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) - ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) -$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum) - else + ifeq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) - ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) -$(warning LSE atomics not supported by binutils) - endif -endif - cc_has_k_constraint := $(call try-run,echo \ 'int main(void) { \ asm volatile("and w0, w0, %w0" :: "K" (4294967295)); \ @@ -176,6 +168,17 @@ vdso_install: archprepare: $(Q)$(MAKE) $(build)=arch/arm64/tools kapi +ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) + ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) + @echo "warning: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum" >&2 + endif +endif +ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS),y) + ifneq ($(CONFIG_ARM64_LSE_ATOMICS),y) + @echo "warning: LSE atomics not supported by binutils" >&2 + endif +endif + # We use MRPROPER_FILES and CLEAN_FILES now archclean: From f9c4ff2ab9fe433d44ebbc2e3c2368a49df44798 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 31 Jul 2021 00:51:31 +1200 Subject: [PATCH 407/502] arm64: fix the doc of RANDOMIZE_MODULE_REGION_FULL Obviously kaslr is setting the module region to 2GB rather than 4GB since commit b2eed9b588112 ("arm64/kernel: kaslr: reduce module randomization range to 2 GB"). So fix the size of region in Kconfig. On the other hand, even though RANDOMIZE_MODULE_REGION_FULL is not set, module_alloc() can fall back to a 2GB window if ARM64_MODULE_PLTS is set. In this case, veneers are still needed. !RANDOMIZE_MODULE_REGION_FULL doesn't necessarily mean veneers are not needed. So fix the doc to be more precise to avoid any confusion to the readers of the code. Cc: Masami Hiramatsu Cc: Ard Biesheuvel Cc: Qi Liu Signed-off-by: Barry Song Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210730125131.13724-1-song.bao.hua@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 9 ++++++--- arch/arm64/kernel/kaslr.c | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b5b13a932561..fdcd54d39c1e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1800,11 +1800,11 @@ config RANDOMIZE_BASE If unsure, say N. config RANDOMIZE_MODULE_REGION_FULL - bool "Randomize the module region over a 4 GB range" + bool "Randomize the module region over a 2 GB range" depends on RANDOMIZE_BASE default y help - Randomizes the location of the module region inside a 4 GB window + Randomizes the location of the module region inside a 2 GB window covering the core kernel. This way, it is less likely for modules to leak information about the location of core kernel data structures but it does imply that function calls between modules and the core @@ -1812,7 +1812,10 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the - core kernel, so branch relocations are always in range. + core kernel, so branch relocations are almost always in range unless + ARM64_MODULE_PLTS is enabled and the region is exhausted. In this + particular case of region exhaustion, modules might be able to fall + back to a larger 2GB area. config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index cfa2cfde3019..418b2bba1521 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -162,7 +162,9 @@ u64 __init kaslr_early_init(void) * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, * _stext) . This guarantees that the resulting region still * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers. + * be resolved without veneers unless this region is exhausted + * and we fall back to a larger 2GB window in module_alloc() + * when ARM64_MODULE_PLTS is enabled. */ module_range = MODULES_VSIZE - (u64)(_etext - _stext); module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; From 8d5903f457145e3fcd858578b065d667822d99ac Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Aug 2021 17:48:44 +0100 Subject: [PATCH 408/502] arm64: stacktrace: fix comment Due to a copy-paste error, we describe struct stackframe::pc as a snapshot of the `fp` field rather than the `lr` field. Fix the comment. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210802164845.45506-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 1801399204d7..8aebc00c1718 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -35,7 +35,7 @@ struct stack_info { * accounting information necessary for robust unwinding. * * @fp: The fp value in the frame record (or the real fp) - * @pc: The fp value in the frame record (or the real lr) + * @pc: The lr value in the frame record (or the real lr) * * @stacks_done: Stacks which have been entirely unwound, for which it is no * longer valid to unwind to. From 0c32706dac1b0a72713184246952ab0f54327c21 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Aug 2021 17:48:45 +0100 Subject: [PATCH 409/502] arm64: stacktrace: avoid tracing arch_stack_walk() When the function_graph tracer is in use, arch_stack_walk() may unwind the stack incorrectly, erroneously reporting itself, missing the final entry which is being traced, and reporting all traced entries between these off-by-one from where they should be. When ftrace hooks a function return, the original return address is saved to the fgraph ret_stack, and the return address in the LR (or the function's frame record) is replaced with `return_to_handler`. When arm64's unwinder encounter frames returning to `return_to_handler`, it finds the associated original return address from the fgraph ret stack, assuming the most recent `ret_to_hander` entry on the stack corresponds to the most recent entry in the fgraph ret stack, and so on. When arch_stack_walk() is used to dump the current task's stack, it starts from the caller of arch_stack_walk(). However, arch_stack_walk() can be traced, and so may push an entry on to the fgraph ret stack, leaving the fgraph ret stack offset by one from the expected position. This can be seen when dumping the stack via /proc/self/stack, where enabling the graph tracer results in an unexpected `stack_trace_save_tsk` entry at the start of the trace, and `el0_svc` missing form the end of the trace. This patch fixes this by marking arch_stack_walk() as notrace, as we do for all other functions on the path to ftrace_graph_get_ret_stack(). While a few helper functions are not marked notrace, their calls/returns are balanced, and will have no observable effect when examining the fgraph ret stack. It is possible for an exeption boundary to cause a similar offset if the return address of the interrupted context was in the LR. Fixing those cases will require some more substantial rework, and is left for subsequent patches. Before: | # cat /proc/self/stack | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0_svc+0x2c/0x54 | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c | # echo function_graph > /sys/kernel/tracing/current_tracer | # cat /proc/self/stack | [<0>] stack_trace_save_tsk+0xa4/0x110 | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c After: | # cat /proc/self/stack | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0_svc+0x2c/0x54 | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c | # echo function_graph > /sys/kernel/tracing/current_tracer | # cat /proc/self/stack | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0_svc+0x2c/0x54 | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c Cc: Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Madhavan T. Venkataraman Cc: Mark Brown Cc: Will Deacon Reviwed-by: Mark Brown Link: https://lore.kernel.org/r/20210802164845.45506-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index b83c8d911930..8982a2b78acf 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -218,7 +218,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) #ifdef CONFIG_STACKTRACE -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, +noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { From ce78ffa3ef1681065ba451cfd545da6126f5ca88 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 Aug 2021 11:14:03 +0100 Subject: [PATCH 410/502] net: really fix the build... Signed-off-by: David S. Miller --- drivers/bus/mhi/core/internal.h | 2 +- drivers/bus/mhi/core/main.c | 9 ++++++--- drivers/net/mhi/net.c | 2 +- drivers/net/wwan/mhi_wwan_ctrl.c | 2 +- include/linux/mhi.h | 7 ++++++- net/qrtr/mhi.c | 16 +++++++++++++++- 6 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index 5b9ea66b92dc..bc239a11aa69 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl, struct image_info *img_info); void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl); int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan); + struct mhi_chan *mhi_chan, unsigned int flags); int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan); void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index fc9196f11cb7..84448233f64c 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -1430,7 +1430,7 @@ exit_unprepare_channel: } int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan) + struct mhi_chan *mhi_chan, unsigned int flags) { int ret = 0; struct device *dev = &mhi_chan->mhi_dev->dev; @@ -1455,6 +1455,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, if (ret) goto error_pm_state; + if (mhi_chan->dir == DMA_FROM_DEVICE) + mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS); + /* Pre-allocate buffer for xfer ring */ if (mhi_chan->pre_alloc) { int nr_el = get_nr_avail_ring_elements(mhi_cntrl, @@ -1610,7 +1613,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan) } /* Move channel to start state */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags) { int ret, dir; struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl; @@ -1621,7 +1624,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) if (!mhi_chan) continue; - ret = mhi_prepare_channel(mhi_cntrl, mhi_chan); + ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags); if (ret) goto error_open_chan; } diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c index e60e38c1f09d..11be6bcdd551 100644 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi/net.c @@ -335,7 +335,7 @@ static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, u64_stats_init(&mhi_netdev->stats.tx_syncp); /* Start MHI channels */ - err = mhi_prepare_for_transfer(mhi_dev); + err = mhi_prepare_for_transfer(mhi_dev, 0); if (err) goto out_err; diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c index 1bc6b69aa530..1e18420ce404 100644 --- a/drivers/net/wwan/mhi_wwan_ctrl.c +++ b/drivers/net/wwan/mhi_wwan_ctrl.c @@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port) int ret; /* Start mhi device's channel(s) */ - ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev); + ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0); if (ret) return ret; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 944aa3aa3035..5e08468854db 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -719,8 +719,13 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, + unsigned int flags); + +/* Automatically allocate and queue inbound buffers */ +#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index fa611678af05..1dc955ca57d3 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -15,6 +15,7 @@ struct qrtr_mhi_dev { struct qrtr_endpoint ep; struct mhi_device *mhi_dev; struct device *dev; + struct completion ready; }; /* From MHI to QRTR */ @@ -50,6 +51,10 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep); int rc; + rc = wait_for_completion_interruptible(&qdev->ready); + if (rc) + goto free_skb; + if (skb->sk) sock_hold(skb->sk); @@ -79,7 +84,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, int rc; /* start channels */ - rc = mhi_prepare_for_transfer(mhi_dev); + rc = mhi_prepare_for_transfer(mhi_dev, 0); if (rc) return rc; @@ -96,6 +101,15 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, if (rc) return rc; + /* start channels */ + rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS); + if (rc) { + qrtr_endpoint_unregister(&qdev->ep); + dev_set_drvdata(&mhi_dev->dev, NULL); + return rc; + } + + complete_all(&qdev->ready); dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); return 0; From 2e2f1e8d0450c561c0c936b4b67e8b5a95975fb7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:22 +0200 Subject: [PATCH 411/502] KVM: x86: hyper-v: Check access to hypercall before reading XMM registers In case guest doesn't have access to the particular hypercall we can avoid reading XMM registers. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index b07592ca92f0..cb7e045905a5 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2173,9 +2173,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; hc.rep = !!(hc.rep_cnt || hc.rep_idx); - if (hc.fast && is_xmm_fast_hypercall(&hc)) - kvm_hv_hypercall_read_xmm(&hc); - trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); @@ -2184,6 +2181,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) goto hypercall_complete; } + if (hc.fast && is_xmm_fast_hypercall(&hc)) + kvm_hv_hypercall_read_xmm(&hc); + switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: if (unlikely(hc.rep)) { From f5714bbb5b3120b33dfbf3d81ffc0b98ae4cd4c1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:23 +0200 Subject: [PATCH 412/502] KVM: x86: Introduce trace_kvm_hv_hypercall_done() Hypercall failures are unusual with potentially far going consequences so it would be useful to see their results when tracing. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 1 + arch/x86/kvm/trace.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index cb7e045905a5..2945b93dbadd 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { + trace_kvm_hv_hypercall_done(result); kvm_hv_hypercall_set_result(vcpu, result); ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b484141ea15b..03ebe368333e 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall, __entry->outgpa) ); +TRACE_EVENT(kvm_hv_hypercall_done, + TP_PROTO(u64 result), + TP_ARGS(result), + + TP_STRUCT__entry( + __field(__u64, result) + ), + + TP_fast_assign( + __entry->result = result; + ), + + TP_printk("result 0x%llx", __entry->result) +); + /* * Tracepoint for Xen hypercall. */ From 4e62aa96d6e55c1b2a4e841f1f8601eae81e81ae Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:24 +0200 Subject: [PATCH 413/502] KVM: x86: hyper-v: Check if guest is allowed to use XMM registers for hypercall input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TLFS states that "Availability of the XMM fast hypercall interface is indicated via the “Hypervisor Feature Identification” CPUID Leaf (0x40000003, see section 2.4.4) ... Any attempt to use this interface when the hypervisor does not indicate availability will result in a #UD fault." Implement the check for 'strict' mode (KVM_CAP_HYPERV_ENFORCE_CPUID). Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2945b93dbadd..0b38f944c6b6 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2140,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code) int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_hv_hcall hc; u64 ret = HV_STATUS_SUCCESS; @@ -2177,13 +2178,21 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); - if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) { + if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) { ret = HV_STATUS_ACCESS_DENIED; goto hypercall_complete; } - if (hc.fast && is_xmm_fast_hypercall(&hc)) + if (hc.fast && is_xmm_fast_hypercall(&hc)) { + if (unlikely(hv_vcpu->enforce_cpuid && + !(hv_vcpu->cpuid_cache.features_edx & + HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + kvm_hv_hypercall_read_xmm(&hc); + } switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: From 2476b5a1b16ced78a80629da8ff87538d5c95073 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 30 Jul 2021 14:26:25 +0200 Subject: [PATCH 414/502] KVM: selftests: Test access to XMM fast hypercalls Check that #UD is raised if bit 16 is clear in HYPERV_CPUID_FEATURES.EDX and an 'XMM fast' hypercall is issued. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Siddharth Chandrasekaran Signed-off-by: Paolo Bonzini Message-Id: <20210730122625.112848-5-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/hyperv.h | 5 ++- .../selftests/kvm/x86_64/hyperv_features.c | 41 +++++++++++++++++-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index 412eaee7884a..b66910702c0a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -117,7 +117,7 @@ #define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) #define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) #define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) @@ -182,4 +182,7 @@ #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* hypercall options */ +#define HV_HYPERCALL_FAST_BIT BIT(16) + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index af27c7e829c1..91d88aaa9899 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val) } static int nr_gp; +static int nr_ud; static inline u64 hypercall(u64 control, vm_vaddr_t input_address, vm_vaddr_t output_address) @@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs) regs->rip = (uint64_t)&wrmsr_end; } +static void guest_ud_handler(struct ex_regs *regs) +{ + nr_ud++; + regs->rip += 3; +} + struct msr_data { uint32_t idx; bool available; @@ -90,6 +97,7 @@ struct msr_data { struct hcall_data { uint64_t control; uint64_t expect; + bool ud_expected; }; static void guest_msr(struct msr_data *msr) @@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr) static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) { int i = 0; + u64 res, input, output; wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); while (hcall->control) { - GUEST_ASSERT(hypercall(hcall->control, pgs_gpa, - pgs_gpa + 4096) == hcall->expect); + nr_ud = 0; + if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { + input = pgs_gpa; + output = pgs_gpa + 4096; + } else { + input = output = 0; + } + + res = hypercall(hcall->control, input, output); + if (hcall->ud_expected) + GUEST_ASSERT(nr_ud == 1); + else + GUEST_ASSERT(res == hcall->expect); + GUEST_SYNC(i++); } @@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall recomm.ebx = 0xfff; hcall->expect = HV_STATUS_SUCCESS; break; - case 17: + /* XMM fast hypercall */ + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; + hcall->ud_expected = true; + break; + case 18: + feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; + hcall->ud_expected = false; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 19: /* END */ hcall->control = 0; break; @@ -625,6 +656,10 @@ int main(void) /* Test hypercalls */ vm = vm_create_default(VCPU_ID, 0, guest_hcall); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); From ae954bbc451d267f7d60d7b49db811d5a68ebd7b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Aug 2021 02:25:31 -0400 Subject: [PATCH 415/502] sctp: move the active_key update after sh_keys is added In commit 58acd1009226 ("sctp: update active_key for asoc when old key is being replaced"), sctp_auth_asoc_init_active_key() is called to update the active_key right after the old key is deleted and before the new key is added, and it caused that the active_key could be found with the key_id. In Ying Xu's testing, the BUG_ON in sctp_auth_asoc_init_active_key() was triggered: [ ] kernel BUG at net/sctp/auth.c:416! [ ] RIP: 0010:sctp_auth_asoc_init_active_key.part.8+0xe7/0xf0 [sctp] [ ] Call Trace: [ ] sctp_auth_set_key+0x16d/0x1b0 [sctp] [ ] sctp_setsockopt.part.33+0x1ba9/0x2bd0 [sctp] [ ] __sys_setsockopt+0xd6/0x1d0 [ ] __x64_sys_setsockopt+0x20/0x30 [ ] do_syscall_64+0x5b/0x1a0 So fix it by moving the active_key update after sh_keys is added. Fixes: 58acd1009226 ("sctp: update active_key for asoc when old key is being replaced") Reported-by: Ying Xu Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/auth.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index fe74c5f95630..db6b7373d16c 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -857,14 +857,18 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); cur_key->key = key; - if (replace) { - list_del_init(&shkey->key_list); - sctp_auth_shkey_release(shkey); - if (asoc && asoc->active_key_id == auth_key->sca_keynumber) - sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + if (!replace) { + list_add(&cur_key->key_list, sh_keys); + return 0; } + + list_del_init(&shkey->key_list); + sctp_auth_shkey_release(shkey); list_add(&cur_key->key_list, sh_keys); + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + return 0; } From f41e57af926ad840d114439d34cafc0533bf25f0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 Aug 2021 17:21:53 +0200 Subject: [PATCH 416/502] net: sparx5: fix bitmask on 32-bit targets I saw the build failure that was fixed in commit 6387f65e2acb ("net: sparx5: fix compiletime_assert for GCC 4.9") and noticed another issue that was introduced in the same patch: Using GENMASK() to create a 64-bit mask does not work on 32-bit architectures. This probably won't ever happen on this driver since it's specific to a 64-bit SoC, but it's better to write it portably, so use GENMASK_ULL() instead. Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 1a240e6bddd0..cb68eaaac881 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -32,7 +32,7 @@ static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) u32 byte = (35 - (pos / 8)); /* Calculate the Start bit position in the Start IFH byte */ u32 bit = (pos % 8); - u64 encode = GENMASK(bit + width - 1, bit) & (value << bit); + u64 encode = GENMASK_ULL(bit + width - 1, bit) & (value << bit); /* The b0-b7 goes into the start IFH byte */ if (encode & 0xFF) From 9c9c6d0ab08acfe41c9f7efa72c4ad3f133a266b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 16:34:11 -0700 Subject: [PATCH 417/502] drm/i915: Correct SFC_DONE register offset The register offset for SFC_DONE was missing a '0' at the end, causing us to read from a non-existent register address. We only use this register in error state dumps so the mistake hasn't caused any real problems, but fixing it will hopefully make the error state dumps a bit more useful for debugging. Fixes: e50dbdbfd9fb ("drm/i915/tgl: Add SFC instdone to error state") Cc: Mika Kuoppala Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210728233411.2365788-1-matthew.d.roper@intel.com Reviewed-by: Mika Kuoppala (cherry picked from commit 82929a2140eb99f1f1d21855f3f580e70d7abdd8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 94fde5ca26ae..41186c1f771e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -422,7 +422,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_HCP_SFC_LOCK_ACK_BIT REG_BIT(1) #define GEN12_HCP_SFC_USAGE_BIT REG_BIT(0) -#define GEN12_SFC_DONE(n) _MMIO(0x1cc00 + (n) * 0x100) +#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) #define GEN12_SFC_DONE_MAX 4 #define RING_PP_DIR_BASE(base) _MMIO((base) + 0x228) From 1354d830cb8f9be966cc07fc61368af27ffb7c4a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 21 Jul 2021 10:23:54 -0500 Subject: [PATCH 418/502] drm/i915: Call i915_globals_exit() if pci_register_device() fails In the unlikely event that pci_register_device() fails, we were tearing down our PMU setup but not globals. This leaves a bunch of memory slabs lying around. Signed-off-by: Jason Ekstrand Fixes: 32eb6bcfdda9 ("drm/i915: Make request allocation caches global") [danvet: Fix conflicts against removal of the globals_flush infrastructure.] Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210721152358.2893314-3-jason@jlekstrand.net (cherry picked from commit db484889d1ff0645e07e360d3e3ad306c0515821) Signed-off-by: Rodrigo Vivi [Fixed small conflict while cherry picking] --- drivers/gpu/drm/i915/i915_globals.c | 2 +- drivers/gpu/drm/i915/i915_pci.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 77f1911c463b..2db90e770616 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -148,7 +148,7 @@ static void __exit __i915_globals_flush(void) atomic_dec(&active); } -void __exit i915_globals_exit(void) +void i915_globals_exit(void) { GEM_BUG_ON(atomic_read(&active)); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 83b500bb170c..2880ec57c97d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1195,6 +1195,7 @@ static int __init i915_init(void) err = pci_register_driver(&i915_pci_driver); if (err) { i915_pmu_exit(); + i915_globals_exit(); return err; } From 97367c97226aab8b298ada954ce12659ee3ad2a4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 3 Aug 2021 13:43:12 +0200 Subject: [PATCH 419/502] ALSA: seq: Fix racy deletion of subscriber It turned out that the current implementation of the port subscription is racy. The subscription contains two linked lists, and we have to add to or delete from both lists. Since both connection and disconnection procedures perform the same order for those two lists (i.e. src list, then dest list), when a deletion happens during a connection procedure, the src list may be deleted before the dest list addition completes, and this may lead to a use-after-free or an Oops, even though the access to both lists are protected via mutex. The simple workaround for this race is to change the access order for the disconnection, namely, dest list, then src list. This assures that the connection has been established when disconnecting, and also the concurrent deletion can be avoided. Reported-and-tested-by: folkert Cc: Link: https://lore.kernel.org/r/20210801182754.GP890690@belle.intranet.vanheusden.com Link: https://lore.kernel.org/r/20210803114312.2536-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_ports.c | 39 ++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index b9c2ce2b8d5a..84d78630463e 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -514,10 +514,11 @@ static int check_and_subscribe_port(struct snd_seq_client *client, return err; } -static void delete_and_unsubscribe_port(struct snd_seq_client *client, - struct snd_seq_client_port *port, - struct snd_seq_subscribers *subs, - bool is_src, bool ack) +/* called with grp->list_mutex held */ +static void __delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack) { struct snd_seq_port_subs_info *grp; struct list_head *list; @@ -525,7 +526,6 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client, grp = is_src ? &port->c_src : &port->c_dest; list = is_src ? &subs->src_list : &subs->dest_list; - down_write(&grp->list_mutex); write_lock_irq(&grp->list_lock); empty = list_empty(list); if (!empty) @@ -535,6 +535,18 @@ static void delete_and_unsubscribe_port(struct snd_seq_client *client, if (!empty) unsubscribe_port(client, port, grp, &subs->info, ack); +} + +static void delete_and_unsubscribe_port(struct snd_seq_client *client, + struct snd_seq_client_port *port, + struct snd_seq_subscribers *subs, + bool is_src, bool ack) +{ + struct snd_seq_port_subs_info *grp; + + grp = is_src ? &port->c_src : &port->c_dest; + down_write(&grp->list_mutex); + __delete_and_unsubscribe_port(client, port, subs, is_src, ack); up_write(&grp->list_mutex); } @@ -590,27 +602,30 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector, struct snd_seq_client_port *dest_port, struct snd_seq_port_subscribe *info) { - struct snd_seq_port_subs_info *src = &src_port->c_src; + struct snd_seq_port_subs_info *dest = &dest_port->c_dest; struct snd_seq_subscribers *subs; int err = -ENOENT; - down_write(&src->list_mutex); + /* always start from deleting the dest port for avoiding concurrent + * deletions + */ + down_write(&dest->list_mutex); /* look for the connection */ - list_for_each_entry(subs, &src->list_head, src_list) { + list_for_each_entry(subs, &dest->list_head, dest_list) { if (match_subs_info(info, &subs->info)) { - atomic_dec(&subs->ref_count); /* mark as not ready */ + __delete_and_unsubscribe_port(dest_client, dest_port, + subs, false, + connector->number != dest_client->number); err = 0; break; } } - up_write(&src->list_mutex); + up_write(&dest->list_mutex); if (err < 0) return err; delete_and_unsubscribe_port(src_client, src_port, subs, true, connector->number != src_client->number); - delete_and_unsubscribe_port(dest_client, dest_port, subs, false, - connector->number != dest_client->number); kfree(subs); return 0; } From c87a4c542b5a796f795fec2b7a909c7d3067b11c Mon Sep 17 00:00:00 2001 From: Bijie Xu Date: Tue, 3 Aug 2021 11:40:18 +0200 Subject: [PATCH 420/502] net: flow_offload: correct comments mismatch with code Correct mismatch between the name of flow_offload_has_one_action() and its kdoc entry. Found using ./scripts/kernel-doc -Werror -none include/net/flow_offload.h Signed-off-by: Bijie Xu Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/flow_offload.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 69c9eabf8325..f3c2841566a0 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -293,7 +293,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action) } /** - * flow_action_has_one_action() - check if exactly one action is present + * flow_offload_has_one_action() - check if exactly one action is present * @action: tc filter flow offload action * * Returns true if exactly one action is present. From 0161d151f3e36306219f5aa6f5f6b3877038afd3 Mon Sep 17 00:00:00 2001 From: Bijie Xu Date: Tue, 3 Aug 2021 11:40:19 +0200 Subject: [PATCH 421/502] net: sched: provide missing kdoc for tcf_pkt_info and tcf_ematch_ops Provide missing kdoc of fields of struct tcf_pkt_info and tcf_ematch_ops. Found using ./scripts/kernel-doc -none -Werror include/net/pkt_cls.h Signed-off-by: Bijie Xu Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ec7823921bd2..298a8d10168b 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -337,6 +337,9 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); /** * struct tcf_pkt_info - packet information + * + * @ptr: start of the pkt data + * @nexthdr: offset of the next header */ struct tcf_pkt_info { unsigned char * ptr; @@ -355,6 +358,7 @@ struct tcf_ematch_ops; * @ops: the operations lookup table of the corresponding ematch module * @datalen: length of the ematch specific configuration data * @data: ematch specific data + * @net: the network namespace */ struct tcf_ematch { struct tcf_ematch_ops * ops; From 9fdc5d85a8fe684cdf24dc31c6bc4a727decfe87 Mon Sep 17 00:00:00 2001 From: Fei Qin Date: Tue, 3 Aug 2021 12:39:11 +0200 Subject: [PATCH 422/502] nfp: update ethtool reporting of pauseframe control Pauseframe control is set to symmetric mode by default on the NFP. Pause frames can not be configured through ethtool now, but ethtool can report the supported mode. Fixes: 265aeb511bd5 ("nfp: add support for .get_link_ksettings()") Signed-off-by: Fei Qin Signed-off-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1b482446536d..8803faadd302 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -286,6 +286,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev, /* Init to unknowns */ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; cmd->base.duplex = DUPLEX_UNKNOWN; From 4039146777a91e1576da2bf38e0d8a1061a1ae47 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 3 Aug 2021 12:00:16 +0200 Subject: [PATCH 423/502] net: ipv6: fix returned variable type in ip6_skb_dst_mtu The patch fixing the returned value of ip6_skb_dst_mtu (int -> unsigned int) was rebased between its initial review and the version applied. In the meantime fade56410c22 was applied, which added a new variable (int) used as the returned value. This lead to a mismatch between the function prototype and the variable used as the return value. Fixes: 40fc3054b458 ("net: ipv6: fix return value of ip6_skb_dst_mtu") Cc: Vadim Fedorenko Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 625a38ccb5d9..0bf09a9bca4e 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -265,7 +265,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { - int mtu; + unsigned int mtu; struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; From ecd92e2167c30faa18df21e3ec3dbec510ddebaa Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 31 Jul 2021 13:13:40 +0200 Subject: [PATCH 424/502] s390: update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/defconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 7de253f766e8..b88184019af9 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -335,7 +335,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b671642967ba..1667a3cdcf0a 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -325,7 +325,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y From c2ec772b87408259cb01209a22fb4e1ae7d346de Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 30 Jul 2021 16:38:07 +0200 Subject: [PATCH 425/502] cpuidle: teo: Fix alternative idle state lookup There are three mistakes in the loop in teo_select() that is looking for an alternative candidate idle state. First, it should walk all of the idle states shallower than the current candidate one, including all of the disabled ones, but it terminates after the first enabled idle state. Second, it should not terminate its last step if idle state 0 is disabled (which is related to the first issue). Finally, it may return the current alternative candidate idle state prematurely if the time span criterion is not met by the idle state under consideration at the moment. To address the issues mentioned above, make the loop in question walk all of the idle states shallower than the current candidate idle state all the way down to idle state 0 and rearrange the checks in it. Fixes: 77577558f25d ("cpuidle: teo: Rework most recent idle duration values treatment") Reported-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/teo.c | 44 ++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 7b91060e82f6..8c59050ba419 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -397,32 +397,46 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, intercept_sum = 0; recent_sum = 0; - for (i = idx - 1; i >= idx0; i--) { + for (i = idx - 1; i >= 0; i--) { struct teo_bin *bin = &cpu_data->state_bins[i]; s64 span_ns; intercept_sum += bin->intercepts; recent_sum += bin->recent; - if (dev->states_usage[i].disable) - continue; - span_ns = teo_middle_of_bin(i, drv); - if (!teo_time_ok(span_ns)) { - /* - * The current state is too shallow, so select - * the first enabled deeper state. - */ - duration_ns = last_enabled_span_ns; - idx = last_enabled_idx; - break; - } if ((!alt_recent || 2 * recent_sum > idx_recent_sum) && (!alt_intercepts || 2 * intercept_sum > idx_intercept_sum)) { - idx = i; - duration_ns = span_ns; + if (teo_time_ok(span_ns) && + !dev->states_usage[i].disable) { + idx = i; + duration_ns = span_ns; + } else { + /* + * The current state is too shallow or + * disabled, so take the first enabled + * deeper state with suitable time span. + */ + idx = last_enabled_idx; + duration_ns = last_enabled_span_ns; + } + break; + } + + if (dev->states_usage[i].disable) + continue; + + if (!teo_time_ok(span_ns)) { + /* + * The current state is too shallow, but if an + * alternative candidate state has been found, + * it may still turn out to be a better choice. + */ + if (last_enabled_idx != idx) + continue; + break; } From 4adae7dd10db10f20f51833dc11b3cf7a342ad38 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 30 Jul 2021 16:38:52 +0200 Subject: [PATCH 426/502] cpuidle: teo: Rename two local variables in teo_select() Rename two local variables in teo_select() so that their names better reflect their purpose. No functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/teo.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 8c59050ba419..d9262db79cae 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -382,8 +382,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum; alt_recent = idx_recent_sum > NR_RECENT / 2; if (alt_recent || alt_intercepts) { - s64 last_enabled_span_ns = duration_ns; - int last_enabled_idx = idx; + s64 first_suitable_span_ns = duration_ns; + int first_suitable_idx = idx; /* * Look for the deepest idle state whose target residency had @@ -419,8 +419,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * disabled, so take the first enabled * deeper state with suitable time span. */ - idx = last_enabled_idx; - duration_ns = last_enabled_span_ns; + idx = first_suitable_idx; + duration_ns = first_suitable_span_ns; } break; } @@ -434,14 +434,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * alternative candidate state has been found, * it may still turn out to be a better choice. */ - if (last_enabled_idx != idx) + if (first_suitable_idx != idx) continue; break; } - last_enabled_span_ns = span_ns; - last_enabled_idx = i; + first_suitable_span_ns = span_ns; + first_suitable_idx = i; } } From 6511a8b5b7a65037340cd8ee91a377811effbc83 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Aug 2021 18:14:44 +0200 Subject: [PATCH 427/502] Revert "ACPICA: Fix memory leak caused by _CID repair function" Revert commit c27bac0314131 ("ACPICA: Fix memory leak caused by _CID repair function") which is reported to cause a boot issue on Acer Swift 3 (SF314-51). Reported-by: Adrien Precigout Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsrepair2.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 38e10ab976e6..14b71b41e845 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -379,13 +379,6 @@ acpi_ns_repair_CID(struct acpi_evaluate_info *info, (*element_ptr)->common.reference_count = original_ref_count; - - /* - * The original_element holds a reference from the package object - * that represents _HID. Since a new element was created by _HID, - * remove the reference from the _CID package. - */ - acpi_ut_remove_reference(original_element); } element_ptr++; From 8b436a99cd708bd158231a0630ffa49b1d6175e4 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Mon, 2 Aug 2021 14:56:14 +0800 Subject: [PATCH 428/502] RDMA/hns: Fix the double unlock problem of poll_sem If hns_roce_cmd_use_events() fails then it means that the poll_sem is not obtained, but the poll_sem is released in hns_roce_cmd_use_polling(), this will cause an unlock problem. This is the static checker warning: drivers/infiniband/hw/hns/hns_roce_main.c:926 hns_roce_init() error: double unlocked '&hr_dev->cmd.poll_sem' (orig line 879) Event mode and polling mode are mutually exclusive and resources are separated, so there is no need to process polling mode resources in event mode. The initial mode of cmd is polling mode, so even if cmd fails to switch to event mode, it is not necessary to switch to polling mode. Fixes: a389d016c030 ("RDMA/hns: Enable all CMDQ context") Fixes: 3d50503b3b33 ("RDMA/hns: Optimize cmd init and mode selection for hip08") Link: https://lore.kernel.org/r/1627887374-20019-1-git-send-email-liangwenpeng@huawei.com Reported-by: Dan Carpenter Signed-off-by: Yangyang Li Signed-off-by: Wenpeng Liang Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cmd.c | 7 +++---- drivers/infiniband/hw/hns/hns_roce_main.c | 4 +--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 8f68cc3ff193..84f3f2b5f097 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -213,8 +213,10 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) hr_cmd->context = kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); - if (!hr_cmd->context) + if (!hr_cmd->context) { + hr_dev->cmd_mod = 0; return -ENOMEM; + } for (i = 0; i < hr_cmd->max_cmds; ++i) { hr_cmd->context[i].token = i; @@ -228,7 +230,6 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) spin_lock_init(&hr_cmd->context_lock); hr_cmd->use_events = 1; - down(&hr_cmd->poll_sem); return 0; } @@ -239,8 +240,6 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) kfree(hr_cmd->context); hr_cmd->use_events = 0; - - up(&hr_cmd->poll_sem); } struct hns_roce_cmd_mailbox * diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 078a97193f0e..cc6eab14a222 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -873,11 +873,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (hr_dev->cmd_mod) { ret = hns_roce_cmd_use_events(hr_dev); - if (ret) { + if (ret) dev_warn(dev, "Cmd event mode failed, set back to poll!\n"); - hns_roce_cmd_use_polling(hr_dev); - } } ret = hns_roce_init_hem(hr_dev); From abc7285d89ffd089739a1a3059ddd843dd019637 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 2 Aug 2021 16:19:14 -0700 Subject: [PATCH 429/502] mptcp: drop unused rcu member in mptcp_pm_addr_entry kfree_rcu() had been removed from pm_netlink.c, so this rcu field in struct mptcp_pm_addr_entry became useless. Let's drop it. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20210802231914.54709-1-mathew.j.martineau@linux.intel.com Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d2591ebf01d9..56263c2c4014 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry { struct mptcp_addr_info addr; u8 flags; int ifindex; - struct rcu_head rcu; struct socket *lsk; }; From e3ea110d6e796146920e1be0108464ebcf283ef7 Mon Sep 17 00:00:00 2001 From: Harshavardhan Unnibhavi Date: Mon, 2 Aug 2021 19:35:06 +0200 Subject: [PATCH 430/502] VSOCK: handle VIRTIO_VSOCK_OP_CREDIT_REQUEST The original implementation of the virtio-vsock driver does not handle a VIRTIO_VSOCK_OP_CREDIT_REQUEST as required by the virtio-vsock specification. The vsock device emulated by vhost-vsock and the virtio-vsock driver never uses this request, which was probably why nobody noticed it. However, another implementation of the device may use this request type. Hence, this commit introduces a way to handle an explicit credit request by responding with a corresponding credit update as required by the virtio-vsock specification. Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Signed-off-by: Harshavardhan Unnibhavi Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20210802173506.2383-1-harshanavkis@gmail.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 169ba8b72a63..081e7ae93cb1 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1079,6 +1079,9 @@ virtio_transport_recv_connected(struct sock *sk, virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk); + break; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; From d1a58c013a5837451e3213e7a426d350fa524ead Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 3 Aug 2021 08:37:46 +0200 Subject: [PATCH 431/502] net: dsa: qca: ar9331: reorder MDIO write sequence In case of this switch we work with 32bit registers on top of 16bit bus. Some registers (for example access to forwarding database) have trigger bit on the first 16bit half of request and the result + configuration of request in the second half. Without this patch, we would trigger database operation and overwrite result in one run. To make it work properly, we should do the second part of transfer before the first one is done. So far, this rule seems to work for all registers on this switch. Fixes: ec6698c272de ("net: dsa: add support for Atheros AR9331 built-in switch") Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210803063746.3600-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/qca/ar9331.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index ca2ad77b71f1..6686192e1883 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -837,16 +837,24 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val) return 0; } - ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); - if (ret < 0) - goto error; - + /* In case of this switch we work with 32bit registers on top of 16bit + * bus. Some registers (for example access to forwarding database) have + * trigger bit on the first 16bit half of request, the result and + * configuration of request in the second half. + * To make it work properly, we should do the second part of transfer + * before the first one is done. + */ ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, val >> 16); if (ret < 0) goto error; + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); + if (ret < 0) + goto error; + return 0; + error: dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n"); return ret; From d09560435cb712c9ec1e62b8a43a79b0af69fe77 Mon Sep 17 00:00:00 2001 From: Qiu Wenbo Date: Sun, 4 Jul 2021 16:34:41 +0800 Subject: [PATCH 432/502] riscv: dts: fix memory size for the SiFive HiFive Unmatched The production version of HiFive Unmatched have 16GB memory. Signed-off-by: Qiu Wenbo Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index b1c3c596578f..2e4ea84f27e7 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -24,7 +24,7 @@ memory@80000000 { device_type = "memory"; - reg = <0x0 0x80000000 0x2 0x00000000>; + reg = <0x0 0x80000000 0x4 0x00000000>; }; soc { From a18b14d8886614b3c7d290c4cfc33389822b0535 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 6 Jul 2021 09:26:21 -0700 Subject: [PATCH 433/502] riscv: Disable STACKPROTECTOR_PER_TASK if GCC_PLUGIN_RANDSTRUCT is enabled riscv uses the value of TSK_STACK_CANARY to set stack-protector-guard-offset. With GCC_PLUGIN_RANDSTRUCT enabled, that value is non-deterministic, and with riscv:allmodconfig often results in build errors such as cc1: error: '8120' is not a valid offset in '-mstack-protector-guard-offset=' Enable STACKPROTECTOR_PER_TASK only if GCC_PLUGIN_RANDSTRUCT is disabled to fix the problem. Fixes: fea2fed201ee5 ("riscv: Enable per-task stack canaries") Signed-off-by: Guenter Roeck Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8fcceb8eda07..31f9e92f1402 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -492,6 +492,7 @@ config CC_HAVE_STACKPROTECTOR_TLS config STACKPROTECTOR_PER_TASK def_bool y + depends on !GCC_PLUGIN_RANDSTRUCT depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS config PHYS_RAM_BASE From 5648c073c33d33a0a19d0cb1194a4eb88efe2b71 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Tue, 3 Aug 2021 21:47:11 +0200 Subject: [PATCH 434/502] USB: serial: option: add Telit FD980 composition 0x1056 Add the following Telit FD980 composition 0x1056: Cfg #1: mass storage Cfg #2: rndis, tty, adb, tty, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20210803194711.3036-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0fbe253dc570..039450069ca4 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1203,6 +1203,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1055, 0xff), /* Telit FN980 (PCIe) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 06f5553e0f0c2182268179b93856187d9cb86dd5 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 3 Aug 2021 18:58:21 +0800 Subject: [PATCH 435/502] net: sched: fix lockdep_set_class() typo error for sch->seqlock According to comment in qdisc_alloc(), sch->seqlock's lockdep class key should be set to qdisc_tx_busylock, due to possible type error, sch->busylock's lockdep class key is set to qdisc_tx_busylock, which is duplicated because sch->busylock's lockdep class key is already set in qdisc_alloc(). So fix it by replacing sch->busylock with sch->seqlock. Fixes: 96009c7d500e ("sched: replace __QDISC_STATE_RUNNING bit with a spin lock") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d9ac60ffe927..a8dd06c74e31 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -913,7 +913,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); - lockdep_set_class(&sch->busylock, + lockdep_set_class(&sch->seqlock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); seqcount_init(&sch->running); From 13a9c4ac319a23c792e2e03ac73777b6710132c3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 3 Aug 2021 15:00:43 +0300 Subject: [PATCH 436/502] net/prestera: Fix devlink groups leakage in error flow Devlink trap group is registered but not released in error flow, add the missing devlink_trap_groups_unregister() call. Fixes: 0a9003f45e91 ("net: marvell: prestera: devlink: add traps/groups implementation") Signed-off-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/prestera/prestera_devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index d12e21db9fd6..fa7a0682ad1e 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -530,6 +530,8 @@ err_trap_register: prestera_trap = &prestera_trap_items_arr[i]; devlink_traps_unregister(devlink, &prestera_trap->trap, 1); } + devlink_trap_groups_unregister(devlink, prestera_trap_groups_arr, + groups_count); err_groups_register: kfree(trap_data->trap_items_arr); err_trap_items_alloc: From 3212a99349cee5fb611d3ffcf0e65bc3cd6dcf2f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Aug 2021 11:31:00 +0200 Subject: [PATCH 437/502] USB: serial: pl2303: fix GT type detection At least some PL2303GT have a bcdDevice of 0x305 instead of 0x100 as the datasheet claims. Add it to the list of known release numbers for the HXN (G) type. Fixes: 894758d0571d ("USB: serial: pl2303: tighten type HXN (G) detection") Reported-by: Vasily Khoruzhick Tested-by: Vasily Khoruzhick Cc: stable@vger.kernel.org # 5.13 Link: https://lore.kernel.org/r/20210804093100.24811-1-johan@kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 17601e32083e..930b3d50a330 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -432,6 +432,7 @@ static int pl2303_detect_type(struct usb_serial *serial) case 0x200: switch (bcdDevice) { case 0x100: + case 0x305: /* * Assume it's an HXN-type if the device doesn't * support the old read request value. From 8a160e2e9aeb8318159b48701ad8a6e22274372d Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Tue, 3 Aug 2021 20:25:23 +0300 Subject: [PATCH 438/502] net: usb: pegasus: Check the return value of get_geristers() and friends; Certain call sites of get_geristers() did not do proper error handling. This could be a problem as get_geristers() typically return the data via pointer to a buffer. If an error occurred the code is carelessly manipulating the wrong data. Signed-off-by: Petko Manolov Reviewed-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 108 ++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 33 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 9a907182569c..22353bab76c8 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -132,9 +132,15 @@ static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, const void *data) { - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, + int ret; + + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0, indx, data, size, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } /* @@ -145,10 +151,15 @@ static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data) { void *buf = &data; + int ret; - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data, indx, buf, 1, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } static int update_eth_regs_async(pegasus_t *pegasus) @@ -188,10 +199,9 @@ static int update_eth_regs_async(pegasus_t *pegasus) static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) { - int i; - __u8 data[4] = { phy, 0, 0, indx }; + int i, ret; __le16 regdi; - int ret = -ETIMEDOUT; + __u8 data[4] = { phy, 0, 0, indx }; if (cmd & PHY_WRITE) { __le16 *t = (__le16 *) & data[1]; @@ -207,12 +217,15 @@ static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) if (data[0] & PHY_DONE) break; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } if (cmd & PHY_READ) { ret = get_registers(p, PhyData, 2, ®di); + if (ret < 0) + goto fail; *regd = le16_to_cpu(regdi); - return ret; } return 0; fail: @@ -235,9 +248,13 @@ static int write_mii_word(pegasus_t *pegasus, __u8 phy, __u8 indx, __u16 *regd) static int mdio_read(struct net_device *dev, int phy_id, int loc) { pegasus_t *pegasus = netdev_priv(dev); + int ret; u16 res; - read_mii_word(pegasus, phy_id, loc, &res); + ret = read_mii_word(pegasus, phy_id, loc, &res); + if (ret < 0) + return ret; + return (int)res; } @@ -251,10 +268,9 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val) static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) { - int i; - __u8 tmp = 0; + int ret, i; __le16 retdatai; - int ret; + __u8 tmp = 0; set_register(pegasus, EpromCtrl, 0); set_register(pegasus, EpromOffset, index); @@ -262,21 +278,25 @@ static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) for (i = 0; i < REG_TIMEOUT; i++) { ret = get_registers(pegasus, EpromCtrl, 1, &tmp); + if (ret < 0) + goto fail; if (tmp & EPROM_DONE) break; - if (ret == -ESHUTDOWN) - goto fail; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } ret = get_registers(pegasus, EpromData, 2, &retdatai); + if (ret < 0) + goto fail; *retdata = le16_to_cpu(retdatai); return ret; fail: - netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); - return -ETIMEDOUT; + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + return ret; } #ifdef PEGASUS_WRITE_EEPROM @@ -324,10 +344,10 @@ static int write_eprom_word(pegasus_t *pegasus, __u8 index, __u16 data) return ret; fail: - netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return -ETIMEDOUT; } -#endif /* PEGASUS_WRITE_EEPROM */ +#endif /* PEGASUS_WRITE_EEPROM */ static inline int get_node_id(pegasus_t *pegasus, u8 *id) { @@ -367,19 +387,21 @@ static void set_ethernet_addr(pegasus_t *pegasus) return; err: eth_hw_addr_random(pegasus->net); - dev_info(&pegasus->intf->dev, "software assigned MAC address.\n"); + netif_dbg(pegasus, drv, pegasus->net, "software assigned MAC address.\n"); return; } static inline int reset_mac(pegasus_t *pegasus) { + int ret, i; __u8 data = 0x8; - int i; set_register(pegasus, EthCtrl1, data); for (i = 0; i < REG_TIMEOUT; i++) { - get_registers(pegasus, EthCtrl1, 1, &data); + ret = get_registers(pegasus, EthCtrl1, 1, &data); + if (ret < 0) + goto fail; if (~data & 0x08) { if (loopback) break; @@ -402,22 +424,29 @@ static inline int reset_mac(pegasus_t *pegasus) } if (usb_dev_id[pegasus->dev_index].vendor == VENDOR_ELCON) { __u16 auxmode; - read_mii_word(pegasus, 3, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 3, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 3, 0x1b, &auxmode); } return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + return ret; } static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) { - __u16 linkpart; - __u8 data[4]; pegasus_t *pegasus = netdev_priv(dev); int ret; + __u16 linkpart; + __u8 data[4]; - read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + ret = read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + if (ret < 0) + goto fail; data[0] = 0xc8; /* TX & RX enable, append status, no CRC */ data[1] = 0; if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL)) @@ -435,11 +464,16 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) usb_dev_id[pegasus->dev_index].vendor == VENDOR_LINKSYS2 || usb_dev_id[pegasus->dev_index].vendor == VENDOR_DLINK) { u16 auxmode; - read_mii_word(pegasus, 0, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 0, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 0, 0x1b, &auxmode); } + return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return ret; } @@ -447,9 +481,9 @@ static void read_bulk_callback(struct urb *urb) { pegasus_t *pegasus = urb->context; struct net_device *net; + u8 *buf = urb->transfer_buffer; int rx_status, count = urb->actual_length; int status = urb->status; - u8 *buf = urb->transfer_buffer; __u16 pkt_len; if (!pegasus) @@ -998,8 +1032,7 @@ static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd) data[0] = pegasus->phy; fallthrough; case SIOCDEVPRIVATE + 1: - read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); - res = 0; + res = read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); break; case SIOCDEVPRIVATE + 2: if (!capable(CAP_NET_ADMIN)) @@ -1033,22 +1066,25 @@ static void pegasus_set_multicast(struct net_device *net) static __u8 mii_phy_probe(pegasus_t *pegasus) { - int i; + int i, ret; __u16 tmp; for (i = 0; i < 32; i++) { - read_mii_word(pegasus, i, MII_BMSR, &tmp); + ret = read_mii_word(pegasus, i, MII_BMSR, &tmp); + if (ret < 0) + goto fail; if (tmp == 0 || tmp == 0xffff || (tmp & BMSR_MEDIA) == 0) continue; else return i; } - +fail: return 0xff; } static inline void setup_pegasus_II(pegasus_t *pegasus) { + int ret; __u8 data = 0xa5; set_register(pegasus, Reg1d, 0); @@ -1060,7 +1096,9 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg7b, 2); set_register(pegasus, 0x83, data); - get_registers(pegasus, 0x83, 1, &data); + ret = get_registers(pegasus, 0x83, 1, &data); + if (ret < 0) + goto fail; if (data == 0xa5) pegasus->chip = 0x8513; @@ -1075,6 +1113,10 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg81, 6); else set_register(pegasus, Reg81, 2); + + return; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); } static void check_carrier(struct work_struct *work) From bc65bacf239d0bc1d00d92cd535a4031921dd78a Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Tue, 3 Aug 2021 20:25:24 +0300 Subject: [PATCH 439/502] net: usb: pegasus: Remove the changelog and DRIVER_VERSION. These are now deemed redundant. Signed-off-by: Petko Manolov Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 22353bab76c8..f18b03be1b87 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1,31 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 1999-2013 Petko Manolov (petkan@nucleusys.com) + * Copyright (c) 1999-2021 Petko Manolov (petkan@nucleusys.com) * - * ChangeLog: - * .... Most of the time spent on reading sources & docs. - * v0.2.x First official release for the Linux kernel. - * v0.3.0 Beutified and structured, some bugs fixed. - * v0.3.x URBifying bulk requests and bugfixing. First relatively - * stable release. Still can touch device's registers only - * from top-halves. - * v0.4.0 Control messages remained unurbified are now URBs. - * Now we can touch the HW at any time. - * v0.4.9 Control urbs again use process context to wait. Argh... - * Some long standing bugs (enable_net_traffic) fixed. - * Also nasty trick about resubmiting control urb from - * interrupt context used. Please let me know how it - * behaves. Pegasus II support added since this version. - * TODO: suppressing HCD warnings spewage on disconnect. - * v0.4.13 Ethernet address is now set at probe(), not at open() - * time as this seems to break dhcpd. - * v0.5.0 branch to 2.5.x kernels - * v0.5.1 ethtool support added - * v0.5.5 rx socket buffers are in a pool and the their allocation - * is out of the interrupt routine. - * ... - * v0.9.3 simplified [get|set]_register(s), async update registers - * logic revisited, receive skb_pool removed. */ #include @@ -45,7 +21,6 @@ /* * Version Information */ -#define DRIVER_VERSION "v0.9.3 (2013/04/25)" #define DRIVER_AUTHOR "Petko Manolov " #define DRIVER_DESC "Pegasus/Pegasus II USB Ethernet driver" @@ -914,7 +889,6 @@ static void pegasus_get_drvinfo(struct net_device *dev, pegasus_t *pegasus = netdev_priv(dev); strlcpy(info->driver, driver_name, sizeof(info->driver)); - strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); usb_make_path(pegasus->usb, info->bus_info, sizeof(info->bus_info)); } @@ -1338,7 +1312,7 @@ static void __init parse_id(char *id) static int __init pegasus_init(void) { - pr_info("%s: %s, " DRIVER_DESC "\n", driver_name, DRIVER_VERSION); + pr_info("%s: " DRIVER_DESC "\n", driver_name); if (devid) parse_id(devid); return usb_register(&pegasus_driver); From 85cd39af14f498f791d8aab3fbd64cd175787f1a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 4 Aug 2021 05:28:52 -0400 Subject: [PATCH 440/502] KVM: Do not leak memory for duplicate debugfs directories KVM creates a debugfs directory for each VM in order to store statistics about the virtual machine. The directory name is built from the process pid and a VM fd. While generally unique, it is possible to keep a file descriptor alive in a way that causes duplicate directories, which manifests as these messages: [ 471.846235] debugfs: Directory '20245-4' with parent 'kvm' already present! Even though this should not happen in practice, it is more or less expected in the case of KVM for testcases that call KVM_CREATE_VM and close the resulting file descriptor repeatedly and in parallel. When this happens, debugfs_create_dir() returns an error but kvm_create_vm_debugfs() goes on to allocate stat data structs which are later leaked. The slow memory leak was spotted by syzkaller, where it caused OOM reports. Since the issue only affects debugfs, do a lookup before calling debugfs_create_dir, so that the message is downgraded and rate-limited. While at it, ensure kvm->debugfs_dentry is NULL rather than an error if it is not created. This fixes kvm_destroy_vm_debugfs, which was not checking IS_ERR_OR_NULL correctly. Cc: stable@vger.kernel.org Fixes: 536a6f88c49d ("KVM: Create debugfs dir and stat files for each VM") Reported-by: Alexey Kardashevskiy Suggested-by: Greg Kroah-Hartman Acked-by: Greg Kroah-Hartman Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d20fba0fc290..b50dbe269f4b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -892,6 +892,8 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) { + static DEFINE_MUTEX(kvm_debugfs_lock); + struct dentry *dent; char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; const struct _kvm_stats_desc *pdesc; @@ -903,8 +905,20 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return 0; snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); - kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_lock(&kvm_debugfs_lock); + dent = debugfs_lookup(dir_name, kvm_debugfs_dir); + if (dent) { + pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name); + dput(dent); + mutex_unlock(&kvm_debugfs_lock); + return 0; + } + dent = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_unlock(&kvm_debugfs_lock); + if (IS_ERR(dent)) + return 0; + kvm->debugfs_dentry = dent; kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, sizeof(*kvm->debugfs_stat_data), GFP_KERNEL_ACCOUNT); @@ -5201,7 +5215,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { + if (kvm->debugfs_dentry) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); if (p) { From 179c6c27bf487273652efc99acd3ba512a23c137 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 3 Aug 2021 09:27:46 -0700 Subject: [PATCH 441/502] KVM: SVM: Fix off-by-one indexing when nullifying last used SEV VMCB Use the raw ASID, not ASID-1, when nullifying the last used VMCB when freeing an SEV ASID. The consumer, pre_sev_run(), indexes the array by the raw ASID, thus KVM could get a false negative when checking for a different VMCB if KVM manages to reallocate the same ASID+VMCB combo for a new VM. Note, this cannot cause a functional issue _in the current code_, as pre_sev_run() also checks which pCPU last did VMRUN for the vCPU, and last_vmentry_cpu is initialized to -1 during vCPU creation, i.e. is guaranteed to mismatch on the first VMRUN. However, prior to commit 8a14fe4f0c54 ("kvm: x86: Move last_cpu into kvm_vcpu_arch as last_vmentry_cpu"), SVM tracked pCPU on its own and zero-initialized the last_cpu variable. Thus it's theoretically possible that older versions of KVM could miss a TLB flush if the first VMRUN is on pCPU0 and the ASID and VMCB exactly match those of a prior VM. Fixes: 70cd94e60c73 ("KVM: SVM: VMRUN should use associated ASID when SEV is enabled") Cc: Tom Lendacky Cc: Brijesh Singh Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6710d9ee2e4b..4d0aba185412 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -189,7 +189,7 @@ static void sev_asid_free(struct kvm_sev_info *sev) for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); - sd->sev_vmcbs[pos] = NULL; + sd->sev_vmcbs[sev->asid] = NULL; } mutex_unlock(&sev_bitmap_lock); From 396492b4c5f249f616002bb5de787d060d2b2974 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Aug 2021 16:14:15 -0700 Subject: [PATCH 442/502] docs: networking: netdevsim rules There are aspects of netdevsim which are commonly misunderstood and pointed out in review. Cong suggest we document them. Suggested-by: Cong Wang Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 91b2cf712801..e26532f49760 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -228,6 +228,23 @@ before posting to the mailing list. The patchwork build bot instance gets overloaded very easily and netdev@vger really doesn't need more traffic if we can help it. +netdevsim is great, can I extend it for my out-of-tree tests? +------------------------------------------------------------- + +No, `netdevsim` is a test vehicle solely for upstream tests. +(Please add your tests under tools/testing/selftests/.) + +We also give no guarantees that `netdevsim` won't change in the future +in a way which would break what would normally be considered uAPI. + +Is netdevsim considered a "user" of an API? +------------------------------------------- + +Linux kernel has a long standing rule that no API should be added unless +it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are +strongly encouraged when adding new APIs, but `netdevsim` in itself +is **not** considered a use case/user. + Any other tips to help ensure my net/net-next patch gets OK'd? -------------------------------------------------------------- Attention to detail. Re-read your own work as if you were the From 6b67d4d63edece1033972214704c04f36c5be89a Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Wed, 4 Aug 2021 11:13:39 +0300 Subject: [PATCH 443/502] net: usb: lan78xx: don't modify phy_device state concurrently Currently phy_device state could be left in inconsistent state shown by following alert message[1]. This is because phy_read_status could be called concurrently from lan78xx_delayedwork, phy_state_machine and __ethtool_get_link. Fix this by making sure that phy_device state is updated atomically. [1] lan78xx 1-1.1.1:1.0 eth0: No phy led trigger registered for speed(-1) Signed-off-by: Ivan T. Ivanov Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 25489389ea49..6d092d78e0cb 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1154,7 +1154,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) { struct phy_device *phydev = dev->net->phydev; struct ethtool_link_ksettings ecmd; - int ladv, radv, ret; + int ladv, radv, ret, link; u32 buf; /* clear LAN78xx interrupt status */ @@ -1162,9 +1162,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) if (unlikely(ret < 0)) return -EIO; + mutex_lock(&phydev->lock); phy_read_status(phydev); + link = phydev->link; + mutex_unlock(&phydev->lock); - if (!phydev->link && dev->link_on) { + if (!link && dev->link_on) { dev->link_on = false; /* reset MAC */ @@ -1177,7 +1180,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) return -EIO; del_timer(&dev->stat_monitor); - } else if (phydev->link && !dev->link_on) { + } else if (link && !dev->link_on) { dev->link_on = true; phy_ethtool_ksettings_get(phydev, &ecmd); @@ -1466,9 +1469,14 @@ static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata) static u32 lan78xx_get_link(struct net_device *net) { - phy_read_status(net->phydev); + u32 link; - return net->phydev->link; + mutex_lock(&net->phydev->lock); + phy_read_status(net->phydev); + link = net->phydev->link; + mutex_unlock(&net->phydev->lock); + + return link; } static void lan78xx_get_drvinfo(struct net_device *net, From f558c2b834ec27e75d37b1c860c139e7b7c3a8e4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Aug 2021 12:45:01 +0200 Subject: [PATCH 444/502] sched/rt: Fix double enqueue caused by rt_effective_prio Double enqueues in rt runqueues (list) have been reported while running a simple test that spawns a number of threads doing a short sleep/run pattern while being concurrently setscheduled between rt and fair class. WARNING: CPU: 3 PID: 2825 at kernel/sched/rt.c:1294 enqueue_task_rt+0x355/0x360 CPU: 3 PID: 2825 Comm: setsched__13 RIP: 0010:enqueue_task_rt+0x355/0x360 Call Trace: __sched_setscheduler+0x581/0x9d0 _sched_setscheduler+0x63/0xa0 do_sched_setscheduler+0xa0/0x150 __x64_sys_sched_setscheduler+0x1a/0x30 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xae list_add double add: new=ffff9867cb629b40, prev=ffff9867cb629b40, next=ffff98679fc67ca0. kernel BUG at lib/list_debug.c:31! invalid opcode: 0000 [#1] PREEMPT_RT SMP PTI CPU: 3 PID: 2825 Comm: setsched__13 RIP: 0010:__list_add_valid+0x41/0x50 Call Trace: enqueue_task_rt+0x291/0x360 __sched_setscheduler+0x581/0x9d0 _sched_setscheduler+0x63/0xa0 do_sched_setscheduler+0xa0/0x150 __x64_sys_sched_setscheduler+0x1a/0x30 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xae __sched_setscheduler() uses rt_effective_prio() to handle proper queuing of priority boosted tasks that are setscheduled while being boosted. rt_effective_prio() is however called twice per each __sched_setscheduler() call: first directly by __sched_setscheduler() before dequeuing the task and then by __setscheduler() to actually do the priority change. If the priority of the pi_top_task is concurrently being changed however, it might happen that the two calls return different results. If, for example, the first call returned the same rt priority the task was running at and the second one a fair priority, the task won't be removed by the rt list (on_list still set) and then enqueued in the fair runqueue. When eventually setscheduled back to rt it will be seen as enqueued already and the WARNING/BUG be issued. Fix this by calling rt_effective_prio() only once and then reusing the return value. While at it refactor code as well for clarity. Concurrent priority inheritance handling is still safe and will eventually converge to a new state by following the inheritance chain(s). Fixes: 0782e63bc6fe ("sched: Handle priority boosted tasks proper in setscheduler()") [squashed Peterz changes; added changelog] Reported-by: Mark Simmons Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210803104501.38333-1-juri.lelli@redhat.com --- kernel/sched/core.c | 90 ++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 55 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d9ff40f4661..20ffcc044134 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1981,12 +1981,18 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) dequeue_task(rq, p, flags); } -/* - * __normal_prio - return the priority that is based on the static prio - */ -static inline int __normal_prio(struct task_struct *p) +static inline int __normal_prio(int policy, int rt_prio, int nice) { - return p->static_prio; + int prio; + + if (dl_policy(policy)) + prio = MAX_DL_PRIO - 1; + else if (rt_policy(policy)) + prio = MAX_RT_PRIO - 1 - rt_prio; + else + prio = NICE_TO_PRIO(nice); + + return prio; } /* @@ -1998,15 +2004,7 @@ static inline int __normal_prio(struct task_struct *p) */ static inline int normal_prio(struct task_struct *p) { - int prio; - - if (task_has_dl_policy(p)) - prio = MAX_DL_PRIO-1; - else if (task_has_rt_policy(p)) - prio = MAX_RT_PRIO-1 - p->rt_priority; - else - prio = __normal_prio(p); - return prio; + return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); } /* @@ -4099,7 +4097,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) } else if (PRIO_TO_NICE(p->static_prio) < 0) p->static_prio = NICE_TO_PRIO(0); - p->prio = p->normal_prio = __normal_prio(p); + p->prio = p->normal_prio = p->static_prio; set_load_weight(p, false); /* @@ -6341,6 +6339,18 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag } EXPORT_SYMBOL(default_wake_function); +static void __setscheduler_prio(struct task_struct *p, int prio) +{ + if (dl_prio(prio)) + p->sched_class = &dl_sched_class; + else if (rt_prio(prio)) + p->sched_class = &rt_sched_class; + else + p->sched_class = &fair_sched_class; + + p->prio = prio; +} + #ifdef CONFIG_RT_MUTEXES static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) @@ -6456,22 +6466,19 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) } else { p->dl.pi_se = &p->dl; } - p->sched_class = &dl_sched_class; } else if (rt_prio(prio)) { if (dl_prio(oldprio)) p->dl.pi_se = &p->dl; if (oldprio < prio) queue_flag |= ENQUEUE_HEAD; - p->sched_class = &rt_sched_class; } else { if (dl_prio(oldprio)) p->dl.pi_se = &p->dl; if (rt_prio(oldprio)) p->rt.timeout = 0; - p->sched_class = &fair_sched_class; } - p->prio = prio; + __setscheduler_prio(p, prio); if (queued) enqueue_task(rq, p, queue_flag); @@ -6824,35 +6831,6 @@ static void __setscheduler_params(struct task_struct *p, set_load_weight(p, true); } -/* Actually do priority change: must hold pi & rq lock. */ -static void __setscheduler(struct rq *rq, struct task_struct *p, - const struct sched_attr *attr, bool keep_boost) -{ - /* - * If params can't change scheduling class changes aren't allowed - * either. - */ - if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS) - return; - - __setscheduler_params(p, attr); - - /* - * Keep a potential priority boosting if called from - * sched_setscheduler(). - */ - p->prio = normal_prio(p); - if (keep_boost) - p->prio = rt_effective_prio(p, p->prio); - - if (dl_prio(p->prio)) - p->sched_class = &dl_sched_class; - else if (rt_prio(p->prio)) - p->sched_class = &rt_sched_class; - else - p->sched_class = &fair_sched_class; -} - /* * Check the target process has a UID that matches the current process's: */ @@ -6873,10 +6851,8 @@ static int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi) { - int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : - MAX_RT_PRIO - 1 - attr->sched_priority; - int retval, oldprio, oldpolicy = -1, queued, running; - int new_effective_prio, policy = attr->sched_policy; + int oldpolicy = -1, policy = attr->sched_policy; + int retval, oldprio, newprio, queued, running; const struct sched_class *prev_class; struct callback_head *head; struct rq_flags rf; @@ -7074,6 +7050,7 @@ change: p->sched_reset_on_fork = reset_on_fork; oldprio = p->prio; + newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); if (pi) { /* * Take priority boosted tasks into account. If the new @@ -7082,8 +7059,8 @@ change: * the runqueue. This will be done when the task deboost * itself. */ - new_effective_prio = rt_effective_prio(p, newprio); - if (new_effective_prio == oldprio) + newprio = rt_effective_prio(p, newprio); + if (newprio == oldprio) queue_flags &= ~DEQUEUE_MOVE; } @@ -7096,7 +7073,10 @@ change: prev_class = p->sched_class; - __setscheduler(rq, p, attr, pi); + if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { + __setscheduler_params(p, attr); + __setscheduler_prio(p, newprio); + } __setscheduler_uclamp(p, attr); if (queued) { From f4b4b45652578357031fbbef7f7a1b04f6fa2dc3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Jul 2021 11:14:57 +0200 Subject: [PATCH 445/502] perf/x86: Fix out of bound MSR access On Wed, Jul 28, 2021 at 12:49:43PM -0400, Vince Weaver wrote: > [32694.087403] unchecked MSR access error: WRMSR to 0x318 (tried to write 0x0000000000000000) at rIP: 0xffffffff8106f854 (native_write_msr+0x4/0x20) > [32694.101374] Call Trace: > [32694.103974] perf_clear_dirty_counters+0x86/0x100 The problem being that it doesn't filter out all fake counters, in specific the above (erroneously) tries to use FIXED_BTS. Limit the fixed counters indexes to the hardware supplied number. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Tested-by: Vince Weaver Tested-by: Like Xu Link: https://lkml.kernel.org/r/YQJxka3dxgdIdebG@hirez.programming.kicks-ass.net --- arch/x86/events/core.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1eb45139fcc6..3092fbf9dbe4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2489,13 +2489,15 @@ void perf_clear_dirty_counters(void) return; for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { - /* Metrics and fake events don't have corresponding HW counters. */ - if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR)) - continue; - else if (i >= INTEL_PMC_IDX_FIXED) + if (i >= INTEL_PMC_IDX_FIXED) { + /* Metrics and fake events don't have corresponding HW counters. */ + if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed)) + continue; + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); - else + } else { wrmsrl(x86_pmu_event_addr(i), 0); + } } bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX); From df51fe7ea1c1c2c3bfdb81279712fdd2e4ea6c27 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 2 Aug 2021 15:08:50 +0800 Subject: [PATCH 446/502] perf/x86/amd: Don't touch the AMD64_EVENTSEL_HOSTONLY bit inside the guest If we use "perf record" in an AMD Milan guest, dmesg reports a #GP warning from an unchecked MSR access error on MSR_F15H_PERF_CTLx: [] unchecked MSR access error: WRMSR to 0xc0010200 (tried to write 0x0000020000110076) at rIP: 0xffffffff8106ddb4 (native_write_msr+0x4/0x20) [] Call Trace: [] amd_pmu_disable_event+0x22/0x90 [] x86_pmu_stop+0x4c/0xa0 [] x86_pmu_del+0x3a/0x140 The AMD64_EVENTSEL_HOSTONLY bit is defined and used on the host, while the guest perf driver should avoid such use. Fixes: 1018faa6cf23 ("perf/x86/kvm: Fix Host-Only/Guest-Only counting with SVM disabled") Signed-off-by: Like Xu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Liam Merwick Tested-by: Kim Phillips Tested-by: Liam Merwick Link: https://lkml.kernel.org/r/20210802070850.35295-1-likexu@tencent.com --- arch/x86/events/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2bf1c7ea2758..2938c902ffbe 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1115,9 +1115,10 @@ void x86_pmu_stop(struct perf_event *event, int flags); static inline void x86_pmu_disable_event(struct perf_event *event) { + u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrl(hwc->config_base, hwc->config & ~disable_mask); if (is_counter_pair(hwc)) wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); From bb2baeb214a71cda47d50dce80414016117ddda0 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 2 Aug 2021 11:09:03 -0700 Subject: [PATCH 447/502] KVM: SVM: improve the code readability for ASID management KVM SEV code uses bitmaps to manage ASID states. ASID 0 was always skipped because it is never used by VM. Thus, in existing code, ASID value and its bitmap postion always has an 'offset-by-1' relationship. Both SEV and SEV-ES shares the ASID space, thus KVM uses a dynamic range [min_asid, max_asid] to handle SEV and SEV-ES ASIDs separately. Existing code mixes the usage of ASID value and its bitmap position by using the same variable called 'min_asid'. Fix the min_asid usage: ensure that its usage is consistent with its name; allocate extra size for ASID 0 to ensure that each ASID has the same value with its bitmap position. Add comments on ASID bitmap allocation to clarify the size change. Signed-off-by: Mingwei Zhang Cc: Tom Lendacky Cc: Marc Orr Cc: David Rientjes Cc: Alper Gun Cc: Dionna Glaze Cc: Sean Christopherson Cc: Vipin Sharma Cc: Peter Gonda Cc: Joerg Roedel Message-Id: <20210802180903.159381-1-mizhang@google.com> [Fix up sev_asid_free to also index by ASID, as suggested by Sean Christopherson, and use nr_asids in sev_cpu_init. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 43 +++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 4d0aba185412..7fbce342eec4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock); unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long sev_me_mask; +static unsigned int nr_asids; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; @@ -78,11 +79,11 @@ struct enc_region { /* Called with the sev_bitmap_lock held, or on shutdown */ static int sev_flush_asids(int min_asid, int max_asid) { - int ret, pos, error = 0; + int ret, asid, error = 0; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid); - if (pos >= max_asid) + asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); + if (asid > max_asid) return -EBUSY; /* @@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid) /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, - max_sev_asid); - bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); + nr_asids); + bitmap_zero(sev_reclaim_asid_bitmap, nr_asids); return true; } static int sev_asid_new(struct kvm_sev_info *sev) { - int pos, min_asid, max_asid, ret; + int asid, min_asid, max_asid, ret; bool retry = true; enum misc_res_type type; @@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev) * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ - min_asid = sev->es_active ? 0 : min_sev_asid - 1; + min_asid = sev->es_active ? 1 : min_sev_asid; max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); - if (pos >= max_asid) { + asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); + if (asid > max_asid) { if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; @@ -157,11 +158,11 @@ again: goto e_uncharge; } - __set_bit(pos, sev_asid_bitmap); + __set_bit(asid, sev_asid_bitmap); mutex_unlock(&sev_bitmap_lock); - return pos + 1; + return asid; e_uncharge: misc_cg_uncharge(type, sev->misc_cg, 1); put_misc_cg(sev->misc_cg); @@ -179,13 +180,12 @@ static int sev_get_asid(struct kvm *kvm) static void sev_asid_free(struct kvm_sev_info *sev) { struct svm_cpu_data *sd; - int cpu, pos; + int cpu; enum misc_res_type type; mutex_lock(&sev_bitmap_lock); - pos = sev->asid - 1; - __set_bit(pos, sev_reclaim_asid_bitmap); + __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); @@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void) min_sev_asid = edx; sev_me_mask = 1UL << (ebx & 0x3f); - /* Initialize SEV ASID bitmaps */ - sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + /* + * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, + * even though it's never used, so that the bitmap is indexed by the + * actual ASID. + */ + nr_asids = max_sev_asid + 1; + sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_asid_bitmap) goto out; - sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) { bitmap_free(sev_asid_bitmap); sev_asid_bitmap = NULL; @@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void) return; /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ - sev_flush_asids(0, max_sev_asid); + sev_flush_asids(1, max_sev_asid); bitmap_free(sev_asid_bitmap); bitmap_free(sev_reclaim_asid_bitmap); @@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) if (!sev_enabled) return 0; - sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); + sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) return -ENOMEM; From 13c2c3cfe01952575b1dd5e24d450fcccff93bc0 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 4 Aug 2021 14:20:57 +0300 Subject: [PATCH 448/502] KVM: selftests: fix hyperv_clock test The test was mistakenly using addr_gpa2hva on a gva and that happened to work accidentally. Commit 106a2e766eae ("KVM: selftests: Lower the min virtual address for misc page allocations") revealed this bug. Fixes: 2c7f76b4c42b ("selftests: kvm: Add basic Hyper-V clocksources tests", 2021-03-18) Signed-off-by: Maxim Levitsky Message-Id: <20210804112057.409498-1-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index bab10ae787b6..e0b2bb1339b1 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -215,7 +215,7 @@ int main(void) vcpu_set_hv_cpuid(vm, VCPU_ID); tsc_page_gva = vm_vaddr_alloc_page(vm); - memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); + memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned\n"); vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); From 952835edb4fdad49361d5330da918be8b765b787 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 4 Aug 2021 17:18:00 +0200 Subject: [PATCH 449/502] s390/dasd: fix use after free in dasd path handling When new configuration data is obtained after a path event it is stored in the per path array. The old data needs to be freed. The first valid configuration data is also referenced in the device private structure to identify the device. When the old per path configuration data was freed the device still pointed to the already freed data leading to a use after free. Fix by replacing also the device configuration data with the newly obtained one before the old data gets freed. Fixes: 460181217a24 ("s390/dasd: Store path configuration data during path handling") Cc: stable@vger.kernel.org # 5.11+ Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20210804151800.4031761-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 0de1a463c509..fb5d8152652d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1004,15 +1004,23 @@ static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len) static void dasd_eckd_store_conf_data(struct dasd_device *device, struct dasd_conf_data *conf_data, int chp) { + struct dasd_eckd_private *private = device->private; struct channel_path_desc_fmt0 *chp_desc; struct subchannel_id sch_id; + void *cdp; - ccw_device_get_schid(device->cdev, &sch_id); /* * path handling and read_conf allocate data * free it before replacing the pointer + * also replace the old private->conf_data pointer + * with the new one if this points to the same data */ - kfree(device->path[chp].conf_data); + cdp = device->path[chp].conf_data; + if (private->conf_data == cdp) { + private->conf_data = (void *)conf_data; + dasd_eckd_identify_conf_parts(private); + } + ccw_device_get_schid(device->cdev, &sch_id); device->path[chp].conf_data = conf_data; device->path[chp].cssid = sch_id.cssid; device->path[chp].ssid = sch_id.ssid; @@ -1020,6 +1028,7 @@ static void dasd_eckd_store_conf_data(struct dasd_device *device, if (chp_desc) device->path[chp].chpid = chp_desc->chpid; kfree(chp_desc); + kfree(cdp); } static void dasd_eckd_clear_conf_data(struct dasd_device *device) From 402e0b8cd00284a25c6eb8c0a43319bc8430b1c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Aug 2021 11:49:58 +0200 Subject: [PATCH 450/502] n64cart: fix the dma address in n64cart_do_bvec dma_map_bvec already takes bv_offset into account. Fixes: 9b2a2bbbb4d0 ("block: Add n64 cart driver") Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/n64cart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 7b4dd10af9ec..c84be0028f63 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -74,7 +74,7 @@ static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos) n64cart_wait_dma(); - n64cart_write_reg(PI_DRAM_REG, dma_addr + bv->bv_offset); + n64cart_write_reg(PI_DRAM_REG, dma_addr); n64cart_write_reg(PI_CART_REG, (bstart | CART_DOMAIN) & CART_MAX); n64cart_write_reg(PI_WRITE_REG, bv->bv_len - 1); From 83d6c39310b6d11199179f6384c2b0a415389597 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 3 Aug 2021 09:14:35 -0600 Subject: [PATCH 451/502] io-wq: fix race between worker exiting and activating free worker Nadav correctly reports that we have a race between a worker exiting, and new work being queued. This can lead to work being queued behind an existing worker that could be sleeping on an event before it can run to completion, and hence introducing potential big latency gaps if we hit this race condition: cpu0 cpu1 ---- ---- io_wqe_worker() schedule_timeout() // timed out io_wqe_enqueue() io_wqe_wake_worker() // work_flags & IO_WQ_WORK_CONCURRENT io_wqe_activate_free_worker() io_worker_exit() Fix this by having the exiting worker go through the normal decrement of a running worker, which will spawn a new one if needed. The free worker activation is modified to only return success if we were able to find a sleeping worker - if not, we keep looking through the list. If we fail, we create a new worker as per usual. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/io-uring/BFF746C0-FEDE-4646-A253-3021C57C26C9@gmail.com/ Reported-by: Nadav Amit Tested-by: Nadav Amit Signed-off-by: Jens Axboe --- fs/io-wq.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index cf086b01c6c6..50dc93ffc153 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -130,6 +130,7 @@ struct io_cb_cancel_data { }; static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); +static void io_wqe_dec_running(struct io_worker *worker); static bool io_worker_get(struct io_worker *worker) { @@ -168,26 +169,21 @@ static void io_worker_exit(struct io_worker *worker) { struct io_wqe *wqe = worker->wqe; struct io_wqe_acct *acct = io_wqe_get_acct(worker); - unsigned flags; if (refcount_dec_and_test(&worker->ref)) complete(&worker->ref_done); wait_for_completion(&worker->ref_done); - preempt_disable(); - current->flags &= ~PF_IO_WORKER; - flags = worker->flags; - worker->flags = 0; - if (flags & IO_WORKER_F_RUNNING) - atomic_dec(&acct->nr_running); - worker->flags = 0; - preempt_enable(); - raw_spin_lock_irq(&wqe->lock); - if (flags & IO_WORKER_F_FREE) + if (worker->flags & IO_WORKER_F_FREE) hlist_nulls_del_rcu(&worker->nulls_node); list_del_rcu(&worker->all_list); acct->nr_workers--; + preempt_disable(); + io_wqe_dec_running(worker); + worker->flags = 0; + current->flags &= ~PF_IO_WORKER; + preempt_enable(); raw_spin_unlock_irq(&wqe->lock); kfree_rcu(worker, rcu); @@ -214,15 +210,19 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe) struct hlist_nulls_node *n; struct io_worker *worker; - n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list)); - if (is_a_nulls(n)) - return false; - - worker = hlist_nulls_entry(n, struct io_worker, nulls_node); - if (io_worker_get(worker)) { - wake_up_process(worker->task); + /* + * Iterate free_list and see if we can find an idle worker to + * activate. If a given worker is on the free_list but in the process + * of exiting, keep trying. + */ + hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { + if (!io_worker_get(worker)) + continue; + if (wake_up_process(worker->task)) { + io_worker_release(worker); + return true; + } io_worker_release(worker); - return true; } return false; From a07296453bf2778952a09b6244a695bf7607babb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Aug 2021 13:41:47 -0700 Subject: [PATCH 452/502] drm/i915: fix i915_globals_exit() section mismatch error Fix modpost Section mismatch error in i915_globals_exit(). Since both an __init function and an __exit function can call i915_globals_exit(), any function that i915_globals_exit() calls should not be marked as __init or __exit. I.e., it needs to be available for either of them. WARNING: modpost: vmlinux.o(.text+0x8b796a): Section mismatch in reference from the function i915_globals_exit() to the function .exit.text:__i915_globals_flush() The function i915_globals_exit() references a function in an exit section. Often the function __i915_globals_flush() has valid usage outside the exit section and the fix is to remove the __exit annotation of __i915_globals_flush. ERROR: modpost: Section mismatches detected. Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them. Fixes: 1354d830cb8f ("drm/i915: Call i915_globals_exit() if pci_register_device() fails") Signed-off-by: Randy Dunlap Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Joonas Lahtinen Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20210804204147.2070-1-rdunlap@infradead.org --- drivers/gpu/drm/i915/i915_globals.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index 2db90e770616..3acb0b6be284 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -138,7 +138,7 @@ void i915_globals_unpark(void) atomic_inc(&active); } -static void __exit __i915_globals_flush(void) +static void __i915_globals_flush(void) { atomic_inc(&active); /* skip shrinking */ From 2c05caa7ba8803209769b9e4fe02c38d77ae88d0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 30 Jul 2021 17:19:51 -0400 Subject: [PATCH 453/502] tracing / histogram: Give calculation hist_fields a size When working on my user space applications, I found a bug in the synthetic event code where the automated synthetic event field was not matching the event field calculation it was attached to. Looking deeper into it, it was because the calculation hist_field was not given a size. The synthetic event fields are matched to their hist_fields either by having the field have an identical string type, or if that does not match, then the size and signed values are used to match the fields. The problem arose when I tried to match a calculation where the fields were "unsigned int". My tool created a synthetic event of type "u32". But it failed to match. The string was: diff=field1-field2:onmatch(event).trace(synth,$diff) Adding debugging into the kernel, I found that the size of "diff" was 0. And since it was given "unsigned int" as a type, the histogram fallback code used size and signed. The signed matched, but the size of u32 (4) did not match zero, and the event failed to be created. This can be worse if the field you want to match is not one of the acceptable fields for a synthetic event. As event fields can have any type that is supported in Linux, this can cause an issue. For example, if a type is an enum. Then there's no way to use that with any calculations. Have the calculation field simply take on the size of what it is calculating. Link: https://lkml.kernel.org/r/20210730171951.59c7743f@oasis.local.home Cc: Tom Zanussi Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Ingo Molnar Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: 100719dcef447 ("tracing: Add simple expression support to hist triggers") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 34325f41ebc0..362db9b81b8d 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2287,6 +2287,10 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, expr->operands[0] = operand1; expr->operands[1] = operand2; + + /* The operand sizes should be the same, so just pick one */ + expr->size = operand1->size; + expr->operator = field_op; expr->name = expr_str(expr, 0); expr->type = kstrdup(operand1->type, GFP_KERNEL); From a9d10ca4986571bffc19778742d508cc8dd13e02 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 28 Jul 2021 07:55:43 +0900 Subject: [PATCH 454/502] tracing: Reject string operand in the histogram expression Since the string type can not be the target of the addition / subtraction operation, it must be rejected. Without this fix, the string type silently converted to digits. Link: https://lkml.kernel.org/r/162742654278.290973.1523000673366456634.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 100719dcef447 ("tracing: Add simple expression support to hist triggers") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 362db9b81b8d..949ef09dc537 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -65,7 +65,8 @@ C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \ C(EMPTY_SORT_FIELD, "Empty sort field"), \ C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ - C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), + C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \ + C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), #undef C #define C(a, b) HIST_ERR_##a @@ -2156,6 +2157,13 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, ret = PTR_ERR(operand1); goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + /* String type can not be the operand of unary operator. */ + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + destroy_hist_field(operand1, 0); + ret = -EINVAL; + goto free; + } expr->flags |= operand1->flags & (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); @@ -2257,6 +2265,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand1 = NULL; goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str)); + ret = -EINVAL; + goto free; + } /* rest of string could be another expression e.g. b+c in a+b+c */ operand_flags = 0; @@ -2266,6 +2279,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand2 = NULL; goto free; } + if (operand2->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + ret = -EINVAL; + goto free; + } ret = check_expr_operands(file->tr, operand1, operand2); if (ret) From b18b851ba85a5855cb53865fcff3cd2c17b44b0b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 2 Aug 2021 14:03:07 -0700 Subject: [PATCH 455/502] scripts/recordmcount.pl: Remove check_objcopy() and $can_use_local When building ARCH=riscv allmodconfig with llvm-objcopy, the objcopy version warning from this script appears: WARNING: could not find objcopy version or version is less than 2.17. Local function references are disabled. The check_objcopy() function in scripts/recordmcount.pl is set up to parse GNU objcopy's version string, not llvm-objcopy's, which triggers the warning. Commit 799c43415442 ("kbuild: thin archives make default for all archs") made binutils 2.20 mandatory and commit ba64beb17493 ("kbuild: check the minimum assembler version in Kconfig") enforces this at configuration time so just remove check_objcopy() and $can_use_local instead, assuming --globalize-symbol is always available. llvm-objcopy has supported --globalize-symbol since LLVM 7.0.0 in 2018 and the minimum version for building the kernel with LLVM is 10.0.1 so there is no issue introduced: Link: https://github.com/llvm/llvm-project/commit/ee5be798dae30d5f9414b01f76ff807edbc881aa Link: https://lkml.kernel.org/r/20210802210307.3202472-1-nathan@kernel.org Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Steven Rostedt (VMware) --- Makefile | 1 - scripts/recordmcount.pl | 40 ---------------------------------------- 2 files changed, 41 deletions(-) diff --git a/Makefile b/Makefile index e4f5895badb5..d6915f361aa4 100644 --- a/Makefile +++ b/Makefile @@ -546,7 +546,6 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \ PHONY += scripts_basic scripts_basic: $(Q)$(MAKE) $(build)=scripts/basic - $(Q)rm -f .tmp_quiet_recordmcount PHONY += outputmakefile ifdef building_out_of_srctree diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index c17e48020ec3..8f6b13ae46bf 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -173,39 +173,6 @@ my $mcount_regex; # Find the call site to mcount (return offset) my $mcount_adjust; # Address adjustment to mcount offset my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command -my $can_use_local = 0; # If we can use local function references - -# Shut up recordmcount if user has older objcopy -my $quiet_recordmcount = ".tmp_quiet_recordmcount"; -my $print_warning = 1; -$print_warning = 0 if ( -f $quiet_recordmcount); - -## -# check_objcopy - whether objcopy supports --globalize-symbols -# -# --globalize-symbols came out in 2.17, we must test the version -# of objcopy, and if it is less than 2.17, then we can not -# record local functions. -sub check_objcopy -{ - open (IN, "$objcopy --version |") or die "error running $objcopy"; - while () { - if (/objcopy.*\s(\d+)\.(\d+)/) { - $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17)); - last; - } - } - close (IN); - - if (!$can_use_local && $print_warning) { - print STDERR "WARNING: could not find objcopy version or version " . - "is less than 2.17.\n" . - "\tLocal function references are disabled.\n"; - open (QUIET, ">$quiet_recordmcount"); - printf QUIET "Disables the warning from recordmcount.pl\n"; - close QUIET; - } -} if ($arch =~ /(x86(_64)?)|(i386)/) { if ($bits == 64) { @@ -434,8 +401,6 @@ if ($filename =~ m,^(.*)(\.\S),) { my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s"; my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o"; -check_objcopy(); - # # Step 1: find all the local (static functions) and weak symbols. # 't' is local, 'w/W' is weak @@ -473,11 +438,6 @@ sub update_funcs # is this function static? If so, note this fact. if (defined $locals{$ref_func}) { - - # only use locals if objcopy supports globalize-symbols - if (!$can_use_local) { - return; - } $convert{$ref_func} = 1; } From 1c0cec64a7cc545eb49f374a43e9f7190a14defa Mon Sep 17 00:00:00 2001 From: Hui Su Date: Fri, 11 Jun 2021 10:21:07 +0800 Subject: [PATCH 456/502] scripts/tracing: fix the bug that can't parse raw_trace_func Since commit 77271ce4b2c0 ("tracing: Add irq, preempt-count and need resched info to default trace output"), the default trace output format has been changed to: -0 [009] d.h. 22420.068695: _raw_spin_lock_irqsave <-hrtimer_interrupt -0 [000] ..s. 22420.068695: _nohz_idle_balance <-run_rebalance_domains -0 [011] d.h. 22420.068695: account_process_tick <-update_process_times origin trace output format:(before v3.2.0) # tracer: nop # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | migration/0-6 [000] 50.025810: rcu_note_context_switch <-__schedule migration/0-6 [000] 50.025812: trace_rcu_utilization <-rcu_note_context_switch migration/0-6 [000] 50.025813: rcu_sched_qs <-rcu_note_context_switch migration/0-6 [000] 50.025815: rcu_preempt_qs <-rcu_note_context_switch migration/0-6 [000] 50.025817: trace_rcu_utilization <-rcu_note_context_switch migration/0-6 [000] 50.025818: debug_lockdep_rcu_enabled <-__schedule migration/0-6 [000] 50.025820: debug_lockdep_rcu_enabled <-__schedule The draw_functrace.py(introduced in v2.6.28) can't parse the new version format trace_func, So we need modify draw_functrace.py to adapt the new version trace output format. Link: https://lkml.kernel.org/r/20210611022107.608787-1-suhui@zeku.com Cc: stable@vger.kernel.org Fixes: 77271ce4b2c0 tracing: Add irq, preempt-count and need resched info to default trace output Signed-off-by: Hui Su Signed-off-by: Steven Rostedt (VMware) --- scripts/tracing/draw_functrace.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index 74f8aadfd4cb..7011fbe003ff 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -17,7 +17,7 @@ Usage: $ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func Wait some times but not too much, the script is a bit slow. Break the pipe (Ctrl + Z) - $ scripts/draw_functrace.py < raw_trace_func > draw_functrace + $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace Then you have your drawn trace in draw_functrace """ @@ -103,10 +103,10 @@ def parseLine(line): line = line.strip() if line.startswith("#"): raise CommentLineException - m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line) + m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line) if m is None: raise BrokenLineException - return (m.group(1), m.group(2), m.group(3)) + return (m.group(2), m.group(3), m.group(4)) def main(): From d5aaad6f83420efb8357ac8e11c868708b22d0a9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 4 Aug 2021 14:46:09 -0700 Subject: [PATCH 457/502] KVM: x86/mmu: Fix per-cpu counter corruption on 32-bit builds Take a signed 'long' instead of an 'unsigned long' for the number of pages to add/subtract to the total number of pages used by the MMU. This fixes a zero-extension bug on 32-bit kernels that effectively corrupts the per-cpu counter used by the shrinker. Per-cpu counters take a signed 64-bit value on both 32-bit and 64-bit kernels, whereas kvm_mod_used_mmu_pages() takes an unsigned long and thus an unsigned 32-bit value on 32-bit kernels. As a result, the value used to adjust the per-cpu counter is zero-extended (unsigned -> signed), not sign-extended (signed -> signed), and so KVM's intended -1 gets morphed to 4294967295 and effectively corrupts the counter. This was found by a staggering amount of sheer dumb luck when running kvm-unit-tests on a 32-bit KVM build. The shrinker just happened to kick in while running tests and do_shrink_slab() logged an error about trying to free a negative number of objects. The truly lucky part is that the kernel just happened to be a slightly stale build, as the shrinker no longer yells about negative objects as of commit 18bb473e5031 ("mm: vmscan: shrink deferred objects proportional to priority"). vmscan: shrink_slab: mmu_shrink_scan+0x0/0x210 [kvm] negative objects to delete nr=-858993460 Fixes: bc8a3d8925a8 ("kvm: mmu: Fix overflow on kvm mmu page limit calculation") Cc: stable@vger.kernel.org Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210804214609.1096003-1-seanjc@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 66f7f5bc3482..c4f4fa23320e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); From 8da0e55c7988ef9f08a708c38e5c75ecd8862cf8 Mon Sep 17 00:00:00 2001 From: David Bauer Date: Thu, 5 Aug 2021 01:25:22 +0200 Subject: [PATCH 458/502] USB: serial: ftdi_sio: add device ID for Auto-M3 OP-COM v2 The Auto-M3 OP-COM v2 is a OBD diagnostic device using a FTD232 for the USB connection. Signed-off-by: David Bauer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 4a1f3a95d017..33bbb3470ca3 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -219,6 +219,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_MTXORB_6_PID) }, { USB_DEVICE(FTDI_VID, FTDI_R2000KU_TRUE_RNG) }, { USB_DEVICE(FTDI_VID, FTDI_VARDAAN_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_AUTO_M3_OP_COM_V2_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0100_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0101_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_0102_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index add602bebd82..755858ca20ba 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -159,6 +159,9 @@ /* Vardaan Enterprises Serial Interface VEUSB422R3 */ #define FTDI_VARDAAN_PID 0xF070 +/* Auto-M3 Ltd. - OP-COM USB V2 - OBD interface Adapter */ +#define FTDI_AUTO_M3_OP_COM_V2_PID 0x4f50 + /* * Xsens Technologies BV products (http://www.xsens.com). */ From d25d85061bd856d6be221626605319154f9b5043 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Thu, 29 Jul 2021 00:33:14 -0700 Subject: [PATCH 459/502] usb: dwc3: gadget: Use list_replace_init() before traversing lists The list_for_each_entry_safe() macro saves the current item (n) and the item after (n+1), so that n can be safely removed without corrupting the list. However, when traversing the list and removing items using gadget giveback, the DWC3 lock is briefly released, allowing other routines to execute. There is a situation where, while items are being removed from the cancelled_list using dwc3_gadget_ep_cleanup_cancelled_requests(), the pullup disable routine is running in parallel (due to UDC unbind). As the cleanup routine removes n, and the pullup disable removes n+1, once the cleanup retakes the DWC3 lock, it references a request who was already removed/handled. With list debug enabled, this leads to a panic. Ensure all instances of the macro are replaced where gadget giveback is used. Example call stack: Thread#1: __dwc3_gadget_ep_set_halt() - CLEAR HALT -> dwc3_gadget_ep_cleanup_cancelled_requests() ->list_for_each_entry_safe() ->dwc3_gadget_giveback(n) ->dwc3_gadget_del_and_unmap_request()- n deleted[cancelled_list] ->spin_unlock ->Thread#2 executes ... ->dwc3_gadget_giveback(n+1) ->Already removed! Thread#2: dwc3_gadget_pullup() ->waiting for dwc3 spin_lock ... ->Thread#1 released lock ->dwc3_stop_active_transfers() ->dwc3_remove_requests() ->fetches n+1 item from cancelled_list (n removed by Thread#1) ->dwc3_gadget_giveback() ->dwc3_gadget_del_and_unmap_request()- n+1 deleted[cancelled_list] ->spin_unlock Fix this condition by utilizing list_replace_init(), and traversing through a local copy of the current elements in the endpoint lists. This will also set the parent list as empty, so if another thread is also looping through the list, it will be empty on the next iteration. Fixes: d4f1afe5e896 ("usb: dwc3: gadget: move requests to cancelled_list") Cc: stable Acked-by: Felipe Balbi Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1627543994-20327-1-git-send-email-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 45f2bc0807e8..a1b262669574 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1741,9 +1741,13 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) { struct dwc3_request *req; struct dwc3_request *tmp; + struct list_head local; struct dwc3 *dwc = dep->dwc; - list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) { +restart: + list_replace_init(&dep->cancelled_list, &local); + + list_for_each_entry_safe(req, tmp, &local, list) { dwc3_gadget_ep_skip_trbs(dep, req); switch (req->status) { case DWC3_REQUEST_STATUS_DISCONNECTED: @@ -1761,6 +1765,9 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) break; } } + + if (!list_empty(&dep->cancelled_list)) + goto restart; } static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, @@ -2958,8 +2965,12 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, { struct dwc3_request *req; struct dwc3_request *tmp; + struct list_head local; - list_for_each_entry_safe(req, tmp, &dep->started_list, list) { +restart: + list_replace_init(&dep->started_list, &local); + + list_for_each_entry_safe(req, tmp, &local, list) { int ret; ret = dwc3_gadget_ep_cleanup_completed_request(dep, event, @@ -2967,6 +2978,9 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, if (ret) break; } + + if (!list_empty(&dep->started_list)) + goto restart; } static bool dwc3_gadget_ep_should_continue(struct dwc3_ep *dep) From cb10f68ad8150f243964b19391711aaac5e8ff42 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 3 Aug 2021 23:24:05 -0700 Subject: [PATCH 460/502] usb: dwc3: gadget: Avoid runtime resume if disabling pullup If the device is already in the runtime suspended state, any call to the pullup routine will issue a runtime resume on the DWC3 core device. If the USB gadget is disabling the pullup, then avoid having to issue a runtime resume, as DWC3 gadget has already been halted/stopped. This fixes an issue where the following condition occurs: usb_gadget_remove_driver() -->usb_gadget_disconnect() -->dwc3_gadget_pullup(0) -->pm_runtime_get_sync() -> ret = 0 -->pm_runtime_put() [async] -->usb_gadget_udc_stop() -->dwc3_gadget_stop() -->dwc->gadget_driver = NULL ... dwc3_suspend_common() -->dwc3_gadget_suspend() -->DWC3 halt/stop routine skipped, driver_data == NULL This leads to a situation where the DWC3 gadget is not properly stopped, as the runtime resume would have re-enabled EP0 and event interrupts, and since we avoided the DWC3 gadget suspend, these resources were never disabled. Fixes: 77adb8bdf422 ("usb: dwc3: gadget: Allow runtime suspend if UDC unbinded") Cc: stable Acked-by: Felipe Balbi Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1628058245-30692-1-git-send-email-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index a1b262669574..b8d4b2d327b2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2256,6 +2256,17 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) } } + /* + * Avoid issuing a runtime resume if the device is already in the + * suspended state during gadget disconnect. DWC3 gadget was already + * halted/stopped during runtime suspend. + */ + if (!is_on) { + pm_runtime_barrier(dwc->dev); + if (pm_runtime_suspended(dwc->dev)) + return 0; + } + /* * Check the return value for successful resume, or error. For a * successful resume, the DWC3 runtime PM resume routine will handle From 6aa32467299e9e12280a6aec9dbc21bf2db830b0 Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Wed, 21 Jul 2021 17:30:45 +0800 Subject: [PATCH 461/502] MIPS: check return value of pgtable_pmd_page_ctor +. According to Documentation/vm/split_page_table_lock, handle failure of pgtable_pmd_page_ctor +. Use GFP_KERNEL_ACCOUNT instead of GFP_KERNEL|__GFP_ACCOUNT +. Adjust coding style Fixes: ed914d48b6a1 ("MIPS: add PMD table accounting into MIPS') Reported-by: Joshua Kinard Signed-off-by: Huang Pei Reviewed-by: Joshua Kinard Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/pgalloc.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 4b2567d6b2df..c7925d0e9874 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -58,15 +58,20 @@ do { \ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = NULL; + pmd_t *pmd; struct page *pg; - pg = alloc_pages(GFP_KERNEL | __GFP_ACCOUNT, PMD_ORDER); - if (pg) { - pgtable_pmd_page_ctor(pg); - pmd = (pmd_t *)page_address(pg); - pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); + pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_ORDER); + if (!pg) + return NULL; + + if (!pgtable_pmd_page_ctor(pg)) { + __free_pages(pg, PMD_ORDER); + return NULL; } + + pmd = (pmd_t *)page_address(pg); + pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; } From 43ad944cd73f2360ec8ff31d29ea44830b3119af Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Tue, 3 Aug 2021 17:13:14 +0800 Subject: [PATCH 462/502] usb: typec: tcpm: Keep other events when receiving FRS and Sourcing_vbus events When receiving FRS and Sourcing_Vbus events from low-level drivers, keep other events which come a bit earlier so that they will not be ignored in the event handler. Fixes: 8dc4bd073663 ("usb: typec: tcpm: Add support for Sink Fast Role SWAP(FRS)") Cc: stable Cc: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Reviewed-by: Badhri Jagan Sridharan Signed-off-by: Kyle Tso Link: https://lore.kernel.org/r/20210803091314.3051302-1-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 5b22a1c931a9..b9bb63d749ec 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5369,7 +5369,7 @@ EXPORT_SYMBOL_GPL(tcpm_pd_hard_reset); void tcpm_sink_frs(struct tcpm_port *port) { spin_lock(&port->pd_event_lock); - port->pd_events = TCPM_FRS_EVENT; + port->pd_events |= TCPM_FRS_EVENT; spin_unlock(&port->pd_event_lock); kthread_queue_work(port->wq, &port->event_work); } @@ -5378,7 +5378,7 @@ EXPORT_SYMBOL_GPL(tcpm_sink_frs); void tcpm_sourcing_vbus(struct tcpm_port *port) { spin_lock(&port->pd_event_lock); - port->pd_events = TCPM_SOURCING_VBUS; + port->pd_events |= TCPM_SOURCING_VBUS; spin_unlock(&port->pd_event_lock); kthread_queue_work(port->wq, &port->event_work); } From 5a7c1b2a5bb4461967b15f3484a0ff75d3199719 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Wed, 4 Aug 2021 21:39:49 +0530 Subject: [PATCH 463/502] net: wwan: iosm: fix lkp buildbot warning Correct td buffer type casting & format specifier to fix lkp buildbot warning. Reported-by: kernel test robot Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c index 91109e27efd3..35d590743d3a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c @@ -412,8 +412,8 @@ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol, } if (p_td->buffer.address != IPC_CB(skb)->mapping) { - dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p", - (void *)p_td->buffer.address, skb->data); + dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p", + (unsigned long long)p_td->buffer.address, skb->data); ipc_pcie_kfree_skb(ipc_protocol->pcie, skb); skb = NULL; goto ret; From b46c5795d641b759eb0f001ab21852fe5df5ef92 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Wed, 4 Aug 2021 21:39:50 +0530 Subject: [PATCH 464/502] net: wwan: iosm: endianness type correction Endianness type correction for nr_of_bytes. This field is exchanged as part of host-device protocol communication. Signed-off-by: M Chetan Kumar Reviewed-by: Loic Poulain Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 4 ++-- drivers/net/wwan/iosm/iosm_ipc_mux_codec.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index 562de275797a..bdb2d32cdb6d 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -320,7 +320,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, return; } - ul_credits = fct->vfl.nr_of_bytes; + ul_credits = le32_to_cpu(fct->vfl.nr_of_bytes); dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d", if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits); @@ -586,7 +586,7 @@ static bool ipc_mux_lite_send_qlt(struct iosm_mux *ipc_mux) qlt->reserved[0] = 0; qlt->reserved[1] = 0; - qlt->vfl.nr_of_bytes = session->ul_list.qlen; + qlt->vfl.nr_of_bytes = cpu_to_le32(session->ul_list.qlen); /* Add QLT to the transfer list. */ skb_queue_tail(&ipc_mux->channel->ul_list, diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h index 4a74e3c9457f..aae83db5cbb8 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h @@ -106,7 +106,7 @@ struct mux_lite_cmdh { * @nr_of_bytes: Number of bytes available to transmit in the queue. */ struct mux_lite_vfl { - u32 nr_of_bytes; + __le32 nr_of_bytes; }; /** From c98f5220e9703db2d73b4e89c07879dc61eeab14 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Wed, 4 Aug 2021 21:39:51 +0530 Subject: [PATCH 465/502] net: wwan: iosm: correct data protocol mask bit Correct ul/dl data protocol mask bit to know which protocol capability does device implement. Signed-off-by: M Chetan Kumar Reviewed-by: Loic Poulain Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_mmio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.h b/drivers/net/wwan/iosm/iosm_ipc_mmio.h index 45e6923da78f..f861994a6d90 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mmio.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.h @@ -10,10 +10,10 @@ #define IOSM_CP_VERSION 0x0100UL /* DL dir Aggregation support mask */ -#define DL_AGGR BIT(23) +#define DL_AGGR BIT(9) /* UL dir Aggregation support mask */ -#define UL_AGGR BIT(22) +#define UL_AGGR BIT(8) /* UL flow credit support mask */ #define UL_FLOW_CREDIT BIT(21) From 679505baaaabed98359c1dfb78f81600e299af21 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Wed, 4 Aug 2021 21:39:52 +0530 Subject: [PATCH 466/502] net: wwan: iosm: fix recursive lock acquire in unregister Calling unregister_netdevice() inside wwan del link is trying to acquire the held lock in ndo_stop_cb(). Instead, queue net dev to be unregistered later. Signed-off-by: M Chetan Kumar Reviewed-by: Loic Poulain Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index b2357ad5d517..b571d9cedba4 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -228,7 +228,7 @@ static void ipc_wwan_dellink(void *ctxt, struct net_device *dev, RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL); /* unregistering includes synchronize_net() */ - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); unlock: mutex_unlock(&ipc_wwan->if_mutex); From fa953adfad7cf9c7e30d9ea0e4ccfd38cfb5495d Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 8 Jul 2021 10:57:09 +0200 Subject: [PATCH 467/502] x86/tools/relocs: Fix non-POSIX regexp Trying to run a cross-compiled x86 relocs tool on a BSD based HOSTCC leads to errors like VOFFSET arch/x86/boot/compressed/../voffset.h - due to: vmlinux CC arch/x86/boot/compressed/misc.o - due to: arch/x86/boot/compressed/../voffset.h OBJCOPY arch/x86/boot/compressed/vmlinux.bin - due to: vmlinux RELOCS arch/x86/boot/compressed/vmlinux.relocs - due to: vmlinux empty (sub)expressionarch/x86/boot/compressed/Makefile:118: recipe for target 'arch/x86/boot/compressed/vmlinux.relocs' failed make[3]: *** [arch/x86/boot/compressed/vmlinux.relocs] Error 1 It turns out that relocs.c uses patterns like "something(|_end)" This is not valid syntax or gives undefined results according to POSIX 9.5.3 ERE Grammar https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html It seems to be silently accepted by the Linux regexp() implementation while a BSD host complains. Such patterns can be replaced by a transformation like "(|p1|p2)" -> "(p1|p2)?" Fixes: fd952815307f ("x86-32, relocs: Whitelist more symbols for ld bug workaround") Signed-off-by: H. Nikolaus Schaller Signed-off-by: Masahiro Yamada --- arch/x86/tools/relocs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 04c5a44b9682..9ba700dc47de 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -57,12 +57,12 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { [S_REL] = "^(__init_(begin|end)|" "__x86_cpu_dev_(start|end)|" - "(__parainstructions|__alt_instructions)(|_end)|" - "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" + "(__parainstructions|__alt_instructions)(_end)?|" + "(__iommu_table|__apicdrivers|__smp_locks)(_end)?|" "__(start|end)_pci_.*|" "__(start|end)_builtin_fw|" - "__(start|stop)___ksymtab(|_gpl)|" - "__(start|stop)___kcrctab(|_gpl)|" + "__(start|stop)___ksymtab(_gpl)?|" + "__(start|stop)___kcrctab(_gpl)?|" "__(start|stop)___param|" "__(start|stop)___modver|" "__(start|stop)___bug_table|" From 28bbbb9875a35975904e46f9b06fa689d051b290 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 8 Jul 2021 10:57:10 +0200 Subject: [PATCH 468/502] mips: Fix non-POSIX regexp When cross compiling a MIPS kernel on a BSD based HOSTCC leads to errors like SYNC include/config/auto.conf.cmd - due to: .config egrep: empty (sub)expression UPD include/config/kernel.release HOSTCC scripts/dtc/dtc.o - due to target missing It turns out that egrep uses this egrep pattern: (|MINOR_|PATCHLEVEL_) This is not valid syntax or gives undefined results according to POSIX 9.5.3 ERE Grammar https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html It seems to be silently accepted by the Linux egrep implementation while a BSD host complains. Such patterns can be replaced by a transformation like "(|p1|p2)" -> "(p1|p2)?" Fixes: 48c35b2d245f ("[MIPS] There is no __GNUC_MAJOR__") Signed-off-by: H. Nikolaus Schaller Signed-off-by: Masahiro Yamada --- arch/mips/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 4e942b7ef022..653befc1b176 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -321,7 +321,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_MIPS CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \ + egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif From 54eacba0e3bbda9777788b44b45a5186918569f2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 26 Jul 2021 19:57:37 -0700 Subject: [PATCH 469/502] scripts: checkversion: modernize linux/version.h search strings Update scripts/checkversion.pl to recognize the current contents of and both of its current locations. Also update my email address. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- scripts/checkversion.pl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/scripts/checkversion.pl b/scripts/checkversion.pl index f67b125c5269..94cd49eff605 100755 --- a/scripts/checkversion.pl +++ b/scripts/checkversion.pl @@ -1,10 +1,10 @@ #! /usr/bin/env perl # SPDX-License-Identifier: GPL-2.0 # -# checkversion find uses of LINUX_VERSION_CODE or KERNEL_VERSION -# without including , or cases of -# including that don't need it. -# Copyright (C) 2003, Randy Dunlap +# checkversion finds uses of all macros in +# where the source files do not #include ; or cases +# of including where it is not needed. +# Copyright (C) 2003, Randy Dunlap use strict; @@ -13,7 +13,8 @@ $| = 1; my $debugging; foreach my $file (@ARGV) { - next if $file =~ "include/linux/version\.h"; + next if $file =~ "include/generated/uapi/linux/version\.h"; + next if $file =~ "usr/include/linux/version\.h"; # Open this file. open( my $f, '<', $file ) or die "Can't open $file: $!\n"; @@ -41,8 +42,11 @@ foreach my $file (@ARGV) { $iLinuxVersion = $. if m/^\s*#\s*include\s*/o; } - # Look for uses: LINUX_VERSION_CODE, KERNEL_VERSION, UTS_RELEASE - if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/)) { + # Look for uses: LINUX_VERSION_CODE, KERNEL_VERSION, + # LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL + if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/) || + ($_ =~ /LINUX_VERSION_MAJOR/) || ($_ =~ /LINUX_VERSION_PATCHLEVEL/) || + ($_ =~ /LINUX_VERSION_SUBLEVEL/)) { $fUseVersion = 1; last if $iLinuxVersion; } From 14ccc638b02f9ec500c17d9e39efe979145a4b61 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 09:12:54 +0900 Subject: [PATCH 470/502] kbuild: cancel sub_make_done for the install target to fix DKMS Since commit bcf637f54f6d ("kbuild: parse C= and M= before changing the working directory"), external module builds invoked by DKMS fail because M= option is not parsed. I wanted to add 'unset sub_make_done' in install.sh but similar scripts, arch/*/boot/install.sh, are duplicated, so I set sub_make_done empty in the top Makefile. Fixes: bcf637f54f6d ("kbuild: parse C= and M= before changing the working directory") Reported-by: John S Gruber Signed-off-by: Masahiro Yamada Tested-by: John S Gruber --- Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Makefile b/Makefile index 27a072cffcb9..efb294568f28 100644 --- a/Makefile +++ b/Makefile @@ -1317,6 +1317,16 @@ PHONY += scripts_unifdef scripts_unifdef: scripts_basic $(Q)$(MAKE) $(build)=scripts scripts/unifdef +# --------------------------------------------------------------------------- +# Install + +# Many distributions have the custom install script, /sbin/installkernel. +# If DKMS is installed, 'make install' will eventually recuses back +# to the this Makefile to build and install external modules. +# Cancel sub_make_done so that options such as M=, V=, etc. are parsed. + +install: sub_make_done := + # --------------------------------------------------------------------------- # Tools From fb653827c758725b149b5c924a5eb50ab4812750 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Aug 2021 13:38:26 +0300 Subject: [PATCH 471/502] bnx2x: fix an error code in bnx2x_nic_load() Set the error code if bnx2x_alloc_fw_stats_mem() fails. The current code returns success. Fixes: ad5afc89365e ("bnx2x: Separate VF and PF logic") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1a6ec1a12d53..b5d954cb409a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2669,7 +2669,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } /* Allocated memory for FW statistics */ - if (bnx2x_alloc_fw_stats_mem(bp)) + rc = bnx2x_alloc_fw_stats_mem(bp); + if (rc) LOAD_ERROR_EXIT(bp, load_error0); /* request pf to initialize status blocks */ From ae03d189bae306e1e00aa631feee090ebda6cf63 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 5 Aug 2021 13:14:09 +0300 Subject: [PATCH 472/502] net: ethernet: ti: am65-cpsw: fix crash in am65_cpsw_port_offload_fwd_mark_update() The am65_cpsw_port_offload_fwd_mark_update() causes NULL exception crash when there is at least one disabled port and any other port added to the bridge first time. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000858 pc : am65_cpsw_port_offload_fwd_mark_update+0x54/0x68 lr : am65_cpsw_netdevice_event+0x8c/0xf0 Call trace: am65_cpsw_port_offload_fwd_mark_update+0x54/0x68 notifier_call_chain+0x54/0x98 raw_notifier_call_chain+0x14/0x20 call_netdevice_notifiers_info+0x34/0x78 __netdev_upper_dev_link+0x1c8/0x290 netdev_master_upper_dev_link+0x1c/0x28 br_add_if+0x3f0/0x6d0 [bridge] Fix it by adding proper check for port->ndev != NULL. Fixes: 2934db9bcb30 ("net: ti: am65-cpsw-nuss: Add netdevice notifiers") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 718539cdd2f2..67a08cbba859 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2060,8 +2060,12 @@ static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *comm for (i = 1; i <= common->port_num; i++) { struct am65_cpsw_port *port = am65_common_get_port(common, i); - struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev); + struct am65_cpsw_ndev_priv *priv; + if (!port->ndev) + continue; + + priv = am65_ndev_to_priv(port->ndev); priv->offload_fwd_mark = set_val; } } From 51397dc6f283bb570e1cf8226017d300d8ea1f5b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 4 Aug 2021 14:18:48 -0400 Subject: [PATCH 473/502] tracing: Quiet smp_processor_id() use in preemptable warning in hwlat The hardware latency detector (hwlat) has a mode that it runs one thread across CPUs. The logic to move from the currently running CPU to the next one in the list does a smp_processor_id() to find where it currently is. Unfortunately, it's done with preemption enabled, and this triggers a warning for using smp_processor_id() in a preempt enabled section. As it is only using smp_processor_id() to get information on where it currently is in order to simply move it to the next CPU, it doesn't really care if it got moved in the mean time. It will simply balance out later if such a case arises. Switch smp_processor_id() to raw_smp_processor_id() to quiet that warning. Link: https://lkml.kernel.org/r/20210804141848.79edadc0@oasis.local.home Acked-by: Daniel Bristot de Oliveira Fixes: 8fa826b7344d ("trace/hwlat: Implement the mode config option") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_hwlat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index a6c0cdaf4b87..14f46aae1981 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -327,7 +327,7 @@ static void move_to_next_cpu(void) get_online_cpus(); cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); - next_cpu = cpumask_next(smp_processor_id(), current_mask); + next_cpu = cpumask_next(raw_smp_processor_id(), current_mask); put_online_cpus(); if (next_cpu >= nr_cpu_ids) From af35fc37354cda3c9c8cc4961b1d24bdc9d27903 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 4 Aug 2021 17:30:05 +0300 Subject: [PATCH 474/502] net: pegasus: fix uninit-value in get_interrupt_interval Syzbot reported uninit value pegasus_probe(). The problem was in missing error handling. get_interrupt_interval() internally calls read_eprom_word() which can fail in some cases. For example: failed to receive usb control message. These cases should be handled to prevent uninit value bug, since read_eprom_word() will not initialize passed stack variable in case of internal failure. Fail log: BUG: KMSAN: uninit-value in get_interrupt_interval drivers/net/usb/pegasus.c:746 [inline] BUG: KMSAN: uninit-value in pegasus_probe+0x10e7/0x4080 drivers/net/usb/pegasus.c:1152 CPU: 1 PID: 825 Comm: kworker/1:1 Not tainted 5.12.0-rc6-syzkaller #0 ... Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x24c/0x2e0 lib/dump_stack.c:120 kmsan_report+0xfb/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x5c/0xa0 mm/kmsan/kmsan_instr.c:197 get_interrupt_interval drivers/net/usb/pegasus.c:746 [inline] pegasus_probe+0x10e7/0x4080 drivers/net/usb/pegasus.c:1152 .... Local variable ----data.i@pegasus_probe created at: get_interrupt_interval drivers/net/usb/pegasus.c:1151 [inline] pegasus_probe+0xe57/0x4080 drivers/net/usb/pegasus.c:1152 get_interrupt_interval drivers/net/usb/pegasus.c:1151 [inline] pegasus_probe+0xe57/0x4080 drivers/net/usb/pegasus.c:1152 Reported-and-tested-by: syzbot+02c9f70f3afae308464a@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/20210804143005.439-1-paskripkin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/pegasus.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index f18b03be1b87..652e9fcf0b77 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -744,12 +744,16 @@ static inline void disable_net_traffic(pegasus_t *pegasus) set_registers(pegasus, EthCtrl0, sizeof(tmp), &tmp); } -static inline void get_interrupt_interval(pegasus_t *pegasus) +static inline int get_interrupt_interval(pegasus_t *pegasus) { u16 data; u8 interval; + int ret; + + ret = read_eprom_word(pegasus, 4, &data); + if (ret < 0) + return ret; - read_eprom_word(pegasus, 4, &data); interval = data >> 8; if (pegasus->usb->speed != USB_SPEED_HIGH) { if (interval < 0x80) { @@ -764,6 +768,8 @@ static inline void get_interrupt_interval(pegasus_t *pegasus) } } pegasus->intr_interval = interval; + + return 0; } static void set_carrier(struct net_device *net) @@ -1165,7 +1171,9 @@ static int pegasus_probe(struct usb_interface *intf, | NETIF_MSG_PROBE | NETIF_MSG_LINK); pegasus->features = usb_dev_id[dev_index].private; - get_interrupt_interval(pegasus); + res = get_interrupt_interval(pegasus); + if (res) + goto out2; if (reset_mac(pegasus)) { dev_err(&intf->dev, "can't reset MAC\n"); res = -EIO; From 44712965bf12ae1758cec4de53816ed4b914ca1a Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 4 Aug 2021 18:51:51 +0300 Subject: [PATCH 475/502] net: fec: fix use-after-free in fec_drv_remove Smatch says: drivers/net/ethernet/freescale/fec_main.c:3994 fec_drv_remove() error: Using fep after free_{netdev,candev}(ndev); drivers/net/ethernet/freescale/fec_main.c:3995 fec_drv_remove() error: Using fep after free_{netdev,candev}(ndev); Since fep pointer is netdev private data, accessing it after free_netdev() call can cause use-after-free bug. Fix it by moving free_netdev() call at the end of the function Reported-by: Dan Carpenter Fixes: a31eda65ba21 ("net: fec: fix clock count mis-match") Signed-off-by: Pavel Skripkin Reviewed-by: Joakim Zhang Reviewed-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 8aea707a65a7..7e4c4980ced7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3843,13 +3843,13 @@ fec_drv_remove(struct platform_device *pdev) if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); - free_netdev(ndev); clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); + free_netdev(ndev); return 0; } From 942e560a3d3862dd5dee1411dbdd7097d29b8416 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 4 Aug 2021 18:52:20 +0300 Subject: [PATCH 476/502] net: vxge: fix use-after-free in vxge_device_unregister Smatch says: drivers/net/ethernet/neterion/vxge/vxge-main.c:3518 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev); drivers/net/ethernet/neterion/vxge/vxge-main.c:3518 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev); drivers/net/ethernet/neterion/vxge/vxge-main.c:3520 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev); drivers/net/ethernet/neterion/vxge/vxge-main.c:3520 vxge_device_unregister() error: Using vdev after free_{netdev,candev}(dev); Since vdev pointer is netdev private data accessing it after free_netdev() call can cause use-after-free bug. Fix it by moving free_netdev() call at the end of the function Fixes: 6cca200362b4 ("vxge: cleanup probe error paths") Reported-by: Dan Carpenter Signed-off-by: Pavel Skripkin Reviewed-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/neterion/vxge/vxge-main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 82eef4c72f01..7abd13e69471 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3512,13 +3512,13 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev) kfree(vdev->vpaths); - /* we are safe to free it now */ - free_netdev(dev); - vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered", buf); vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d Exiting...", buf, __func__, __LINE__); + + /* we are safe to free it now */ + free_netdev(dev); } /* From 8d75d0eff6887bcac7225e12b9c75595e523d92d Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 5 Aug 2021 20:46:45 +0800 Subject: [PATCH 477/502] blk-iolatency: error out if blk_get_queue() failed in iolatency_set_limit() If queue is dying while iolatency_set_limit() is in progress, blk_get_queue() won't increment the refcount of the queue. However, blk_put_queue() will still decrement the refcount later, which will cause the refcout to be unbalanced. Thus error out in such case to fix the problem. Fixes: 8c772a9bfc7c ("blk-iolatency: fix IO hang due to negative inflight counter") Signed-off-by: Yu Kuai Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210805124645.543797-1-yukuai3@huawei.com Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 81be0096411d..d8b0d8bd132b 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -833,7 +833,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, enable = iolatency_set_min_lat_nsec(blkg, lat_val); if (enable) { - WARN_ON_ONCE(!blk_get_queue(blkg->q)); + if (!blk_get_queue(blkg->q)) { + ret = -ENODEV; + goto out; + } + blkg_get(blkg); } From 46c4c9d1beb7f5b4cec4dd90e7728720583ee348 Mon Sep 17 00:00:00 2001 From: "Alex Xu (Hello71)" Date: Thu, 5 Aug 2021 10:40:47 -0400 Subject: [PATCH 478/502] pipe: increase minimum default pipe size to 2 pages This program always prints 4096 and hangs before the patch, and always prints 8192 and exits successfully after: int main() { int pipefd[2]; for (int i = 0; i < 1025; i++) if (pipe(pipefd) == -1) return 1; size_t bufsz = fcntl(pipefd[1], F_GETPIPE_SZ); printf("%zd\n", bufsz); char *buf = calloc(bufsz, 1); write(pipefd[1], buf, bufsz); read(pipefd[0], buf, bufsz-1); write(pipefd[1], buf, 1); } Note that you may need to increase your RLIMIT_NOFILE before running the program. Fixes: 759c01142a ("pipe: limit the per-user amount of pages allocated in pipes") Cc: Link: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/ Link: https://lore.kernel.org/lkml/1628127094.lxxn016tj7.none@localhost/ Signed-off-by: Alex Xu (Hello71) Signed-off-by: Linus Torvalds --- fs/pipe.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 9ef4231cce61..8e6ef62aeb1c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -31,6 +31,21 @@ #include "internal.h" +/* + * New pipe buffers will be restricted to this size while the user is exceeding + * their pipe buffer quota. The general pipe use case needs at least two + * buffers: one for data yet to be read, and one for new data. If this is less + * than two, then a write to a non-empty pipe may block even if the pipe is not + * full. This can occur with GNU make jobserver or similar uses of pipes as + * semaphores: multiple processes may be waiting to write tokens back to the + * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/. + * + * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their + * own risk, namely: pipe writes to non-full pipes may block until the pipe is + * emptied. + */ +#define PIPE_MIN_DEF_BUFFERS 2 + /* * The max size that a non-root user is allowed to grow the pipe. Can * be set by root in /proc/sys/fs/pipe-max-size @@ -781,8 +796,8 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) { - user_bufs = account_pipe_buffers(user, pipe_bufs, 1); - pipe_bufs = 1; + user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS); + pipe_bufs = PIPE_MIN_DEF_BUFFERS; } if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) From 2e9fb2c11e0ec3113fcf0e8e052c99ecd82fcd4e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Aug 2021 10:34:47 -0700 Subject: [PATCH 479/502] block/partitions/ldm.c: Fix a kernel-doc warning Fix the following kernel-doc warning that appears when building with W=1: block/partitions/ldm.c:31: warning: expecting prototype for ldm(). Prototype was for ldm_debug() instead Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20210805173447.3249906-1-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/partitions/ldm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index d333786b5c7e..14b124cdacfc 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) * * Copyright (C) 2001,2002 Richard Russon From e04480920d1eec9c061841399aa6f35b6f987d8b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 4 Aug 2021 19:26:56 +0900 Subject: [PATCH 480/502] Bluetooth: defer cleanup of resources in hci_unregister_dev() syzbot is hitting might_sleep() warning at hci_sock_dev_event() due to calling lock_sock() with rw spinlock held [1]. It seems that history of this locking problem is a trial and error. Commit b40df5743ee8 ("[PATCH] bluetooth: fix socket locking in hci_sock_dev_event()") in 2.6.21-rc4 changed bh_lock_sock() to lock_sock() as an attempt to fix lockdep warning. Then, commit 4ce61d1c7a8e ("[BLUETOOTH]: Fix locking in hci_sock_dev_event().") in 2.6.22-rc2 changed lock_sock() to local_bh_disable() + bh_lock_sock_nested() as an attempt to fix the sleep in atomic context warning. Then, commit 4b5dd696f81b ("Bluetooth: Remove local_bh_disable() from hci_sock.c") in 3.3-rc1 removed local_bh_disable(). Then, commit e305509e678b ("Bluetooth: use correct lock to prevent UAF of hdev object") in 5.13-rc5 again changed bh_lock_sock_nested() to lock_sock() as an attempt to fix CVE-2021-3573. This difficulty comes from current implementation that hci_sock_dev_event(HCI_DEV_UNREG) is responsible for dropping all references from sockets because hci_unregister_dev() immediately reclaims resources as soon as returning from hci_sock_dev_event(HCI_DEV_UNREG). But the history suggests that hci_sock_dev_event(HCI_DEV_UNREG) was not doing what it should do. Therefore, instead of trying to detach sockets from device, let's accept not detaching sockets from device at hci_sock_dev_event(HCI_DEV_UNREG), by moving actual cleanup of resources from hci_unregister_dev() to hci_cleanup_dev() which is called by bt_host_release() when all references to this unregistered device (which is a kobject) are gone. Since hci_sock_dev_event(HCI_DEV_UNREG) no longer resets hci_pi(sk)->hdev, we need to check whether this device was unregistered and return an error based on HCI_UNREGISTER flag. There might be subtle behavioral difference in "monitor the hdev" functionality; please report if you found something went wrong due to this patch. Link: https://syzkaller.appspot.com/bug?extid=a5df189917e79d5e59c9 [1] Reported-by: syzbot Suggested-by: Linus Torvalds Signed-off-by: Tetsuo Handa Fixes: e305509e678b ("Bluetooth: use correct lock to prevent UAF of hdev object") Acked-by: Luiz Augusto von Dentz Signed-off-by: Linus Torvalds --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 16 +++++------ net/bluetooth/hci_sock.c | 49 +++++++++++++++++++++----------- net/bluetooth/hci_sysfs.c | 3 ++ 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a53e94459ecd..db4312e44d47 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1230,6 +1230,7 @@ struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); +void hci_cleanup_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2560ed2f144d..e1a545c8a69f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3996,14 +3996,10 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { - int id; - BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); hci_dev_set_flag(hdev, HCI_UNREGISTER); - id = hdev->id; - write_lock(&hci_dev_list_lock); list_del(&hdev->list); write_unlock(&hci_dev_list_lock); @@ -4038,7 +4034,14 @@ void hci_unregister_dev(struct hci_dev *hdev) } device_del(&hdev->dev); + /* Actual cleanup is deferred until hci_cleanup_dev(). */ + hci_dev_put(hdev); +} +EXPORT_SYMBOL(hci_unregister_dev); +/* Cleanup HCI device */ +void hci_cleanup_dev(struct hci_dev *hdev) +{ debugfs_remove_recursive(hdev->debugfs); kfree_const(hdev->hw_info); kfree_const(hdev->fw_info); @@ -4063,11 +4066,8 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_blocked_keys_clear(hdev); hci_dev_unlock(hdev); - hci_dev_put(hdev); - - ida_simple_remove(&hci_index_ida, id); + ida_simple_remove(&hci_index_ida, hdev->id); } -EXPORT_SYMBOL(hci_unregister_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b04a5a02ecf3..f1128c2134f0 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -59,6 +59,17 @@ struct hci_pinfo { char comm[TASK_COMM_LEN]; }; +static struct hci_dev *hci_hdev_from_sock(struct sock *sk) +{ + struct hci_dev *hdev = hci_pi(sk)->hdev; + + if (!hdev) + return ERR_PTR(-EBADFD); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return ERR_PTR(-EPIPE); + return hdev; +} + void hci_sock_set_flag(struct sock *sk, int nr) { set_bit(nr, &hci_pi(sk)->flags); @@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - /* Detach sockets from device */ + /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { - hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; - sk->sk_state = BT_OPEN; sk->sk_state_change(sk); - - hci_dev_put(hdev); } - release_sock(sk); } read_unlock(&hci_sk_list.lock); } @@ -930,10 +935,10 @@ static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg) static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { - struct hci_dev *hdev = hci_pi(sk)->hdev; + struct hci_dev *hdev = hci_hdev_from_sock(sk); - if (!hdev) - return -EBADFD; + if (IS_ERR(hdev)) + return PTR_ERR(hdev); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; @@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, lock_sock(sk); + /* Allow detaching from dead device and attaching to alive device, if + * the caller wants to re-bind (instead of close) this socket in + * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. + */ + hdev = hci_pi(sk)->hdev; + if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + hci_pi(sk)->hdev = NULL; + sk->sk_state = BT_OPEN; + hci_dev_put(hdev); + } + hdev = NULL; + if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; @@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, lock_sock(sk); - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } @@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto done; } - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 9874844a95a9..b69d88b88d2e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -83,6 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) static void bt_host_release(struct device *dev) { struct hci_dev *hdev = to_hci_dev(dev); + + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + hci_cleanup_dev(hdev); kfree(hdev); module_put(THIS_MODULE); } From 0395be967b067d99494113d78470574e86a02ed4 Mon Sep 17 00:00:00 2001 From: Apurva Nandan Date: Fri, 16 Jul 2021 23:25:03 +0000 Subject: [PATCH 481/502] spi: cadence-quadspi: Fix check condition for DTR ops buswidth and dtr fields in spi_mem_op are only valid when the corresponding spi_mem_op phase has a non-zero length. For example, SPI NAND core doesn't set buswidth when using SPI_MEM_OP_NO_ADDR phase. Fix the dtr checks in set_protocol() and suppports_mem_op() to ignore empty spi_mem_op phases, as checking for dtr field in empty phase will result in false negatives. Signed-off-by: Apurva Nandan Link: https://lore.kernel.org/r/20210716232504.182-3-a-nandan@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index a2de23516553..101cc71bffa7 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -325,7 +325,15 @@ static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata, f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE; f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE; f_pdata->data_width = CQSPI_INST_TYPE_SINGLE; - f_pdata->dtr = op->data.dtr && op->cmd.dtr && op->addr.dtr; + + /* + * For an op to be DTR, cmd phase along with every other non-empty + * phase should have dtr field set to 1. If an op phase has zero + * nbytes, ignore its dtr field; otherwise, check its dtr field. + */ + f_pdata->dtr = op->cmd.dtr && + (!op->addr.nbytes || op->addr.dtr) && + (!op->data.nbytes || op->data.dtr); switch (op->data.buswidth) { case 0: @@ -1228,8 +1236,15 @@ static bool cqspi_supports_mem_op(struct spi_mem *mem, { bool all_true, all_false; - all_true = op->cmd.dtr && op->addr.dtr && op->dummy.dtr && - op->data.dtr; + /* + * op->dummy.dtr is required for converting nbytes into ncycles. + * Also, don't check the dtr field of the op phase having zero nbytes. + */ + all_true = op->cmd.dtr && + (!op->addr.nbytes || op->addr.dtr) && + (!op->dummy.nbytes || op->dummy.dtr) && + (!op->data.nbytes || op->data.dtr); + all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr && !op->data.dtr; From f7ec4121256393e1d03274acdca73eb18958f27e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 5 Aug 2021 09:27:15 -0400 Subject: [PATCH 482/502] tracepoint: static call: Compare data on transition from 2->1 callees On transition from 2->1 callees, we should be comparing .data rather than .func, because the same callback can be registered twice with different data, and what we care about here is that the data of array element 0 is unchanged to skip rcu sync. Link: https://lkml.kernel.org/r/20210805132717.23813-2-mathieu.desnoyers@efficios.com Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: "Paul E. McKenney" Cc: Stefan Metzmacher Fixes: 547305a64632 ("tracepoint: Fix out of sync data passing by static caller") Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (VMware) --- kernel/tracepoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index fc32821f8240..133b6454b287 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -338,7 +338,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, } else { rcu_assign_pointer(tp->funcs, tp_funcs); tracepoint_update_call(tp, tp_funcs, - tp_funcs[0].func != old[0].func); + tp_funcs[0].data != old[0].data); } release_probes(old); return 0; From 231264d6927f6740af36855a622d0e240be9d94c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 5 Aug 2021 09:27:16 -0400 Subject: [PATCH 483/502] tracepoint: Fix static call function vs data state mismatch On a 1->0->1 callbacks transition, there is an issue with the new callback using the old callback's data. Considering __DO_TRACE_CALL: do { \ struct tracepoint_func *it_func_ptr; \ void *__data; \ it_func_ptr = \ rcu_dereference_raw((&__tracepoint_##name)->funcs); \ if (it_func_ptr) { \ __data = (it_func_ptr)->data; \ ----> [ delayed here on one CPU (e.g. vcpu preempted by the host) ] static_call(tp_func_##name)(__data, args); \ } \ } while (0) It has loaded the tp->funcs of the old callback, so it will try to use the old data. This can be fixed by adding a RCU sync anywhere in the 1->0->1 transition chain. On a N->2->1 transition, we need an rcu-sync because you may have a sequence of 3->2->1 (or 1->2->1) where the element 0 data is unchanged between 2->1, but was changed from 3->2 (or from 1->2), which may be observed by the static call. This can be fixed by adding an unconditional RCU sync in transition 2->1. Note, this fixes a correctness issue at the cost of adding a tremendous performance regression to the disabling of tracepoints. Before this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m0.778s user 0m0.000s sys 0m0.061s After this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m10.593s user 0m0.017s sys 0m0.259s A follow up fix will introduce a more lightweight scheme based on RCU get_state and cond_sync, that will return the performance back to what it was. As both this change and the lightweight versions are complex on their own, for bisecting any issues that this may cause, they are kept as two separate changes. Link: https://lkml.kernel.org/r/20210805132717.23813-3-mathieu.desnoyers@efficios.com Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: "Paul E. McKenney" Cc: Stefan Metzmacher Fixes: d25e37d89dd2 ("tracepoint: Optimize using static_call()") Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (VMware) --- kernel/tracepoint.c | 102 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 20 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 133b6454b287..8d772bd6894d 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -15,6 +15,13 @@ #include #include +enum tp_func_state { + TP_FUNC_0, + TP_FUNC_1, + TP_FUNC_2, + TP_FUNC_N, +}; + extern tracepoint_ptr_t __start___tracepoints_ptrs[]; extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; @@ -246,26 +253,29 @@ static void *func_remove(struct tracepoint_func **funcs, return old; } -static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync) +/* + * Count the number of functions (enum tp_func_state) in a tp_funcs array. + */ +static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs) +{ + if (!tp_funcs) + return TP_FUNC_0; + if (!tp_funcs[1].func) + return TP_FUNC_1; + if (!tp_funcs[2].func) + return TP_FUNC_2; + return TP_FUNC_N; /* 3 or more */ +} + +static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs) { void *func = tp->iterator; /* Synthetic events do not have static call sites */ if (!tp->static_call_key) return; - - if (!tp_funcs[1].func) { + if (nr_func_state(tp_funcs) == TP_FUNC_1) func = tp_funcs[0].func; - /* - * If going from the iterator back to a single caller, - * we need to synchronize with __DO_TRACE to make sure - * that the data passed to the callback is the one that - * belongs to that callback. - */ - if (sync) - tracepoint_synchronize_unregister(); - } - __static_call_update(tp->static_call_key, tp->static_call_tramp, func); } @@ -299,9 +309,31 @@ static int tracepoint_add_func(struct tracepoint *tp, * a pointer to it. This array is referenced by __DO_TRACE from * include/linux/tracepoint.h using rcu_dereference_sched(). */ - tracepoint_update_call(tp, tp_funcs, false); - rcu_assign_pointer(tp->funcs, tp_funcs); - static_key_enable(&tp->key); + switch (nr_func_state(tp_funcs)) { + case TP_FUNC_1: /* 0->1 */ + /* Set static call to first function */ + tracepoint_update_call(tp, tp_funcs); + /* Both iterator and static call handle NULL tp->funcs */ + rcu_assign_pointer(tp->funcs, tp_funcs); + static_key_enable(&tp->key); + break; + case TP_FUNC_2: /* 1->2 */ + /* Set iterator static call */ + tracepoint_update_call(tp, tp_funcs); + /* + * Iterator callback installed before updating tp->funcs. + * Requires ordering between RCU assign/dereference and + * static call update/call. + */ + rcu_assign_pointer(tp->funcs, tp_funcs); + break; + case TP_FUNC_N: /* N->N+1 (N>1) */ + rcu_assign_pointer(tp->funcs, tp_funcs); + break; + default: + WARN_ON_ONCE(1); + break; + } release_probes(old); return 0; @@ -328,17 +360,47 @@ static int tracepoint_remove_func(struct tracepoint *tp, /* Failed allocating new tp_funcs, replaced func with stub */ return 0; - if (!tp_funcs) { + switch (nr_func_state(tp_funcs)) { + case TP_FUNC_0: /* 1->0 */ /* Removed last function */ if (tp->unregfunc && static_key_enabled(&tp->key)) tp->unregfunc(); static_key_disable(&tp->key); + /* Set iterator static call */ + tracepoint_update_call(tp, tp_funcs); + /* Both iterator and static call handle NULL tp->funcs */ + rcu_assign_pointer(tp->funcs, NULL); + /* + * Make sure new func never uses old data after a 1->0->1 + * transition sequence. + * Considering that transition 0->1 is the common case + * and don't have rcu-sync, issue rcu-sync after + * transition 1->0 to break that sequence by waiting for + * readers to be quiescent. + */ + tracepoint_synchronize_unregister(); + break; + case TP_FUNC_1: /* 2->1 */ rcu_assign_pointer(tp->funcs, tp_funcs); - } else { + /* + * On 2->1 transition, RCU sync is needed before setting + * static call to first callback, because the observer + * may have loaded any prior tp->funcs after the last one + * associated with an rcu-sync. + */ + tracepoint_synchronize_unregister(); + /* Set static call to first function */ + tracepoint_update_call(tp, tp_funcs); + break; + case TP_FUNC_2: /* N->N-1 (N>2) */ + fallthrough; + case TP_FUNC_N: rcu_assign_pointer(tp->funcs, tp_funcs); - tracepoint_update_call(tp, tp_funcs, - tp_funcs[0].data != old[0].data); + break; + default: + WARN_ON_ONCE(1); + break; } release_probes(old); return 0; From 23c0ebac20de19e3f54e5e81f4c3fa0caf2f8395 Mon Sep 17 00:00:00 2001 From: Xiaomeng Hou Date: Thu, 29 Jul 2021 15:44:39 +0800 Subject: [PATCH 484/502] drm/amd/pm: update yellow carp pmfw interface version Correct yellow carp driver-PMFW interface version to v4. Signed-off-by: Xiaomeng Hou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h index 3fea2430dec0..dc91eb608791 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h @@ -26,7 +26,7 @@ #include "amdgpu_smu.h" #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF -#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x03 +#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x07 /* MP Apertures */ From 5706cb3c910cc8283f344bc37a889a8d523a2c6d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 29 Jul 2021 20:03:47 -0700 Subject: [PATCH 485/502] drm/amdgpu: fix checking pmops when PM_SLEEP is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'pm_suspend_target_state' is only available when CONFIG_PM_SLEEP is set/enabled. OTOH, when both SUSPEND and HIBERNATION are not set, PM_SLEEP is not set, so this variable cannot be used. ../drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c: In function ‘amdgpu_acpi_is_s0ix_active’: ../drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c:1046:11: error: ‘pm_suspend_target_state’ undeclared (first use in this function); did you mean ‘__KSYM_pm_suspend_target_state’? return pm_suspend_target_state == PM_SUSPEND_TO_IDLE; ^~~~~~~~~~~~~~~~~~~~~~~ __KSYM_pm_suspend_target_state Also use shorter IS_ENABLED(CONFIG_foo) notation for checking the 2 config symbols. Fixes: 91e273712ab8dd ("drm/amdgpu: Check pmops for desired suspend state") Signed-off-by: Randy Dunlap Cc: Alex Deucher Cc: Christian König Cc: "Pan, Xinhui" Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-next@vger.kernel.org Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 6cc0d4fa4d0a..4137e848f6a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1040,7 +1040,7 @@ void amdgpu_acpi_detect(void) */ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { -#if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) +#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_PM_SLEEP) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) return pm_suspend_target_state == PM_SUSPEND_TO_IDLE; From ffb9ee8eb272ba2b5a7325e69bb98118869637db Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Mon, 19 Jul 2021 13:13:33 -0400 Subject: [PATCH 486/502] drm/amd/display: Assume LTTPR interop for DCN31+ [WHY] For DCN31 onward, LTTPR is to be enabled and set to Transparent by VBIOS. Driver is to assume that VBIOS has done this without needing to check the VBIOS interop bit. [HOW] Add LTTPR enable and interop VBIOS bits into dc->caps, and force-set the interop bit to true for DCN31+. Reviewed-by: Jun Lei Acked-by: Aurabindo Pillai Signed-off-by: Wesley Chalmers Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 21 ++----------------- drivers/gpu/drm/amd/display/dc/dc.h | 2 ++ .../drm/amd/display/dc/dcn30/dcn30_resource.c | 20 ++++++++++++++++++ .../drm/amd/display/dc/dcn31/dcn31_resource.c | 16 ++++++++++++++ 4 files changed, 40 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 9fb8c46dc606..a6d0fd24fd02 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3602,29 +3602,12 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link) bool dp_retrieve_lttpr_cap(struct dc_link *link) { uint8_t lttpr_dpcd_data[6]; - bool vbios_lttpr_enable = false; - bool vbios_lttpr_interop = false; - struct dc_bios *bios = link->dc->ctx->dc_bios; + bool vbios_lttpr_enable = link->dc->caps.vbios_lttpr_enable; + bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; enum dc_status status = DC_ERROR_UNEXPECTED; bool is_lttpr_present = false; memset(lttpr_dpcd_data, '\0', sizeof(lttpr_dpcd_data)); - /* Query BIOS to determine if LTTPR functionality is forced on by system */ - if (bios->funcs->get_lttpr_caps) { - enum bp_result bp_query_result; - uint8_t is_vbios_lttpr_enable = 0; - - bp_query_result = bios->funcs->get_lttpr_caps(bios, &is_vbios_lttpr_enable); - vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; - } - - if (bios->funcs->get_lttpr_interop) { - enum bp_result bp_query_result; - uint8_t is_vbios_interop_enabled = 0; - - bp_query_result = bios->funcs->get_lttpr_interop(bios, &is_vbios_interop_enabled); - vbios_lttpr_interop = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; - } /* * Logic to determine LTTPR mode diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8dcea8ff5c5a..af7b60108e9d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -183,6 +183,8 @@ struct dc_caps { unsigned int cursor_cache_size; struct dc_plane_cap planes[MAX_PLANES]; struct dc_color_caps color; + bool vbios_lttpr_aware; + bool vbios_lttpr_enable; }; struct dc_bug_wa { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 596c97dce67e..253654d605c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2617,6 +2617,26 @@ static bool dcn30_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + if (ctx->dc_bios->funcs->get_lttpr_interop) { + enum bp_result bp_query_result; + uint8_t is_vbios_interop_enabled = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_interop(ctx->dc_bios, + &is_vbios_interop_enabled); + dc->caps.vbios_lttpr_aware = (bp_query_result == BP_RESULT_OK) && !!is_vbios_interop_enabled; + } + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 38c010afade1..cd3248dc31d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1968,6 +1968,22 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* read VBIOS LTTPR caps */ + { + if (ctx->dc_bios->funcs->get_lttpr_caps) { + enum bp_result bp_query_result; + uint8_t is_vbios_lttpr_enable = 0; + + bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable); + dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable; + } + + /* interop bit is implicit */ + { + dc->caps.vbios_lttpr_aware = true; + } + } + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { From 06050a0f01dbac2ca33145ef19a72041206ea983 Mon Sep 17 00:00:00 2001 From: Bing Guo Date: Mon, 19 Jul 2021 18:24:06 -0400 Subject: [PATCH 487/502] drm/amd/display: Fix Dynamic bpp issue with 8K30 with Navi 1X Why: In DCN2x, HW doesn't automatically divide MASTER_UPDATE_LOCK_DB_X by the number of pipes ODM Combined. How: Set MASTER_UPDATE_LOCK_DB_X to the value that is adjusted by the number of pipes ODM Combined. Reviewed-by: Martin Leung Acked-by: Aurabindo Pillai Signed-off-by: Bing Guo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c index 7fa9fc656b0c..f6e747f25ebe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c @@ -464,7 +464,7 @@ void optc2_lock_doublebuffer_enable(struct timing_generator *optc) REG_UPDATE_2(OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_X, - h_blank_start - 200 - 1, + (h_blank_start - 200 - 1) / optc1->opp_count, MASTER_UPDATE_LOCK_DB_Y, v_blank_start - 1); } From cd7b0531a61811429e7907c875e864ab918f3e62 Mon Sep 17 00:00:00 2001 From: Bing Guo Date: Tue, 20 Jul 2021 15:13:38 -0400 Subject: [PATCH 488/502] drm/amd/display: Increase stutter watermark for dcn303 [Why&How] Hardware team suggested to use SRExitTime= 35.5us as w/a to prevent underflow in certain modes. Reviewed-by: Martin Leung Acked-by: Aurabindo Pillai Signed-off-by: Bing Guo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 833ab13fa834..dc7823d23ba8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -146,8 +146,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc = { .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ .num_states = 1, - .sr_exit_time_us = 26.5, - .sr_enter_plus_exit_time_us = 31, + .sr_exit_time_us = 35.5, + .sr_enter_plus_exit_time_us = 40, .urgent_latency_us = 4.0, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, From d5c5ac3a7bca35261eb599204cbf1efee0af22cc Mon Sep 17 00:00:00 2001 From: Jude Shih Date: Tue, 6 Jul 2021 18:04:11 +0800 Subject: [PATCH 489/502] drm/amd/display: Fix resetting DCN3.1 HW when resuming from S4 [Why] On S4 resume we also need to fix detection of when to reload DMCUB firmware because we're currently using the VBIOS version which isn't compatible with the driver version. [How] Update the hardware init check for DCN31 since it's the ASIC that has this issue. Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Jude Shih Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 973de346410d..27c7fa3110c8 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -267,11 +267,13 @@ void dmub_dcn31_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset) bool dmub_dcn31_is_hw_init(struct dmub_srv *dmub) { - uint32_t is_hw_init; + union dmub_fw_boot_status status; + uint32_t is_enable; - REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_hw_init); + status.all = REG_READ(DMCUB_SCRATCH0); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enable); - return is_hw_init != 0; + return is_enable != 0 && status.bits.dal_fw; } bool dmub_dcn31_is_supported(struct dmub_srv *dmub) From c4152b297d56d3696ad0a9003169bc5b98ad7b72 Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Thu, 22 Jul 2021 14:48:54 -0400 Subject: [PATCH 490/502] drm/amd/display: workaround for hard hang on HPD on native DP [Why] HPD disable and enable sequences are not mutually exclusive on Linux. For HPDs that spans over 1s (i.e. HPD low = 1s), part of the disable sequence (specifically, a request to SMU to lower refclk) could come right before the call to PHY enable, causing DMUB to access an unresponsive PHY and thus a hard hang on the system. [How] Disable 48mhz refclk off on native DP. Reviewed-by: Hersen Wu Acked-by: Aurabindo Pillai Signed-off-by: Qingqing Zhuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index c6f494f0dcea..6185f9475fa2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -66,9 +66,11 @@ int rn_get_active_display_cnt_wa( for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; + /* Extend the WA to DP for Linux*/ if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A || stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK || - stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) + stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK || + stream->signal == SIGNAL_TYPE_DISPLAY_PORT) tmds_present = true; } From 0e99e960ce6d5ff586fc0733bc393c087f52c27b Mon Sep 17 00:00:00 2001 From: Shirish S Date: Tue, 3 Aug 2021 14:03:44 +0530 Subject: [PATCH 491/502] drm/amdgpu/display: fix DMUB firmware version info DMUB firmware info is printed before it gets initialized. Correct this order to ensure true value is conveyed. Signed-off-by: Shirish S Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b53f49a23ddc..c0ae73b0691c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1548,6 +1548,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) } hdr = (const struct dmcub_firmware_header_v1_0 *)adev->dm.dmub_fw->data; + adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { adev->firmware.ucode[AMDGPU_UCODE_ID_DMCUB].ucode_id = @@ -1561,7 +1562,6 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) adev->dm.dmcub_fw_version); } - adev->dm.dmcub_fw_version = le32_to_cpu(hdr->header.ucode_version); adev->dm.dmub_srv = kzalloc(sizeof(*adev->dm.dmub_srv), GFP_KERNEL); dmub_srv = adev->dm.dmub_srv; From e00f543d3596c71201438d967877138ab33bb3de Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Wed, 24 Feb 2021 11:48:23 +0800 Subject: [PATCH 492/502] drm/amdgpu: add DID for beige goby Add device ids. Signed-off-by: Chengming Gui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 361b86b71b56..5ed8381ae0f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1213,6 +1213,13 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + /* BEIGE_GOBY */ + {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0, 0, 0} }; From 2638a32348bbb1c384dbbd515fd2b12c155f0188 Mon Sep 17 00:00:00 2001 From: Dakshaja Uppalapati Date: Thu, 5 Aug 2021 18:13:32 +0530 Subject: [PATCH 493/502] RDMA/iw_cxgb4: Fix refcount underflow while destroying cqs. Previous atomic increment/decrement logic expects the atomic count to be '0' after the final decrement. Replacing atomic count with refcount does not allow that, as refcount_dec() considers count of 1 as underflow and triggers a kernel splat. Fix the current refcount logic by using the usual pattern of decrementing the refcount and test if it is '0' on the final deref in c4iw_destroy_cq(). Use wait_for_completion() instead of wait_event(). Fixes: 7183451f846d ("RDMA/cxgb4: Use refcount_t instead of atomic_t for reference counting") Link: https://lore.kernel.org/r/1628167412-12114-1-git-send-email-dakshaja@chelsio.com Signed-off-by: Dakshaja Uppalapati Reviewed-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 12 +++++++++--- drivers/infiniband/hw/cxgb4/ev.c | 6 ++---- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 3 ++- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 6c8c910f4e86..c7e8d7b3baa1 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -967,6 +967,12 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? npolled : err; } +void c4iw_cq_rem_ref(struct c4iw_cq *chp) +{ + if (refcount_dec_and_test(&chp->refcnt)) + complete(&chp->cq_rel_comp); +} + int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; @@ -976,8 +982,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) chp = to_c4iw_cq(ib_cq); xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); - refcount_dec(&chp->refcnt); - wait_event(chp->wait, !refcount_read(&chp->refcnt)); + c4iw_cq_rem_ref(chp); + wait_for_completion(&chp->cq_rel_comp); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); @@ -1081,7 +1087,7 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, spin_lock_init(&chp->lock); spin_lock_init(&chp->comp_handler_lock); refcount_set(&chp->refcnt, 1); - init_waitqueue_head(&chp->wait); + init_completion(&chp->cq_rel_comp); ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL); if (ret) goto err_destroy_cq; diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 7798d090888b..34211a533d5c 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -213,8 +213,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) break; } done: - if (refcount_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); + c4iw_cq_rem_ref(chp); c4iw_qp_rem_ref(&qhp->ibqp); out: return; @@ -234,8 +233,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - if (refcount_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); + c4iw_cq_rem_ref(chp); } else { pr_debug("unknown cqid 0x%x\n", qid); xa_unlock_irqrestore(&dev->cqs, flag); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 3883af3d2312..ac5f581aff4c 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -428,7 +428,7 @@ struct c4iw_cq { spinlock_t lock; spinlock_t comp_handler_lock; refcount_t refcnt; - wait_queue_head_t wait; + struct completion cq_rel_comp; struct c4iw_wr_wait *wr_waitp; }; @@ -979,6 +979,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void c4iw_cq_rem_ref(struct c4iw_cq *chp); int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); From acade6379930dfa7987f4bd9b26d1a701cc1b542 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 3 Aug 2021 06:25:28 -0700 Subject: [PATCH 494/502] perf/x86/intel: Apply mid ACK for small core A warning as below may be occasionally triggered in an ADL machine when these conditions occur: - Two perf record commands run one by one. Both record a PEBS event. - Both runs on small cores. - They have different adaptive PEBS configuration (PEBS_DATA_CFG). [ ] WARNING: CPU: 4 PID: 9874 at arch/x86/events/intel/ds.c:1743 setup_pebs_adaptive_sample_data+0x55e/0x5b0 [ ] RIP: 0010:setup_pebs_adaptive_sample_data+0x55e/0x5b0 [ ] Call Trace: [ ] [ ] intel_pmu_drain_pebs_icl+0x48b/0x810 [ ] perf_event_nmi_handler+0x41/0x80 [ ] [ ] __perf_event_task_sched_in+0x2c2/0x3a0 Different from the big core, the small core requires the ACK right before re-enabling counters in the NMI handler, otherwise a stale PEBS record may be dumped into the later NMI handler, which trigger the warning. Add a new mid_ack flag to track the case. Add all PMI handler bits in the struct x86_hybrid_pmu to track the bits for different types of PMUs. Apply mid ACK for the small cores on an Alder Lake machine. The existing hybrid() macro has a compile error when taking address of a bit-field variable. Add a new macro hybrid_bit() to get the bit-field value of a given PMU. Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support") Reported-by: Ammy Yi Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Tested-by: Ammy Yi Link: https://lkml.kernel.org/r/1627997128-57891-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 23 +++++++++++++++-------- arch/x86/events/perf_event.h | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fca7a6e2242f..ac6fd2dabf6a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ static int intel_pmu_handle_irq(struct pt_regs *regs) { - struct cpu_hw_events *cpuc; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + bool late_ack = hybrid_bit(cpuc->pmu, late_ack); + bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack); int loops; u64 status; int handled; int pmu_enabled; - cpuc = this_cpu_ptr(&cpu_hw_events); - /* * Save the PMU state. * It needs to be restored when leaving the handler. */ pmu_enabled = cpuc->enabled; /* - * No known reason to not always do late ACK, - * but just in case do it opt-in. + * In general, the early ACK is only applied for old platforms. + * For the big core starts from Haswell, the late ACK should be + * applied. + * For the small core after Tremont, we have to do the ACK right + * before re-enabling counters, which is in the middle of the + * NMI handler. */ - if (!x86_pmu.late_ack) + if (!late_ack && !mid_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); intel_bts_disable_local(); cpuc->enabled = 0; @@ -2958,6 +2962,8 @@ again: goto again; done: + if (mid_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); /* Only restore PMU state when it's active. See x86_pmu_disable(). */ cpuc->enabled = pmu_enabled; if (pmu_enabled) @@ -2969,7 +2975,7 @@ done: * have been reset. This avoids spurious NMIs on * Haswell CPUs. */ - if (x86_pmu.late_ack) + if (late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); return handled; } @@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void) static_branch_enable(&perf_is_hybrid); x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; - x86_pmu.late_ack = true; x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; @@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; + pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; @@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; pmu->cpu_type = hybrid_small; + pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; pmu->max_pebs_events = x86_pmu.max_pebs_events; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2938c902ffbe..e3ac05c97b5e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -656,6 +656,10 @@ struct x86_hybrid_pmu { struct event_constraint *event_constraints; struct event_constraint *pebs_constraints; struct extra_reg *extra_regs; + + unsigned int late_ack :1, + mid_ack :1, + enabled_ack :1; }; static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu) @@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid; __Fp; \ })) +#define hybrid_bit(_pmu, _field) \ +({ \ + bool __Fp = x86_pmu._field; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = hybrid_pmu(_pmu)->_field; \ + \ + __Fp; \ +}) + enum hybrid_pmu_type { hybrid_big = 0x40, hybrid_small = 0x20, @@ -755,6 +769,7 @@ struct x86_pmu { /* PMI handler bits */ unsigned int late_ack :1, + mid_ack :1, enabled_ack :1; /* * sysfs attrs From 3d4e4face9c1548752a2891e98b38b100feee336 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Thu, 5 Aug 2021 18:05:37 +0800 Subject: [PATCH 495/502] io-wq: fix no lock protection of acct->nr_worker There is an acct->nr_worker visit without lock protection. Think about the case: two callers call io_wqe_wake_worker(), one is the original context and the other one is an io-worker(by calling io_wqe_enqueue(wqe, linked)), on two cpus paralelly, this may cause nr_worker to be larger than max_worker. Let's fix it by adding lock for it, and let's do nr_workers++ before create_io_worker. There may be a edge cause that the first caller fails to create an io-worker, but the second caller doesn't know it and then quit creating io-worker as well: say nr_worker = max_worker - 1 cpu 0 cpu 1 io_wqe_wake_worker() io_wqe_wake_worker() nr_worker < max_worker nr_worker++ create_io_worker() nr_worker == max_worker failed return return But the chance of this case is very slim. Fixes: 685fe7feedb9 ("io-wq: eliminate the need for a manager thread") Signed-off-by: Hao Xu [axboe: fix unconditional create_io_worker() call] Signed-off-by: Jens Axboe --- fs/io-wq.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 50dc93ffc153..64d904ce4f2d 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -247,10 +247,19 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) ret = io_wqe_activate_free_worker(wqe); rcu_read_unlock(); - if (!ret && acct->nr_workers < acct->max_workers) { - atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); - create_io_worker(wqe->wq, wqe, acct->index); + if (!ret) { + bool do_create = false; + + raw_spin_lock_irq(&wqe->lock); + if (acct->nr_workers < acct->max_workers) { + atomic_inc(&acct->nr_running); + atomic_inc(&wqe->wq->worker_refs); + acct->nr_workers++; + do_create = true; + } + raw_spin_unlock_irq(&wqe->lock); + if (do_create) + create_io_worker(wqe->wq, wqe, acct->index); } } @@ -635,6 +644,9 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) kfree(worker); fail: atomic_dec(&acct->nr_running); + raw_spin_lock_irq(&wqe->lock); + acct->nr_workers--; + raw_spin_unlock_irq(&wqe->lock); io_worker_ref_put(wq); return; } @@ -650,9 +662,8 @@ fail: worker->flags |= IO_WORKER_F_FREE; if (index == IO_WQ_ACCT_BOUND) worker->flags |= IO_WORKER_F_BOUND; - if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) + if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND)) worker->flags |= IO_WORKER_F_FIXED; - acct->nr_workers++; raw_spin_unlock_irq(&wqe->lock); wake_up_new_task(tsk); } From 21698274da5b6fc724b005bc7ec3e6b9fbcfaa06 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Thu, 5 Aug 2021 18:05:38 +0800 Subject: [PATCH 496/502] io-wq: fix lack of acct->nr_workers < acct->max_workers judgement There should be this judgement before we create an io-worker Fixes: 685fe7feedb9 ("io-wq: eliminate the need for a manager thread") Signed-off-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io-wq.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 64d904ce4f2d..12fc19353bb0 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -280,9 +280,17 @@ static void create_worker_cb(struct callback_head *cb) { struct create_worker_data *cwd; struct io_wq *wq; + struct io_wqe *wqe; + struct io_wqe_acct *acct; cwd = container_of(cb, struct create_worker_data, work); - wq = cwd->wqe->wq; + wqe = cwd->wqe; + wq = wqe->wq; + acct = &wqe->acct[cwd->index]; + raw_spin_lock_irq(&wqe->lock); + if (acct->nr_workers < acct->max_workers) + acct->nr_workers++; + raw_spin_unlock_irq(&wqe->lock); create_io_worker(wq, cwd->wqe, cwd->index); kfree(cwd); } From 7b40066c97ec66a44e388f82fcf694987451768f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 5 Aug 2021 15:29:54 -0400 Subject: [PATCH 497/502] tracepoint: Use rcu get state and cond sync for static call updates State transitions from 1->0->1 and N->2->1 callbacks require RCU synchronization. Rather than performing the RCU synchronization every time the state change occurs, which is quite slow when many tracepoints are registered in batch, instead keep a snapshot of the RCU state on the most recent transitions which belong to a chain, and conditionally wait for a grace period on the last transition of the chain if one g.p. has not elapsed since the last snapshot. This applies to both RCU and SRCU. This brings the performance regression caused by commit 231264d6927f ("Fix: tracepoint: static call function vs data state mismatch") back to what it was originally. Before this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m10.593s user 0m0.017s sys 0m0.259s After this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m0.878s user 0m0.000s sys 0m0.103s Link: https://lkml.kernel.org/r/20210805192954.30688-1-mathieu.desnoyers@efficios.com Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: "Paul E. McKenney" Cc: Stefan Metzmacher Fixes: 231264d6927f ("Fix: tracepoint: static call function vs data state mismatch") Signed-off-by: Mathieu Desnoyers Reviewed-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- kernel/tracepoint.c | 81 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 8d772bd6894d..efd14c79fab4 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -28,6 +28,44 @@ extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; DEFINE_SRCU(tracepoint_srcu); EXPORT_SYMBOL_GPL(tracepoint_srcu); +enum tp_transition_sync { + TP_TRANSITION_SYNC_1_0_1, + TP_TRANSITION_SYNC_N_2_1, + + _NR_TP_TRANSITION_SYNC, +}; + +struct tp_transition_snapshot { + unsigned long rcu; + unsigned long srcu; + bool ongoing; +}; + +/* Protected by tracepoints_mutex */ +static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC]; + +static void tp_rcu_get_state(enum tp_transition_sync sync) +{ + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; + + /* Keep the latest get_state snapshot. */ + snapshot->rcu = get_state_synchronize_rcu(); + snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu); + snapshot->ongoing = true; +} + +static void tp_rcu_cond_sync(enum tp_transition_sync sync) +{ + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; + + if (!snapshot->ongoing) + return; + cond_synchronize_rcu(snapshot->rcu); + if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu)) + synchronize_srcu(&tracepoint_srcu); + snapshot->ongoing = false; +} + /* Set to 1 to enable tracepoint debug output */ static const int tracepoint_debug; @@ -311,6 +349,11 @@ static int tracepoint_add_func(struct tracepoint *tp, */ switch (nr_func_state(tp_funcs)) { case TP_FUNC_1: /* 0->1 */ + /* + * Make sure new static func never uses old data after a + * 1->0->1 transition sequence. + */ + tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); /* Both iterator and static call handle NULL tp->funcs */ @@ -325,10 +368,15 @@ static int tracepoint_add_func(struct tracepoint *tp, * Requires ordering between RCU assign/dereference and * static call update/call. */ - rcu_assign_pointer(tp->funcs, tp_funcs); - break; + fallthrough; case TP_FUNC_N: /* N->N+1 (N>1) */ rcu_assign_pointer(tp->funcs, tp_funcs); + /* + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>1) transition sequence. + */ + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); @@ -372,24 +420,23 @@ static int tracepoint_remove_func(struct tracepoint *tp, /* Both iterator and static call handle NULL tp->funcs */ rcu_assign_pointer(tp->funcs, NULL); /* - * Make sure new func never uses old data after a 1->0->1 - * transition sequence. - * Considering that transition 0->1 is the common case - * and don't have rcu-sync, issue rcu-sync after - * transition 1->0 to break that sequence by waiting for - * readers to be quiescent. + * Make sure new static func never uses old data after a + * 1->0->1 transition sequence. */ - tracepoint_synchronize_unregister(); + tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1); break; case TP_FUNC_1: /* 2->1 */ rcu_assign_pointer(tp->funcs, tp_funcs); /* - * On 2->1 transition, RCU sync is needed before setting - * static call to first callback, because the observer - * may have loaded any prior tp->funcs after the last one - * associated with an rcu-sync. + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>2) transition sequence. If the first + * element's data has changed, then force the synchronization + * to prevent current readers that have loaded the old data + * from calling the new function. */ - tracepoint_synchronize_unregister(); + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); + tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); break; @@ -397,6 +444,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, fallthrough; case TP_FUNC_N: rcu_assign_pointer(tp->funcs, tp_funcs); + /* + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>2) transition sequence. + */ + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); From 877ba3f729fd3d8ef0e29bc2a55e57cfa54b2e43 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 4 Aug 2021 14:23:55 -0400 Subject: [PATCH 498/502] ext4: fix potential htree corruption when growing large_dir directories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b5776e7524af ("ext4: fix potential htree index checksum corruption) removed a required restart when multiple levels of index nodes need to be split. Fix this to avoid directory htree corruptions when using the large_dir feature. Cc: stable@kernel.org # v5.11 Cc: Благодаренко Артём Fixes: b5776e7524af ("ext4: fix potential htree index checksum corruption) Reported-by: Denis Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5fd56f616cf0..f3bbcd4efb56 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2517,7 +2517,7 @@ again: goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); - if (err) + if (restart || err) goto journal_error; } else { struct dx_root *dxroot; From fb7b9b0231ba8f77587c23f5257a4fdb6df1219e Mon Sep 17 00:00:00 2001 From: Vincent Fu Date: Wed, 4 Aug 2021 19:49:23 +0000 Subject: [PATCH 499/502] kyber: make trace_block_rq call consistent with documentation The kyber ioscheduler calls trace_block_rq_insert() *after* the request is added to the queue but the documentation for trace_block_rq_insert() says that the call should be made *before* the request is added to the queue. Move the tracepoint for the kyber ioscheduler so that it is consistent with the documentation. Signed-off-by: Vincent Fu Link: https://lore.kernel.org/r/20210804194913.10497-1-vincent.fu@samsung.com Reviewed by: Adam Manzanares Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 81e3279ecd57..15a8be57203d 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -596,13 +596,13 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *head = &kcq->rq_list[sched_domain]; spin_lock(&kcq->lock); + trace_block_rq_insert(rq); if (at_head) list_move(&rq->queuelist, head); else list_move_tail(&rq->queuelist, head); sbitmap_set_bit(&khd->kcq_map[sched_domain], rq->mq_ctx->index_hw[hctx->type]); - trace_block_rq_insert(rq); spin_unlock(&kcq->lock); } } From 6d7f91d914bc90a15ebc426440c26081337ceaa1 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 21 Jul 2021 09:59:35 +0200 Subject: [PATCH 500/502] riscv: Get rid of CONFIG_PHYS_RAM_BASE in kernel physical address conversion The usage of CONFIG_PHYS_RAM_BASE for all kernel types was a mistake: this value is implementation-specific and this breaks the genericity of the RISC-V kernel. Fix this by introducing a new variable phys_ram_base that holds this value at runtime and use it in the kernel physical address conversion macro. Since this value is used only for XIP kernels, evaluate it only if CONFIG_XIP_KERNEL is set which in addition optimizes this macro for standard kernels at compile-time. Signed-off-by: Alexandre Ghiti Tested-by: Emil Renner Berthing Reviewed-by: Jisheng Zhang Fixes: 44c922572952 ("RISC-V: enable XIP") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 7 ++++--- arch/riscv/mm/init.c | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index cca8764aed83..b0ca5058e7ae 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -103,6 +103,7 @@ struct kernel_mapping { }; extern struct kernel_mapping kernel_map; +extern phys_addr_t phys_ram_base; #ifdef CONFIG_64BIT #define is_kernel_mapping(x) \ @@ -113,9 +114,9 @@ extern struct kernel_mapping kernel_map; #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) #define kernel_mapping_pa_to_va(y) ({ \ unsigned long _y = y; \ - (_y >= CONFIG_PHYS_RAM_BASE) ? \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET) : \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ + (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) : \ + (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ }) #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a14bf3910eec..88134cc288d9 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -36,6 +36,9 @@ EXPORT_SYMBOL(kernel_map); #define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map)) #endif +phys_addr_t phys_ram_base __ro_after_init; +EXPORT_SYMBOL(phys_ram_base); + #ifdef CONFIG_XIP_KERNEL extern char _xiprom[], _exiprom[]; #endif @@ -160,7 +163,7 @@ static void __init setup_bootmem(void) phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); phys_addr_t __maybe_unused max_mapped_addr; - phys_addr_t dram_end; + phys_addr_t phys_ram_end; #ifdef CONFIG_XIP_KERNEL vmlinux_start = __pa_symbol(&_sdata); @@ -181,9 +184,12 @@ static void __init setup_bootmem(void) #endif memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - dram_end = memblock_end_of_DRAM(); + phys_ram_end = memblock_end_of_DRAM(); #ifndef CONFIG_64BIT +#ifndef CONFIG_XIP_KERNEL + phys_ram_base = memblock_start_of_DRAM(); +#endif /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE @@ -194,12 +200,12 @@ static void __init setup_bootmem(void) * be done in create_kernel_page_table. */ max_mapped_addr = __pa(~(ulong)0); - if (max_mapped_addr == (dram_end - 1)) + if (max_mapped_addr == (phys_ram_end - 1)) memblock_set_current_limit(max_mapped_addr - 4096); #endif - min_low_pfn = PFN_UP(memblock_start_of_DRAM()); - max_low_pfn = max_pfn = PFN_DOWN(dram_end); + min_low_pfn = PFN_UP(phys_ram_base); + max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); @@ -558,6 +564,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); + phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); From 867432bec1c6e7df21a361d7f12022a8c5f54022 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 21 Jul 2021 09:59:36 +0200 Subject: [PATCH 501/502] Revert "riscv: Remove CONFIG_PHYS_RAM_BASE_FIXED" This reverts commit 9b79878ced8f7ab85c57623f8b1f6882e484a316. The removal of this config exposes CONFIG_PHYS_RAM_BASE for all kernel types: this value being implementation-specific, this breaks the genericity of the RISC-V kernel so revert it. Signed-off-by: Alexandre Ghiti Tested-by: Emil Renner Berthing Reviewed-by: Jisheng Zhang Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 31f9e92f1402..4f7b70ae7c31 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -495,8 +495,13 @@ config STACKPROTECTOR_PER_TASK depends on !GCC_PLUGIN_RANDSTRUCT depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS +config PHYS_RAM_BASE_FIXED + bool "Explicitly specified physical RAM address" + default n + config PHYS_RAM_BASE hex "Platform Physical RAM address" + depends on PHYS_RAM_BASE_FIXED default "0x80000000" help This is the physical address of RAM in the system. It has to be @@ -509,6 +514,7 @@ config XIP_KERNEL # This prevents XIP from being enabled by all{yes,mod}config, which # fail to build since XIP doesn't support large kernels. depends on !COMPILE_TEST + select PHYS_RAM_BASE_FIXED help Execute-In-Place allows the kernel to run from non-volatile storage directly addressable by the CPU, such as NOR flash. This saves RAM From 36a21d51725af2ce0700c6ebcb6b9594aac658a6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 Aug 2021 13:49:31 -0700 Subject: [PATCH 502/502] Linux 5.14-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9ca176ff1e40..eae1314a5b86 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Opossums on Parade # *DOCUMENTATION*