From 543576ec15b17c0c93301ac8297333c7b6e84ac7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 26 Apr 2024 16:16:18 -0700 Subject: [PATCH 01/31] bpf: Add BPF_PROG_TYPE_CGROUP_SKB attach type enforcement in BPF_LINK_CREATE bpf_prog_attach uses attach_type_to_prog_type to enforce proper attach type for BPF_PROG_TYPE_CGROUP_SKB. link_create uses bpf_prog_get and relies on bpf_prog_attach_check_attach_type to properly verify prog_type <> attach_type association. Add missing attach_type enforcement for the link_create case. Otherwise, it's currently possible to attach cgroup_skb prog types to other cgroup hooks. Fixes: af6eea57437a ("bpf: Implement bpf_link-based cgroup BPF program attachment") Link: https://lore.kernel.org/bpf/0000000000004792a90615a1dde0@google.com/ Reported-by: syzbot+838346b979830606c854@syzkaller.appspotmail.com Signed-off-by: Stanislav Fomichev Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240426231621.2716876-2-sdf@google.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/syscall.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c287925471f6..cb61d8880dbe 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3985,6 +3985,11 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, * check permissions at attach time. */ return -EPERM; + + ptype = attach_type_to_prog_type(attach_type); + if (prog->type != ptype) + return -EINVAL; + return prog->enforce_expected_attach_type && prog->expected_attach_type != attach_type ? -EINVAL : 0; From d70b2660e75b85bdaa9d75f9c4224c2f6f89cf23 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 26 Apr 2024 16:16:19 -0700 Subject: [PATCH 02/31] selftests/bpf: Extend sockopt tests to use BPF_LINK_CREATE Run all existing test cases with the attachment created via BPF_LINK_CREATE. Next commit will add extra test cases to verify link_create attach_type enforcement. Signed-off-by: Stanislav Fomichev Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240426231621.2716876-3-sdf@google.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/sockopt.c | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index 5a4491d4edfe..dea340996e97 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -1036,9 +1036,10 @@ static int call_getsockopt(bool use_io_uring, int fd, int level, int optname, return getsockopt(fd, level, optname, optval, optlen); } -static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring) +static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring, + bool use_link) { - int sock_fd, err, prog_fd; + int sock_fd, err, prog_fd, link_fd = -1; void *optval = NULL; int ret = 0; @@ -1051,7 +1052,12 @@ static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring) return -1; } - err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0); + if (use_link) { + err = bpf_link_create(prog_fd, cgroup_fd, test->attach_type, NULL); + link_fd = err; + } else { + err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0); + } if (err < 0) { if (test->error == DENY_ATTACH) goto close_prog_fd; @@ -1142,7 +1148,12 @@ free_optval: close_sock_fd: close(sock_fd); detach_prog: - bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type); + if (use_link) { + if (link_fd >= 0) + close(link_fd); + } else { + bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type); + } close_prog_fd: close(prog_fd); return ret; @@ -1160,10 +1171,12 @@ void test_sockopt(void) if (!test__start_subtest(tests[i].descr)) continue; - ASSERT_OK(run_test(cgroup_fd, &tests[i], false), + ASSERT_OK(run_test(cgroup_fd, &tests[i], false, false), + tests[i].descr); + ASSERT_OK(run_test(cgroup_fd, &tests[i], false, true), tests[i].descr); if (tests[i].io_uring_support) - ASSERT_OK(run_test(cgroup_fd, &tests[i], true), + ASSERT_OK(run_test(cgroup_fd, &tests[i], true, false), tests[i].descr); } From 095ddb501b39b7842e5da555915ad89e370b9888 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 26 Apr 2024 16:16:20 -0700 Subject: [PATCH 03/31] selftests/bpf: Add sockopt case to verify prog_type Make sure only sockopt programs can be attached to the setsockopt and getsockopt hooks. Signed-off-by: Stanislav Fomichev Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240426231621.2716876-4-sdf@google.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/sockopt.c | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index dea340996e97..eaac83a7f388 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -24,6 +24,7 @@ enum sockopt_test_error { static struct sockopt_test { const char *descr; const struct bpf_insn insns[64]; + enum bpf_prog_type prog_type; enum bpf_attach_type attach_type; enum bpf_attach_type expected_attach_type; @@ -928,9 +929,40 @@ static struct sockopt_test { .error = EPERM_SETSOCKOPT, }, + + /* ==================== prog_type ==================== */ + + { + .descr = "can attach only BPF_CGROUP_SETSOCKOP", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = 0, + .error = DENY_ATTACH, + }, + + { + .descr = "can attach only BPF_CGROUP_GETSOCKOP", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = 0, + .error = DENY_ATTACH, + }, }; static int load_prog(const struct bpf_insn *insns, + enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type) { LIBBPF_OPTS(bpf_prog_load_opts, opts, @@ -947,7 +979,7 @@ static int load_prog(const struct bpf_insn *insns, } insns_cnt++; - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL", insns, insns_cnt, &opts); + fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts); if (verbose && fd < 0) fprintf(stderr, "%s\n", bpf_log_buf); @@ -1039,11 +1071,15 @@ static int call_getsockopt(bool use_io_uring, int fd, int level, int optname, static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring, bool use_link) { + int prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT; int sock_fd, err, prog_fd, link_fd = -1; void *optval = NULL; int ret = 0; - prog_fd = load_prog(test->insns, test->expected_attach_type); + if (test->prog_type) + prog_type = test->prog_type; + + prog_fd = load_prog(test->insns, prog_type, test->expected_attach_type); if (prog_fd < 0) { if (test->error == DENY_LOAD) return 0; From a2c78977950da00aca83a3f8865d1f54e715770d Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 8 May 2024 13:21:11 +0000 Subject: [PATCH 04/31] ptp: ocp: fix DPLL functions In ptp_ocp driver pin actions assume sma_nr starts with 1, but for DPLL subsystem callback 0-based index was used. Fix it providing proper index. Fixes: 09eeb3aecc6c ("ptp_ocp: implement DPLL ops") Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20240508132111.11545-1-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 6506cfb89aa9..ee2ced88ab34 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -4562,7 +4562,7 @@ static int ptp_ocp_dpll_direction_set(const struct dpll_pin *pin, return -EOPNOTSUPP; mode = direction == DPLL_PIN_DIRECTION_INPUT ? SMA_MODE_IN : SMA_MODE_OUT; - return ptp_ocp_sma_store_val(bp, 0, mode, sma_nr); + return ptp_ocp_sma_store_val(bp, 0, mode, sma_nr + 1); } static int ptp_ocp_dpll_frequency_set(const struct dpll_pin *pin, @@ -4583,7 +4583,7 @@ static int ptp_ocp_dpll_frequency_set(const struct dpll_pin *pin, tbl = bp->sma_op->tbl[sma->mode]; for (i = 0; tbl[i].name; i++) if (tbl[i].frequency == frequency) - return ptp_ocp_sma_store_val(bp, i, sma->mode, sma_nr); + return ptp_ocp_sma_store_val(bp, i, sma->mode, sma_nr + 1); return -EINVAL; } @@ -4600,7 +4600,7 @@ static int ptp_ocp_dpll_frequency_get(const struct dpll_pin *pin, u32 val; int i; - val = bp->sma_op->get(bp, sma_nr); + val = bp->sma_op->get(bp, sma_nr + 1); tbl = bp->sma_op->tbl[sma->mode]; for (i = 0; tbl[i].name; i++) if (val == tbl[i].value) { From ac0a230f719b02432d8c7eba7615ebd691da86f4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 May 2024 06:45:04 -0700 Subject: [PATCH 05/31] eth: sungem: remove .ndo_poll_controller to avoid deadlocks Erhard reports netpoll warnings from sungem: netpoll_send_skb_on_dev(): eth0 enabled interrupts in poll (gem_start_xmit+0x0/0x398) WARNING: CPU: 1 PID: 1 at net/core/netpoll.c:370 netpoll_send_skb+0x1fc/0x20c gem_poll_controller() disables interrupts, which may sleep. We can't sleep in netpoll, it has interrupts disabled completely. Strangely, gem_poll_controller() doesn't even poll the completions, and instead acts as if an interrupt has fired so it just schedules NAPI and exits. None of this has been necessary for years, since netpoll invokes NAPI directly. Fixes: fe09bb619096 ("sungem: Spring cleaning and GRO support") Reported-and-tested-by: Erhard Furtner Link: https://lore.kernel.org/all/20240428125306.2c3080ef@legion Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240508134504.3560956-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sun/sungem.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 9bd1df8308d2..d3a2fbb14140 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -949,17 +949,6 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void gem_poll_controller(struct net_device *dev) -{ - struct gem *gp = netdev_priv(dev); - - disable_irq(gp->pdev->irq); - gem_interrupt(gp->pdev->irq, dev); - enable_irq(gp->pdev->irq); -} -#endif - static void gem_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gem *gp = netdev_priv(dev); @@ -2839,9 +2828,6 @@ static const struct net_device_ops gem_netdev_ops = { .ndo_change_mtu = gem_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = gem_set_mac_address, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = gem_poll_controller, -#endif }; static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) From c499fe96d3f75a5cf50de6089dd8f1cddd1301a9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 May 2024 09:19:19 -0700 Subject: [PATCH 06/31] selftests: net: add missing config for amt.sh Test needs IPv6 multicast. smcroute currently crashes when trying to install a route in a kernel without IPv6 multicast. Fixes: c08e8baea78e ("selftests: add amt interface selftest script") Link: https://lore.kernel.org/r/20240509161919.3939966-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5e4390cac17e..04de7a6ba6f3 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -30,6 +30,7 @@ CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m +CONFIG_IPV6_MROUTE=y CONFIG_IPV6_SIT=y CONFIG_IP_DCCP=m CONFIG_NF_NAT=m From 4c639b6a7b9db236c0907aca8e92d1537076f2cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 May 2024 09:19:52 -0700 Subject: [PATCH 07/31] selftests: net: move amt to socat for better compatibility The test seems to expect that nc will exit after the first received message. This is not the case with Ncat 7.94. There are multiple versions of nc out there, switch to socat for better compatibility. Tell socat to exit after 128 bytes and pad the message. Since the test sets -e make sure we don't set exit code (|| true) and print the pass / fail rather then silently moving over the test and just setting non-zero exit code with no output indicating what failed. Fixes: c08e8baea78e ("selftests: add amt interface selftest script") Acked-by: Paolo Abeni Tested-by: Taehee Yoo Link: https://lore.kernel.org/r/20240509161952.3940476-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/amt.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index 75528788cb95..5175a42cbe8a 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -210,8 +210,8 @@ check_features() test_ipv4_forward() { - RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000) - if [ "$RESULT4" == "172.17.0.2" ]; then + RESULT4=$(ip netns exec "${LISTENER}" timeout 15 socat - UDP4-LISTEN:4000,readbytes=128 || true) + if echo "$RESULT4" | grep -q "172.17.0.2"; then printf "TEST: %-60s [ OK ]\n" "IPv4 amt multicast forwarding" exit 0 else @@ -222,8 +222,8 @@ test_ipv4_forward() test_ipv6_forward() { - RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000) - if [ "$RESULT6" == "2001:db8:3::2" ]; then + RESULT6=$(ip netns exec "${LISTENER}" timeout 15 socat - UDP6-LISTEN:6000,readbytes=128 || true) + if echo "$RESULT6" | grep -q "2001:db8:3::2"; then printf "TEST: %-60s [ OK ]\n" "IPv6 amt multicast forwarding" exit 0 else @@ -236,14 +236,14 @@ send_mcast4() { sleep 2 ip netns exec "${SOURCE}" bash -c \ - 'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' & + 'printf "%s %128s" 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' & } send_mcast6() { sleep 2 ip netns exec "${SOURCE}" bash -c \ - 'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' & + 'printf "%s %128s" 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' & } check_features From ecb51fa37ee22f137a87fa140b1e9f1759949f9a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 8 May 2024 11:43:34 +0100 Subject: [PATCH 08/31] net: ethernet: mediatek: split tx and rx fields in mtk_soc_data struct Split tx and rx fields in mtk_soc_data struct. This is a preliminary patch to roll back to ADMAv1 for MT7986 and MT7981 SoC in order to fix a hw hang if the device receives a corrupted packet when using ADMAv2.0. Fixes: 197c9e9b17b1 ("net: ethernet: mtk_eth_soc: introduce support for mt7986 chipset") Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Golle Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/70a799b1f060ec2f57883e88ccb420ac0fb0abb5.1715164770.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 210 ++++++++++++-------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 29 +-- 2 files changed, 139 insertions(+), 100 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index caa13b9cedff..3eefb735ce19 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1139,7 +1139,7 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) eth->scratch_ring = eth->sram_base; else eth->scratch_ring = dma_alloc_coherent(eth->dma_dev, - cnt * soc->txrx.txd_size, + cnt * soc->tx.desc_size, ð->phy_scratch_ring, GFP_KERNEL); if (unlikely(!eth->scratch_ring)) @@ -1155,17 +1155,17 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr))) return -ENOMEM; - phy_ring_tail = eth->phy_scratch_ring + soc->txrx.txd_size * (cnt - 1); + phy_ring_tail = eth->phy_scratch_ring + soc->tx.desc_size * (cnt - 1); for (i = 0; i < cnt; i++) { dma_addr_t addr = dma_addr + i * MTK_QDMA_PAGE_SIZE; struct mtk_tx_dma_v2 *txd; - txd = eth->scratch_ring + i * soc->txrx.txd_size; + txd = eth->scratch_ring + i * soc->tx.desc_size; txd->txd1 = addr; if (i < cnt - 1) txd->txd2 = eth->phy_scratch_ring + - (i + 1) * soc->txrx.txd_size; + (i + 1) * soc->tx.desc_size; txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE); if (MTK_HAS_CAPS(soc->caps, MTK_36BIT_DMA)) @@ -1416,7 +1416,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, if (itxd == ring->last_free) return -ENOMEM; - itx_buf = mtk_desc_to_tx_buf(ring, itxd, soc->txrx.txd_size); + itx_buf = mtk_desc_to_tx_buf(ring, itxd, soc->tx.desc_size); memset(itx_buf, 0, sizeof(*itx_buf)); txd_info.addr = dma_map_single(eth->dma_dev, skb->data, txd_info.size, @@ -1457,7 +1457,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, memset(&txd_info, 0, sizeof(struct mtk_tx_dma_desc_info)); txd_info.size = min_t(unsigned int, frag_size, - soc->txrx.dma_max_len); + soc->tx.dma_max_len); txd_info.qid = queue; txd_info.last = i == skb_shinfo(skb)->nr_frags - 1 && !(frag_size - txd_info.size); @@ -1470,7 +1470,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, mtk_tx_set_dma_desc(dev, txd, &txd_info); tx_buf = mtk_desc_to_tx_buf(ring, txd, - soc->txrx.txd_size); + soc->tx.desc_size); if (new_desc) memset(tx_buf, 0, sizeof(*tx_buf)); tx_buf->data = (void *)MTK_DMA_DUMMY_DESC; @@ -1513,7 +1513,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, } else { int next_idx; - next_idx = NEXT_DESP_IDX(txd_to_idx(ring, txd, soc->txrx.txd_size), + next_idx = NEXT_DESP_IDX(txd_to_idx(ring, txd, soc->tx.desc_size), ring->dma_size); mtk_w32(eth, next_idx, MT7628_TX_CTX_IDX0); } @@ -1522,7 +1522,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, err_dma: do { - tx_buf = mtk_desc_to_tx_buf(ring, itxd, soc->txrx.txd_size); + tx_buf = mtk_desc_to_tx_buf(ring, itxd, soc->tx.desc_size); /* unmap dma */ mtk_tx_unmap(eth, tx_buf, NULL, false); @@ -1547,7 +1547,7 @@ static int mtk_cal_txd_req(struct mtk_eth *eth, struct sk_buff *skb) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { frag = &skb_shinfo(skb)->frags[i]; nfrags += DIV_ROUND_UP(skb_frag_size(frag), - eth->soc->txrx.dma_max_len); + eth->soc->tx.dma_max_len); } } else { nfrags += skb_shinfo(skb)->nr_frags; @@ -1654,7 +1654,7 @@ static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth) ring = ð->rx_ring[i]; idx = NEXT_DESP_IDX(ring->calc_idx, ring->dma_size); - rxd = ring->dma + idx * eth->soc->txrx.rxd_size; + rxd = ring->dma + idx * eth->soc->rx.desc_size; if (rxd->rxd2 & RX_DMA_DONE) { ring->calc_idx_update = true; return ring; @@ -1822,7 +1822,7 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf, } htxd = txd; - tx_buf = mtk_desc_to_tx_buf(ring, txd, soc->txrx.txd_size); + tx_buf = mtk_desc_to_tx_buf(ring, txd, soc->tx.desc_size); memset(tx_buf, 0, sizeof(*tx_buf)); htx_buf = tx_buf; @@ -1841,7 +1841,7 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf, goto unmap; tx_buf = mtk_desc_to_tx_buf(ring, txd, - soc->txrx.txd_size); + soc->tx.desc_size); memset(tx_buf, 0, sizeof(*tx_buf)); n_desc++; } @@ -1879,7 +1879,7 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf, } else { int idx; - idx = txd_to_idx(ring, txd, soc->txrx.txd_size); + idx = txd_to_idx(ring, txd, soc->tx.desc_size); mtk_w32(eth, NEXT_DESP_IDX(idx, ring->dma_size), MT7628_TX_CTX_IDX0); } @@ -1890,7 +1890,7 @@ static int mtk_xdp_submit_frame(struct mtk_eth *eth, struct xdp_frame *xdpf, unmap: while (htxd != txd) { - tx_buf = mtk_desc_to_tx_buf(ring, htxd, soc->txrx.txd_size); + tx_buf = mtk_desc_to_tx_buf(ring, htxd, soc->tx.desc_size); mtk_tx_unmap(eth, tx_buf, NULL, false); htxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU; @@ -2021,7 +2021,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, goto rx_done; idx = NEXT_DESP_IDX(ring->calc_idx, ring->dma_size); - rxd = ring->dma + idx * eth->soc->txrx.rxd_size; + rxd = ring->dma + idx * eth->soc->rx.desc_size; data = ring->data[idx]; if (!mtk_rx_get_desc(eth, &trxd, rxd)) @@ -2156,7 +2156,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, rxdcsum = &trxd.rxd4; } - if (*rxdcsum & eth->soc->txrx.rx_dma_l4_valid) + if (*rxdcsum & eth->soc->rx.dma_l4_valid) skb->ip_summed = CHECKSUM_UNNECESSARY; else skb_checksum_none_assert(skb); @@ -2280,7 +2280,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, break; tx_buf = mtk_desc_to_tx_buf(ring, desc, - eth->soc->txrx.txd_size); + eth->soc->tx.desc_size); if (!tx_buf->data) break; @@ -2331,7 +2331,7 @@ static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget, } mtk_tx_unmap(eth, tx_buf, &bq, true); - desc = ring->dma + cpu * eth->soc->txrx.txd_size; + desc = ring->dma + cpu * eth->soc->tx.desc_size; ring->last_free = desc; atomic_inc(&ring->free_count); @@ -2421,7 +2421,7 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget) do { int rx_done; - mtk_w32(eth, eth->soc->txrx.rx_irq_done_mask, + mtk_w32(eth, eth->soc->rx.irq_done_mask, reg_map->pdma.irq_status); rx_done = mtk_poll_rx(napi, budget - rx_done_total, eth); rx_done_total += rx_done; @@ -2437,10 +2437,10 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget) return budget; } while (mtk_r32(eth, reg_map->pdma.irq_status) & - eth->soc->txrx.rx_irq_done_mask); + eth->soc->rx.irq_done_mask); if (napi_complete_done(napi, rx_done_total)) - mtk_rx_irq_enable(eth, eth->soc->txrx.rx_irq_done_mask); + mtk_rx_irq_enable(eth, eth->soc->rx.irq_done_mask); return rx_done_total; } @@ -2449,7 +2449,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) { const struct mtk_soc_data *soc = eth->soc; struct mtk_tx_ring *ring = ð->tx_ring; - int i, sz = soc->txrx.txd_size; + int i, sz = soc->tx.desc_size; struct mtk_tx_dma_v2 *txd; int ring_size; u32 ofs, val; @@ -2572,14 +2572,14 @@ static void mtk_tx_clean(struct mtk_eth *eth) } if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && ring->dma) { dma_free_coherent(eth->dma_dev, - ring->dma_size * soc->txrx.txd_size, + ring->dma_size * soc->tx.desc_size, ring->dma, ring->phys); ring->dma = NULL; } if (ring->dma_pdma) { dma_free_coherent(eth->dma_dev, - ring->dma_size * soc->txrx.txd_size, + ring->dma_size * soc->tx.desc_size, ring->dma_pdma, ring->phys_pdma); ring->dma_pdma = NULL; } @@ -2634,15 +2634,15 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SRAM) || rx_flag != MTK_RX_FLAGS_NORMAL) { ring->dma = dma_alloc_coherent(eth->dma_dev, - rx_dma_size * eth->soc->txrx.rxd_size, - &ring->phys, GFP_KERNEL); + rx_dma_size * eth->soc->rx.desc_size, + &ring->phys, GFP_KERNEL); } else { struct mtk_tx_ring *tx_ring = ð->tx_ring; ring->dma = tx_ring->dma + tx_ring_size * - eth->soc->txrx.txd_size * (ring_no + 1); + eth->soc->tx.desc_size * (ring_no + 1); ring->phys = tx_ring->phys + tx_ring_size * - eth->soc->txrx.txd_size * (ring_no + 1); + eth->soc->tx.desc_size * (ring_no + 1); } if (!ring->dma) @@ -2653,7 +2653,7 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) dma_addr_t dma_addr; void *data; - rxd = ring->dma + i * eth->soc->txrx.rxd_size; + rxd = ring->dma + i * eth->soc->rx.desc_size; if (ring->page_pool) { data = mtk_page_pool_get_buff(ring->page_pool, &dma_addr, GFP_KERNEL); @@ -2744,7 +2744,7 @@ static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring, bool in_ if (!ring->data[i]) continue; - rxd = ring->dma + i * eth->soc->txrx.rxd_size; + rxd = ring->dma + i * eth->soc->rx.desc_size; if (!rxd->rxd1) continue; @@ -2761,7 +2761,7 @@ static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring, bool in_ if (!in_sram && ring->dma) { dma_free_coherent(eth->dma_dev, - ring->dma_size * eth->soc->txrx.rxd_size, + ring->dma_size * eth->soc->rx.desc_size, ring->dma, ring->phys); ring->dma = NULL; } @@ -3124,7 +3124,7 @@ static void mtk_dma_free(struct mtk_eth *eth) netdev_reset_queue(eth->netdev[i]); if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && eth->scratch_ring) { dma_free_coherent(eth->dma_dev, - MTK_QDMA_RING_SIZE * soc->txrx.txd_size, + MTK_QDMA_RING_SIZE * soc->tx.desc_size, eth->scratch_ring, eth->phy_scratch_ring); eth->scratch_ring = NULL; eth->phy_scratch_ring = 0; @@ -3174,7 +3174,7 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth) eth->rx_events++; if (likely(napi_schedule_prep(ð->rx_napi))) { - mtk_rx_irq_disable(eth, eth->soc->txrx.rx_irq_done_mask); + mtk_rx_irq_disable(eth, eth->soc->rx.irq_done_mask); __napi_schedule(ð->rx_napi); } @@ -3200,9 +3200,9 @@ static irqreturn_t mtk_handle_irq(int irq, void *_eth) const struct mtk_reg_map *reg_map = eth->soc->reg_map; if (mtk_r32(eth, reg_map->pdma.irq_mask) & - eth->soc->txrx.rx_irq_done_mask) { + eth->soc->rx.irq_done_mask) { if (mtk_r32(eth, reg_map->pdma.irq_status) & - eth->soc->txrx.rx_irq_done_mask) + eth->soc->rx.irq_done_mask) mtk_handle_irq_rx(irq, _eth); } if (mtk_r32(eth, reg_map->tx_irq_mask) & MTK_TX_DONE_INT) { @@ -3220,10 +3220,10 @@ static void mtk_poll_controller(struct net_device *dev) struct mtk_eth *eth = mac->hw; mtk_tx_irq_disable(eth, MTK_TX_DONE_INT); - mtk_rx_irq_disable(eth, eth->soc->txrx.rx_irq_done_mask); + mtk_rx_irq_disable(eth, eth->soc->rx.irq_done_mask); mtk_handle_irq_rx(eth->irq[2], dev); mtk_tx_irq_enable(eth, MTK_TX_DONE_INT); - mtk_rx_irq_enable(eth, eth->soc->txrx.rx_irq_done_mask); + mtk_rx_irq_enable(eth, eth->soc->rx.irq_done_mask); } #endif @@ -3387,7 +3387,7 @@ static int mtk_open(struct net_device *dev) napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); mtk_tx_irq_enable(eth, MTK_TX_DONE_INT); - mtk_rx_irq_enable(eth, soc->txrx.rx_irq_done_mask); + mtk_rx_irq_enable(eth, soc->rx.irq_done_mask); refcount_set(ð->dma_refcnt, 1); } else @@ -3471,7 +3471,7 @@ static int mtk_stop(struct net_device *dev) mtk_gdm_config(eth, MTK_GDMA_DROP_ALL); mtk_tx_irq_disable(eth, MTK_TX_DONE_INT); - mtk_rx_irq_disable(eth, eth->soc->txrx.rx_irq_done_mask); + mtk_rx_irq_disable(eth, eth->soc->rx.irq_done_mask); napi_disable(ð->tx_napi); napi_disable(ð->rx_napi); @@ -3947,9 +3947,9 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) /* FE int grouping */ mtk_w32(eth, MTK_TX_DONE_INT, reg_map->pdma.int_grp); - mtk_w32(eth, eth->soc->txrx.rx_irq_done_mask, reg_map->pdma.int_grp + 4); + mtk_w32(eth, eth->soc->rx.irq_done_mask, reg_map->pdma.int_grp + 4); mtk_w32(eth, MTK_TX_DONE_INT, reg_map->qdma.int_grp); - mtk_w32(eth, eth->soc->txrx.rx_irq_done_mask, reg_map->qdma.int_grp + 4); + mtk_w32(eth, eth->soc->rx.irq_done_mask, reg_map->qdma.int_grp + 4); mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP); if (mtk_is_netsys_v3_or_greater(eth)) { @@ -5039,11 +5039,15 @@ static const struct mtk_soc_data mt2701_data = { .required_clks = MT7623_CLKS_BITMAP, .required_pctl = true, .version = 1, - .txrx = { - .txd_size = sizeof(struct mtk_tx_dma), - .rxd_size = sizeof(struct mtk_rx_dma), - .rx_irq_done_mask = MTK_RX_DONE_INT, - .rx_dma_l4_valid = RX_DMA_L4_VALID, + .tx = { + .desc_size = sizeof(struct mtk_tx_dma), + .dma_max_len = MTK_TX_DMA_BUF_LEN, + .dma_len_offset = 16, + }, + .rx = { + .desc_size = sizeof(struct mtk_rx_dma), + .irq_done_mask = MTK_RX_DONE_INT, + .dma_l4_valid = RX_DMA_L4_VALID, .dma_max_len = MTK_TX_DMA_BUF_LEN, .dma_len_offset = 16, }, @@ -5059,11 +5063,15 @@ static const struct mtk_soc_data mt7621_data = { .offload_version = 1, .hash_offset = 2, .foe_entry_size = MTK_FOE_ENTRY_V1_SIZE, - .txrx = { - .txd_size = sizeof(struct mtk_tx_dma), - .rxd_size = sizeof(struct mtk_rx_dma), - .rx_irq_done_mask = MTK_RX_DONE_INT, - .rx_dma_l4_valid = RX_DMA_L4_VALID, + .tx = { + .desc_size = sizeof(struct mtk_tx_dma), + .dma_max_len = MTK_TX_DMA_BUF_LEN, + .dma_len_offset = 16, + }, + .rx = { + .desc_size = sizeof(struct mtk_rx_dma), + .irq_done_mask = MTK_RX_DONE_INT, + .dma_l4_valid = RX_DMA_L4_VALID, .dma_max_len = MTK_TX_DMA_BUF_LEN, .dma_len_offset = 16, }, @@ -5081,11 +5089,15 @@ static const struct mtk_soc_data mt7622_data = { .hash_offset = 2, .has_accounting = true, .foe_entry_size = MTK_FOE_ENTRY_V1_SIZE, - .txrx = { - .txd_size = sizeof(struct mtk_tx_dma), - .rxd_size = sizeof(struct mtk_rx_dma), - .rx_irq_done_mask = MTK_RX_DONE_INT, - .rx_dma_l4_valid = RX_DMA_L4_VALID, + .tx = { + .desc_size = sizeof(struct mtk_tx_dma), + .dma_max_len = MTK_TX_DMA_BUF_LEN, + .dma_len_offset = 16, + }, + .rx = { + .desc_size = sizeof(struct mtk_rx_dma), + .irq_done_mask = MTK_RX_DONE_INT, + .dma_l4_valid = RX_DMA_L4_VALID, .dma_max_len = MTK_TX_DMA_BUF_LEN, .dma_len_offset = 16, }, @@ -5102,11 +5114,15 @@ static const struct mtk_soc_data mt7623_data = { .hash_offset = 2, .foe_entry_size = MTK_FOE_ENTRY_V1_SIZE, .disable_pll_modes = true, - .txrx = { - .txd_size = sizeof(struct mtk_tx_dma), - .rxd_size = sizeof(struct mtk_rx_dma), - .rx_irq_done_mask = MTK_RX_DONE_INT, - .rx_dma_l4_valid = RX_DMA_L4_VALID, + .tx = { + .desc_size = sizeof(struct mtk_tx_dma), + .dma_max_len = MTK_TX_DMA_BUF_LEN, + .dma_len_offset = 16, + }, + .rx = { + .desc_size = sizeof(struct mtk_rx_dma), + .irq_done_mask = MTK_RX_DONE_INT, + .dma_l4_valid = RX_DMA_L4_VALID, .dma_max_len = MTK_TX_DMA_BUF_LEN, .dma_len_offset = 16, }, @@ -5121,11 +5137,15 @@ static const struct mtk_soc_data mt7629_data = { .required_pctl = false, .has_accounting = true, .version = 1, - .txrx = { - .txd_size = sizeof(struct mtk_tx_dma), - .rxd_size = sizeof(struct mtk_rx_dma), - .rx_irq_done_mask = MTK_RX_DONE_INT, - .rx_dma_l4_valid = RX_DMA_L4_VALID, + .tx = { + .desc_size = sizeof(struct mtk_tx_dma), + .dma_max_len = MTK_TX_DMA_BUF_LEN, + .dma_len_offset = 16, + }, + .rx = { + .desc_size = sizeof(struct mtk_rx_dma), + .irq_done_mask = MTK_RX_DONE_INT, + .dma_l4_valid = RX_DMA_L4_VALID, .dma_max_len = MTK_TX_DMA_BUF_LEN, .dma_len_offset = 16, }, @@ -5143,11 +5163,15 @@ static const struct mtk_soc_data mt7981_data = { .hash_offset = 4, .has_accounting = true, .foe_entry_size = MTK_FOE_ENTRY_V2_SIZE, - .txrx = { - .txd_size = sizeof(struct mtk_tx_dma_v2), - .rxd_size = sizeof(struct mtk_rx_dma_v2), - .rx_irq_done_mask = MTK_RX_DONE_INT_V2, - .rx_dma_l4_valid = RX_DMA_L4_VALID_V2, + .tx = { + .desc_size = sizeof(struct mtk_tx_dma_v2), + .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, + .dma_len_offset = 8, + }, + .rx = { + .desc_size = sizeof(struct mtk_rx_dma_v2), + .irq_done_mask = MTK_RX_DONE_INT_V2, + .dma_l4_valid = RX_DMA_L4_VALID_V2, .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, .dma_len_offset = 8, }, @@ -5165,11 +5189,15 @@ static const struct mtk_soc_data mt7986_data = { .hash_offset = 4, .has_accounting = true, .foe_entry_size = MTK_FOE_ENTRY_V2_SIZE, - .txrx = { - .txd_size = sizeof(struct mtk_tx_dma_v2), - .rxd_size = sizeof(struct mtk_rx_dma_v2), - .rx_irq_done_mask = MTK_RX_DONE_INT_V2, - .rx_dma_l4_valid = RX_DMA_L4_VALID_V2, + .tx = { + .desc_size = sizeof(struct mtk_tx_dma_v2), + .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, + .dma_len_offset = 8, + }, + .rx = { + .desc_size = sizeof(struct mtk_rx_dma_v2), + .irq_done_mask = MTK_RX_DONE_INT_V2, + .dma_l4_valid = RX_DMA_L4_VALID_V2, .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, .dma_len_offset = 8, }, @@ -5187,11 +5215,15 @@ static const struct mtk_soc_data mt7988_data = { .hash_offset = 4, .has_accounting = true, .foe_entry_size = MTK_FOE_ENTRY_V3_SIZE, - .txrx = { - .txd_size = sizeof(struct mtk_tx_dma_v2), - .rxd_size = sizeof(struct mtk_rx_dma_v2), - .rx_irq_done_mask = MTK_RX_DONE_INT_V2, - .rx_dma_l4_valid = RX_DMA_L4_VALID_V2, + .tx = { + .desc_size = sizeof(struct mtk_tx_dma_v2), + .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, + .dma_len_offset = 8, + }, + .rx = { + .desc_size = sizeof(struct mtk_rx_dma_v2), + .irq_done_mask = MTK_RX_DONE_INT_V2, + .dma_l4_valid = RX_DMA_L4_VALID_V2, .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, .dma_len_offset = 8, }, @@ -5204,11 +5236,15 @@ static const struct mtk_soc_data rt5350_data = { .required_clks = MT7628_CLKS_BITMAP, .required_pctl = false, .version = 1, - .txrx = { - .txd_size = sizeof(struct mtk_tx_dma), - .rxd_size = sizeof(struct mtk_rx_dma), - .rx_irq_done_mask = MTK_RX_DONE_INT, - .rx_dma_l4_valid = RX_DMA_L4_VALID_PDMA, + .tx = { + .desc_size = sizeof(struct mtk_tx_dma), + .dma_max_len = MTK_TX_DMA_BUF_LEN, + .dma_len_offset = 16, + }, + .rx = { + .desc_size = sizeof(struct mtk_rx_dma), + .irq_done_mask = MTK_RX_DONE_INT, + .dma_l4_valid = RX_DMA_L4_VALID_PDMA, .dma_max_len = MTK_TX_DMA_BUF_LEN, .dma_len_offset = 16, }, diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 9ae3b8a71d0e..39b50de1decb 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -327,8 +327,8 @@ /* QDMA descriptor txd3 */ #define TX_DMA_OWNER_CPU BIT(31) #define TX_DMA_LS0 BIT(30) -#define TX_DMA_PLEN0(x) (((x) & eth->soc->txrx.dma_max_len) << eth->soc->txrx.dma_len_offset) -#define TX_DMA_PLEN1(x) ((x) & eth->soc->txrx.dma_max_len) +#define TX_DMA_PLEN0(x) (((x) & eth->soc->tx.dma_max_len) << eth->soc->tx.dma_len_offset) +#define TX_DMA_PLEN1(x) ((x) & eth->soc->tx.dma_max_len) #define TX_DMA_SWC BIT(14) #define TX_DMA_PQID GENMASK(3, 0) #define TX_DMA_ADDR64_MASK GENMASK(3, 0) @@ -348,8 +348,8 @@ /* QDMA descriptor rxd2 */ #define RX_DMA_DONE BIT(31) #define RX_DMA_LSO BIT(30) -#define RX_DMA_PREP_PLEN0(x) (((x) & eth->soc->txrx.dma_max_len) << eth->soc->txrx.dma_len_offset) -#define RX_DMA_GET_PLEN0(x) (((x) >> eth->soc->txrx.dma_len_offset) & eth->soc->txrx.dma_max_len) +#define RX_DMA_PREP_PLEN0(x) (((x) & eth->soc->rx.dma_max_len) << eth->soc->rx.dma_len_offset) +#define RX_DMA_GET_PLEN0(x) (((x) >> eth->soc->rx.dma_len_offset) & eth->soc->rx.dma_max_len) #define RX_DMA_VTAG BIT(15) #define RX_DMA_ADDR64_MASK GENMASK(3, 0) #if IS_ENABLED(CONFIG_64BIT) @@ -1153,10 +1153,9 @@ struct mtk_reg_map { * @foe_entry_size Foe table entry size. * @has_accounting Bool indicating support for accounting of * offloaded flows. - * @txd_size Tx DMA descriptor size. - * @rxd_size Rx DMA descriptor size. - * @rx_irq_done_mask Rx irq done register mask. - * @rx_dma_l4_valid Rx DMA valid register mask. + * @desc_size Tx/Rx DMA descriptor size. + * @irq_done_mask Rx irq done register mask. + * @dma_l4_valid Rx DMA valid register mask. * @dma_max_len Max DMA tx/rx buffer length. * @dma_len_offset Tx/Rx DMA length field offset. */ @@ -1174,13 +1173,17 @@ struct mtk_soc_data { bool has_accounting; bool disable_pll_modes; struct { - u32 txd_size; - u32 rxd_size; - u32 rx_irq_done_mask; - u32 rx_dma_l4_valid; + u32 desc_size; u32 dma_max_len; u32 dma_len_offset; - } txrx; + } tx; + struct { + u32 desc_size; + u32 irq_done_mask; + u32 dma_l4_valid; + u32 dma_max_len; + u32 dma_len_offset; + } rx; }; #define MTK_DMA_MONITOR_TIMEOUT msecs_to_jiffies(1000) From 5e69ff84f3e6cc54502a902043847b37ed78afd4 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 8 May 2024 11:43:56 +0100 Subject: [PATCH 09/31] net: ethernet: mediatek: use ADMAv1 instead of ADMAv2.0 on MT7981 and MT7986 ADMAv2.0 is plagued by RX hangs which can't easily detected and happen upon receival of a corrupted Ethernet frame. Use ADMAv1 instead which is also still present and usable, and doesn't suffer from that problem. Fixes: 197c9e9b17b1 ("net: ethernet: mtk_eth_soc: introduce support for mt7986 chipset") Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/57cef74bbd0c243366ad1ff4221e3f72f437ec80.1715164770.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 46 ++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3eefb735ce19..d7d73295f0dc 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -110,16 +110,16 @@ static const struct mtk_reg_map mt7986_reg_map = { .tx_irq_mask = 0x461c, .tx_irq_status = 0x4618, .pdma = { - .rx_ptr = 0x6100, - .rx_cnt_cfg = 0x6104, - .pcrx_ptr = 0x6108, - .glo_cfg = 0x6204, - .rst_idx = 0x6208, - .delay_irq = 0x620c, - .irq_status = 0x6220, - .irq_mask = 0x6228, - .adma_rx_dbg0 = 0x6238, - .int_grp = 0x6250, + .rx_ptr = 0x4100, + .rx_cnt_cfg = 0x4104, + .pcrx_ptr = 0x4108, + .glo_cfg = 0x4204, + .rst_idx = 0x4208, + .delay_irq = 0x420c, + .irq_status = 0x4220, + .irq_mask = 0x4228, + .adma_rx_dbg0 = 0x4238, + .int_grp = 0x4250, }, .qdma = { .qtx_cfg = 0x4400, @@ -1107,7 +1107,7 @@ static bool mtk_rx_get_desc(struct mtk_eth *eth, struct mtk_rx_dma_v2 *rxd, rxd->rxd1 = READ_ONCE(dma_rxd->rxd1); rxd->rxd3 = READ_ONCE(dma_rxd->rxd3); rxd->rxd4 = READ_ONCE(dma_rxd->rxd4); - if (mtk_is_netsys_v2_or_greater(eth)) { + if (mtk_is_netsys_v3_or_greater(eth)) { rxd->rxd5 = READ_ONCE(dma_rxd->rxd5); rxd->rxd6 = READ_ONCE(dma_rxd->rxd6); } @@ -2028,7 +2028,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, break; /* find out which mac the packet come from. values start at 1 */ - if (mtk_is_netsys_v2_or_greater(eth)) { + if (mtk_is_netsys_v3_or_greater(eth)) { u32 val = RX_DMA_GET_SPORT_V2(trxd.rxd5); switch (val) { @@ -2140,7 +2140,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, skb->dev = netdev; bytes += skb->len; - if (mtk_is_netsys_v2_or_greater(eth)) { + if (mtk_is_netsys_v3_or_greater(eth)) { reason = FIELD_GET(MTK_RXD5_PPE_CPU_REASON, trxd.rxd5); hash = trxd.rxd5 & MTK_RXD5_FOE_ENTRY; if (hash != MTK_RXD5_FOE_ENTRY) @@ -2690,7 +2690,7 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) rxd->rxd3 = 0; rxd->rxd4 = 0; - if (mtk_is_netsys_v2_or_greater(eth)) { + if (mtk_is_netsys_v3_or_greater(eth)) { rxd->rxd5 = 0; rxd->rxd6 = 0; rxd->rxd7 = 0; @@ -3893,7 +3893,7 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) else mtk_hw_reset(eth); - if (mtk_is_netsys_v2_or_greater(eth)) { + if (mtk_is_netsys_v3_or_greater(eth)) { /* Set FE to PDMAv2 if necessary */ val = mtk_r32(eth, MTK_FE_GLO_MISC); mtk_w32(eth, val | BIT(4), MTK_FE_GLO_MISC); @@ -5169,11 +5169,11 @@ static const struct mtk_soc_data mt7981_data = { .dma_len_offset = 8, }, .rx = { - .desc_size = sizeof(struct mtk_rx_dma_v2), - .irq_done_mask = MTK_RX_DONE_INT_V2, + .desc_size = sizeof(struct mtk_rx_dma), + .irq_done_mask = MTK_RX_DONE_INT, .dma_l4_valid = RX_DMA_L4_VALID_V2, - .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, - .dma_len_offset = 8, + .dma_max_len = MTK_TX_DMA_BUF_LEN, + .dma_len_offset = 16, }, }; @@ -5195,11 +5195,11 @@ static const struct mtk_soc_data mt7986_data = { .dma_len_offset = 8, }, .rx = { - .desc_size = sizeof(struct mtk_rx_dma_v2), - .irq_done_mask = MTK_RX_DONE_INT_V2, + .desc_size = sizeof(struct mtk_rx_dma), + .irq_done_mask = MTK_RX_DONE_INT, .dma_l4_valid = RX_DMA_L4_VALID_V2, - .dma_max_len = MTK_TX_DMA_BUF_LEN_V2, - .dma_len_offset = 8, + .dma_max_len = MTK_TX_DMA_BUF_LEN, + .dma_len_offset = 16, }, }; From 6d51d44ecddb5c2962688ef06e55e4fbc949f04a Mon Sep 17 00:00:00 2001 From: Dan Nowlin Date: Wed, 8 May 2024 10:19:07 -0700 Subject: [PATCH 10/31] ice: Fix package download algorithm Previously, the driver assumed that all signature segments would contain one or more buffers to download. In the future, there will be signature segments that will contain no buffers to download. Correct download flow to allow for signature segments that have zero download buffers and skip the download in this case. Fixes: 3cbdb0343022 ("ice: Add support for E830 DDP package segment") Reviewed-by: Przemek Kitszel Signed-off-by: Dan Nowlin Signed-off-by: Paul Greenwalt Reviewed-by: Paul Menzel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240508171908.2760776-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ddp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index fc91c4d41186..4df561d64bc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -1424,13 +1424,13 @@ ice_dwnld_sign_and_cfg_segs(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr, goto exit; } + count = le32_to_cpu(seg->signed_buf_count); + state = ice_download_pkg_sig_seg(hw, seg); + if (state || !count) + goto exit; + conf_idx = le32_to_cpu(seg->signed_seg_idx); start = le32_to_cpu(seg->signed_buf_start); - count = le32_to_cpu(seg->signed_buf_count); - - state = ice_download_pkg_sig_seg(hw, seg); - if (state) - goto exit; state = ice_download_pkg_config_seg(hw, pkg_hdr, conf_idx, start, count); From 812552808f7ff71133fc59768cdc253c5b8ca1bf Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 9 May 2024 09:44:54 +0200 Subject: [PATCH 11/31] net: ethernet: cortina: Locking fixes This fixes a probably long standing problem in the Cortina Gemini ethernet driver: there are some paths in the code where the IRQ registers are written without taking the proper locks. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Signed-off-by: Linus Walleij Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240509-gemini-ethernet-locking-v1-1-afd00a528b95@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cortina/gemini.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 705c3eb19cd3..d1fbadbf86d4 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1107,10 +1107,13 @@ static void gmac_tx_irq_enable(struct net_device *netdev, { struct gemini_ethernet_port *port = netdev_priv(netdev); struct gemini_ethernet *geth = port->geth; + unsigned long flags; u32 val, mask; netdev_dbg(netdev, "%s device %d\n", __func__, netdev->dev_id); + spin_lock_irqsave(&geth->irq_lock, flags); + mask = GMAC0_IRQ0_TXQ0_INTS << (6 * netdev->dev_id + txq); if (en) @@ -1119,6 +1122,8 @@ static void gmac_tx_irq_enable(struct net_device *netdev, val = readl(geth->base + GLOBAL_INTERRUPT_ENABLE_0_REG); val = en ? val | mask : val & ~mask; writel(val, geth->base + GLOBAL_INTERRUPT_ENABLE_0_REG); + + spin_unlock_irqrestore(&geth->irq_lock, flags); } static void gmac_tx_irq(struct net_device *netdev, unsigned int txq_num) @@ -1415,15 +1420,19 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget) union gmac_rxdesc_3 word3; struct page *page = NULL; unsigned int page_offs; + unsigned long flags; unsigned short r, w; union dma_rwptr rw; dma_addr_t mapping; int frag_nr = 0; + spin_lock_irqsave(&geth->irq_lock, flags); rw.bits32 = readl(ptr_reg); /* Reset interrupt as all packages until here are taken into account */ writel(DEFAULT_Q0_INT_BIT << netdev->dev_id, geth->base + GLOBAL_INTERRUPT_STATUS_1_REG); + spin_unlock_irqrestore(&geth->irq_lock, flags); + r = rw.bits.rptr; w = rw.bits.wptr; @@ -1726,10 +1735,9 @@ static irqreturn_t gmac_irq(int irq, void *data) gmac_update_hw_stats(netdev); if (val & (GMAC0_RX_OVERRUN_INT_BIT << (netdev->dev_id * 8))) { + spin_lock(&geth->irq_lock); writel(GMAC0_RXDERR_INT_BIT << (netdev->dev_id * 8), geth->base + GLOBAL_INTERRUPT_STATUS_4_REG); - - spin_lock(&geth->irq_lock); u64_stats_update_begin(&port->ir_stats_syncp); ++port->stats.rx_fifo_errors; u64_stats_update_end(&port->ir_stats_syncp); From 540bf24fba16b88c1b3b9353927204b4f1074e25 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 9 May 2024 01:14:46 -0700 Subject: [PATCH 12/31] af_unix: Fix data races in unix_release_sock/unix_stream_sendmsg A data-race condition has been identified in af_unix. In one data path, the write function unix_release_sock() atomically writes to sk->sk_shutdown using WRITE_ONCE. However, on the reader side, unix_stream_sendmsg() does not read it atomically. Consequently, this issue is causing the following KCSAN splat to occur: BUG: KCSAN: data-race in unix_release_sock / unix_stream_sendmsg write (marked) to 0xffff88867256ddbb of 1 bytes by task 7270 on cpu 28: unix_release_sock (net/unix/af_unix.c:640) unix_release (net/unix/af_unix.c:1050) sock_close (net/socket.c:659 net/socket.c:1421) __fput (fs/file_table.c:422) __fput_sync (fs/file_table.c:508) __se_sys_close (fs/open.c:1559 fs/open.c:1541) __x64_sys_close (fs/open.c:1541) x64_sys_call (arch/x86/entry/syscall_64.c:33) do_syscall_64 (arch/x86/entry/common.c:?) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) read to 0xffff88867256ddbb of 1 bytes by task 989 on cpu 14: unix_stream_sendmsg (net/unix/af_unix.c:2273) __sock_sendmsg (net/socket.c:730 net/socket.c:745) ____sys_sendmsg (net/socket.c:2584) __sys_sendmmsg (net/socket.c:2638 net/socket.c:2724) __x64_sys_sendmmsg (net/socket.c:2753 net/socket.c:2750 net/socket.c:2750) x64_sys_call (arch/x86/entry/syscall_64.c:33) do_syscall_64 (arch/x86/entry/common.c:?) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) value changed: 0x01 -> 0x03 The line numbers are related to commit dd5a440a31fa ("Linux 6.9-rc7"). Commit e1d09c2c2f57 ("af_unix: Fix data races around sk->sk_shutdown.") addressed a comparable issue in the past regarding sk->sk_shutdown. However, it overlooked resolving this particular data path. This patch only offending unix_stream_sendmsg() function, since the other reads seem to be protected by unix_state_lock() as discussed in Link: https://lore.kernel.org/all/20240508173324.53565-1-kuniyu@amazon.com/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Breno Leitao Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240509081459.2807828-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9a6ad5974dff..e94839d89b09 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2270,7 +2270,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out_err; } - if (sk->sk_shutdown & SEND_SHUTDOWN) + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto pipe_err; while (sent < len) { From 7c988176b6c16c516474f6fceebe0f055af5eb56 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 9 May 2024 11:38:05 +0200 Subject: [PATCH 13/31] net: openvswitch: fix overwriting ct original tuple for ICMPv6 OVS_PACKET_CMD_EXECUTE has 3 main attributes: - OVS_PACKET_ATTR_KEY - Packet metadata in a netlink format. - OVS_PACKET_ATTR_PACKET - Binary packet content. - OVS_PACKET_ATTR_ACTIONS - Actions to execute on the packet. OVS_PACKET_ATTR_KEY is parsed first to populate sw_flow_key structure with the metadata like conntrack state, input port, recirculation id, etc. Then the packet itself gets parsed to populate the rest of the keys from the packet headers. Whenever the packet parsing code starts parsing the ICMPv6 header, it first zeroes out fields in the key corresponding to Neighbor Discovery information even if it is not an ND packet. It is an 'ipv6.nd' field. However, the 'ipv6' is a union that shares the space between 'nd' and 'ct_orig' that holds the original tuple conntrack metadata parsed from the OVS_PACKET_ATTR_KEY. ND packets should not normally have conntrack state, so it's fine to share the space, but normal ICMPv6 Echo packets or maybe other types of ICMPv6 can have the state attached and it should not be overwritten. The issue results in all but the last 4 bytes of the destination address being wiped from the original conntrack tuple leading to incorrect packet matching and potentially executing wrong actions in case this packet recirculates within the datapath or goes back to userspace. ND fields should not be accessed in non-ND packets, so not clearing them should be fine. Executing memset() only for actual ND packets to avoid the issue. Initializing the whole thing before parsing is needed because ND packet may not contain all the options. The issue only affects the OVS_PACKET_CMD_EXECUTE path and doesn't affect packets entering OVS datapath from network interfaces, because in this case CT metadata is populated from skb after the packet is already parsed. Fixes: 9dd7f8907c37 ("openvswitch: Add original direction conntrack tuple to sw_flow_key.") Reported-by: Antonin Bas Closes: https://github.com/openvswitch/ovs-issues/issues/327 Signed-off-by: Ilya Maximets Acked-by: Aaron Conole Acked-by: Eelco Chaudron Link: https://lore.kernel.org/r/20240509094228.1035477-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- net/openvswitch/flow.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 33b21a0c0548..8a848ce72e29 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -561,7 +561,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, */ key->tp.src = htons(icmp->icmp6_type); key->tp.dst = htons(icmp->icmp6_code); - memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -570,6 +569,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, struct nd_msg *nd; int offset; + memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); + /* In order to process neighbor discovery options, we need the * entire packet. */ From 3321687e321307629c71b664225b861ebf3e5753 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 9 May 2024 21:18:10 +0800 Subject: [PATCH 14/31] ipv6: sr: add missing seg6_local_exit Currently, we only call seg6_local_exit() in seg6_init() if seg6_local_init() failed. But forgot to call it in seg6_exit(). Fixes: d1df6fd8a1d2 ("ipv6: sr: define core operations for seg6local lightweight tunnel") Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240509131812.1662197-2-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/seg6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 35508abd76f4..5423f1f2aa62 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -564,6 +564,7 @@ void seg6_exit(void) seg6_hmac_exit(); #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL + seg6_local_exit(); seg6_iptunnel_exit(); #endif unregister_pernet_subsys(&ip6_segments_ops); From 6e370a771d2985107e82d0f6174381c1acb49c20 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 9 May 2024 21:18:11 +0800 Subject: [PATCH 15/31] ipv6: sr: fix incorrect unregister order Commit 5559cea2d5aa ("ipv6: sr: fix possible use-after-free and null-ptr-deref") changed the register order in seg6_init(). But the unregister order in seg6_exit() is not updated. Fixes: 5559cea2d5aa ("ipv6: sr: fix possible use-after-free and null-ptr-deref") Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240509131812.1662197-3-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/seg6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 5423f1f2aa62..c4ef96c8fdac 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -567,6 +567,6 @@ void seg6_exit(void) seg6_local_exit(); seg6_iptunnel_exit(); #endif - unregister_pernet_subsys(&ip6_segments_ops); genl_unregister_family(&seg6_genl_family); + unregister_pernet_subsys(&ip6_segments_ops); } From 160e9d2752181fcf18c662e74022d77d3164cd45 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 9 May 2024 21:18:12 +0800 Subject: [PATCH 16/31] ipv6: sr: fix invalid unregister error path The error path of seg6_init() is wrong in case CONFIG_IPV6_SEG6_LWTUNNEL is not defined. In that case if seg6_hmac_init() fails, the genl_unregister_family() isn't called. This issue exist since commit 46738b1317e1 ("ipv6: sr: add option to control lwtunnel support"), and commit 5559cea2d5aa ("ipv6: sr: fix possible use-after-free and null-ptr-deref") replaced unregister_pernet_subsys() with genl_unregister_family() in this error path. Fixes: 46738b1317e1 ("ipv6: sr: add option to control lwtunnel support") Reported-by: Guillaume Nault Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240509131812.1662197-4-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/seg6.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index c4ef96c8fdac..a31521e270f7 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -551,6 +551,8 @@ out_unregister_iptun: #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL out_unregister_genl: +#endif +#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC) genl_unregister_family(&seg6_genl_family); #endif out_unregister_pernet: From 3d5918477f94e4c2f064567875c475468e264644 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 9 May 2024 14:29:47 +0300 Subject: [PATCH 17/31] net/mlx5e: Fix netif state handling mlx5e_suspend cleans resources only if netif_device_present() returns true. However, mlx5e_resume changes the state of netif, via mlx5e_nic_enable, only if reg_state == NETREG_REGISTERED. In the below case, the above leads to NULL-ptr Oops[1] and memory leaks: mlx5e_probe _mlx5e_resume mlx5e_attach_netdev mlx5e_nic_enable <-- netdev not reg, not calling netif_device_attach() register_netdev <-- failed for some reason. ERROR_FLOW: _mlx5e_suspend <-- netif_device_present return false, resources aren't freed :( Hence, clean resources in this case as well. [1] BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 0 P4D 0 Oops: 0010 [#1] SMP CPU: 2 PID: 9345 Comm: test-ovs-ct-gen Not tainted 6.5.0_for_upstream_min_debug_2023_09_05_16_01 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:0x0 Code: Unable to access opcode bytes at0xffffffffffffffd6. RSP: 0018:ffff888178aaf758 EFLAGS: 00010246 Call Trace: ? __die+0x20/0x60 ? page_fault_oops+0x14c/0x3c0 ? exc_page_fault+0x75/0x140 ? asm_exc_page_fault+0x22/0x30 notifier_call_chain+0x35/0xb0 blocking_notifier_call_chain+0x3d/0x60 mlx5_blocking_notifier_call_chain+0x22/0x30 [mlx5_core] mlx5_core_uplink_netdev_event_replay+0x3e/0x60 [mlx5_core] mlx5_mdev_netdev_track+0x53/0x60 [mlx5_ib] mlx5_ib_roce_init+0xc3/0x340 [mlx5_ib] __mlx5_ib_add+0x34/0xd0 [mlx5_ib] mlx5r_probe+0xe1/0x210 [mlx5_ib] ? auxiliary_match_id+0x6a/0x90 auxiliary_bus_probe+0x38/0x80 ? driver_sysfs_add+0x51/0x80 really_probe+0xc9/0x3e0 ? driver_probe_device+0x90/0x90 __driver_probe_device+0x80/0x160 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 bus_for_each_drv+0x80/0xd0 __device_attach+0xbc/0x1f0 bus_probe_device+0x86/0xa0 device_add+0x637/0x840 __auxiliary_device_add+0x3b/0xa0 add_adev+0xc9/0x140 [mlx5_core] mlx5_rescan_drivers_locked+0x22a/0x310 [mlx5_core] mlx5_register_device+0x53/0xa0 [mlx5_core] mlx5_init_one_devl_locked+0x5c4/0x9c0 [mlx5_core] mlx5_init_one+0x3b/0x60 [mlx5_core] probe_one+0x44c/0x730 [mlx5_core] local_pci_probe+0x3e/0x90 pci_device_probe+0xbf/0x210 ? kernfs_create_link+0x5d/0xa0 ? sysfs_do_create_link_sd+0x60/0xc0 really_probe+0xc9/0x3e0 ? driver_probe_device+0x90/0x90 __driver_probe_device+0x80/0x160 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 bus_for_each_drv+0x80/0xd0 __device_attach+0xbc/0x1f0 pci_bus_add_device+0x54/0x80 pci_iov_add_virtfn+0x2e6/0x320 sriov_enable+0x208/0x420 mlx5_core_sriov_configure+0x9e/0x200 [mlx5_core] sriov_numvfs_store+0xae/0x1a0 kernfs_fop_write_iter+0x10c/0x1a0 vfs_write+0x291/0x3c0 ksys_write+0x5f/0xe0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- Fixes: 2c3b5beec46a ("net/mlx5e: More generic netdev management API") Signed-off-by: Shay Drory Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240509112951.590184-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 319930c04093..64497b6eebd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6058,7 +6058,7 @@ static int mlx5e_resume(struct auxiliary_device *adev) return 0; } -static int _mlx5e_suspend(struct auxiliary_device *adev) +static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg) { struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); struct mlx5e_priv *priv = mlx5e_dev->priv; @@ -6067,7 +6067,7 @@ static int _mlx5e_suspend(struct auxiliary_device *adev) struct mlx5_core_dev *pos; int i; - if (!netif_device_present(netdev)) { + if (!pre_netdev_reg && !netif_device_present(netdev)) { if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) mlx5_sd_for_each_dev(i, mdev, pos) mlx5e_destroy_mdev_resources(pos); @@ -6090,7 +6090,7 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); if (actual_adev) - err = _mlx5e_suspend(actual_adev); + err = _mlx5e_suspend(actual_adev, false); mlx5_sd_cleanup(mdev); return err; @@ -6157,7 +6157,7 @@ static int _mlx5e_probe(struct auxiliary_device *adev) return 0; err_resume: - _mlx5e_suspend(adev); + _mlx5e_suspend(adev, true); err_profile_cleanup: profile->cleanup(priv); err_destroy_netdev: @@ -6197,7 +6197,7 @@ static void _mlx5e_remove(struct auxiliary_device *adev) mlx5_core_uplink_netdev_set(mdev, NULL); mlx5e_dcbnl_delete_app(priv); unregister_netdev(priv->netdev); - _mlx5e_suspend(adev); + _mlx5e_suspend(adev, false); priv->profile->cleanup(priv); mlx5e_destroy_netdev(priv); mlx5e_devlink_port_unregister(mlx5e_dev); From 3c453e8cc672de1f9c662948dba43176bc68d7f0 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 9 May 2024 14:29:48 +0300 Subject: [PATCH 18/31] net/mlx5: Fix peer devlink set for SF representor devlink port The cited patch change register devlink flow, and neglect to reflect the changes for peer devlink set logic. Peer devlink set is triggering a call trace if done after devl_register.[1] Hence, align peer devlink set logic with register devlink flow. [1] WARNING: CPU: 4 PID: 3394 at net/devlink/core.c:155 devlink_rel_nested_in_add+0x177/0x180 CPU: 4 PID: 3394 Comm: kworker/u40:1 Not tainted 6.9.0-rc4_for_linust_min_debug_2024_04_16_14_08 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_vhca_event0 mlx5_vhca_state_work_handler [mlx5_core] RIP: 0010:devlink_rel_nested_in_add+0x177/0x180 Call Trace: ? __warn+0x78/0x120 ? devlink_rel_nested_in_add+0x177/0x180 ? report_bug+0x16d/0x180 ? handle_bug+0x3c/0x60 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? devlink_port_init+0x30/0x30 ? devlink_port_type_clear+0x50/0x50 ? devlink_rel_nested_in_add+0x177/0x180 ? devlink_rel_nested_in_add+0xdd/0x180 mlx5_sf_mdev_event+0x74/0xb0 [mlx5_core] notifier_call_chain+0x35/0xb0 blocking_notifier_call_chain+0x3d/0x60 mlx5_blocking_notifier_call_chain+0x22/0x30 [mlx5_core] mlx5_sf_dev_probe+0x185/0x3e0 [mlx5_core] auxiliary_bus_probe+0x38/0x80 ? driver_sysfs_add+0x51/0x80 really_probe+0xc5/0x3a0 ? driver_probe_device+0x90/0x90 __driver_probe_device+0x80/0x160 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 bus_for_each_drv+0x80/0xd0 __device_attach+0xbc/0x1f0 bus_probe_device+0x86/0xa0 device_add+0x64f/0x860 __auxiliary_device_add+0x3b/0xa0 mlx5_sf_dev_add+0x139/0x330 [mlx5_core] mlx5_sf_dev_state_change_handler+0x1e4/0x250 [mlx5_core] notifier_call_chain+0x35/0xb0 blocking_notifier_call_chain+0x3d/0x60 mlx5_vhca_state_work_handler+0x151/0x200 [mlx5_core] process_one_work+0x13f/0x2e0 worker_thread+0x2bd/0x3c0 ? rescuer_thread+0x410/0x410 kthread+0xc4/0xf0 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x2d/0x50 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork_asm+0x11/0x20 Fixes: bf729988303a ("net/mlx5: Restore mistakenly dropped parts in register devlink flow") Fixes: c6e77aa9dd82 ("net/mlx5: Register devlink first under devlink lock") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240509112951.590184-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/main.c | 14 +++++--------- .../mellanox/mlx5/core/sf/dev/driver.c | 19 ++++++++----------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 331ce47f51a1..6574c145dc1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1680,6 +1680,8 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev) struct devlink *devlink = priv_to_devlink(dev); int err; + devl_lock(devlink); + devl_register(devlink); dev->state = MLX5_DEVICE_STATE_UP; err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); if (err) { @@ -1693,27 +1695,21 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev) goto query_hca_caps_err; } - devl_lock(devlink); - devl_register(devlink); - err = mlx5_devlink_params_register(priv_to_devlink(dev)); if (err) { mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); - goto params_reg_err; + goto query_hca_caps_err; } devl_unlock(devlink); return 0; -params_reg_err: - devl_unregister(devlink); - devl_unlock(devlink); query_hca_caps_err: - devl_unregister(devlink); - devl_unlock(devlink); mlx5_function_disable(dev, true); out: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + devl_unregister(devlink); + devl_unlock(devlink); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 7ebe71280827..b2986175d9af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -60,6 +60,13 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto remap_err; } + /* Peer devlink logic expects to work on unregistered devlink instance. */ + err = mlx5_core_peer_devlink_set(sf_dev, devlink); + if (err) { + mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err); + goto peer_devlink_set_err; + } + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) err = mlx5_init_one_light(mdev); else @@ -69,20 +76,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto init_one_err; } - err = mlx5_core_peer_devlink_set(sf_dev, devlink); - if (err) { - mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err); - goto peer_devlink_set_err; - } - return 0; -peer_devlink_set_err: - if (mlx5_dev_is_lightweight(sf_dev->mdev)) - mlx5_uninit_one_light(sf_dev->mdev); - else - mlx5_uninit_one(sf_dev->mdev); init_one_err: +peer_devlink_set_err: iounmap(mdev->iseg); remap_err: mlx5_mdev_uninit(mdev); From 0f06228d4a2dcc1fca5b3ddb0eefa09c05b102c4 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Thu, 9 May 2024 14:29:49 +0300 Subject: [PATCH 19/31] net/mlx5: Reload only IB representors upon lag disable/enable On lag disable, the bond IB device along with all of its representors are destroyed, and then the slaves' representors get reloaded. In case the slave IB representor load fails, the eswitch error flow unloads all representors, including ethernet representors, where the netdevs get detached and removed from lag bond. Such flow is inaccurate as the lag driver is not responsible for loading/unloading ethernet representors. Furthermore, the flow described above begins by holding lag lock to prevent bond changes during disable flow. However, when reaching the ethernet representors detachment from lag, the lag lock is required again, triggering the following deadlock: Call trace: __switch_to+0xf4/0x148 __schedule+0x2c8/0x7d0 schedule+0x50/0xe0 schedule_preempt_disabled+0x18/0x28 __mutex_lock.isra.13+0x2b8/0x570 __mutex_lock_slowpath+0x1c/0x28 mutex_lock+0x4c/0x68 mlx5_lag_remove_netdev+0x3c/0x1a0 [mlx5_core] mlx5e_uplink_rep_disable+0x70/0xa0 [mlx5_core] mlx5e_detach_netdev+0x6c/0xb0 [mlx5_core] mlx5e_netdev_change_profile+0x44/0x138 [mlx5_core] mlx5e_netdev_attach_nic_profile+0x28/0x38 [mlx5_core] mlx5e_vport_rep_unload+0x184/0x1b8 [mlx5_core] mlx5_esw_offloads_rep_load+0xd8/0xe0 [mlx5_core] mlx5_eswitch_reload_reps+0x74/0xd0 [mlx5_core] mlx5_disable_lag+0x130/0x138 [mlx5_core] mlx5_lag_disable_change+0x6c/0x70 [mlx5_core] // hold ldev->lock mlx5_devlink_eswitch_mode_set+0xc0/0x410 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0xdc/0x180 genl_family_rcv_msg_doit.isra.17+0xe8/0x138 genl_rcv_msg+0xe4/0x220 netlink_rcv_skb+0x44/0x108 genl_rcv+0x40/0x58 netlink_unicast+0x198/0x268 netlink_sendmsg+0x1d4/0x418 sock_sendmsg+0x54/0x60 __sys_sendto+0xf4/0x120 __arm64_sys_sendto+0x30/0x40 el0_svc_common+0x8c/0x120 do_el0_svc+0x30/0xa0 el0_svc+0x20/0x30 el0_sync_handler+0x90/0xb8 el0_sync+0x160/0x180 Thus, upon lag enable/disable, load and unload only the IB representors of the slaves preventing the deadlock mentioned above. While at it, refactor the mlx5_esw_offloads_rep_load() function to have a static helper method for its internal logic, in symmetry with the representor unload design. Fixes: 598fe77df855 ("net/mlx5: Lag, Create shared FDB when in switchdev mode") Co-developed-by: Mark Bloch Signed-off-by: Mark Bloch Signed-off-by: Maher Sanalla Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240509112951.590184-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 4 +-- .../mellanox/mlx5/core/eswitch_offloads.c | 28 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 6 ++-- .../ethernet/mellanox/mlx5/core/lag/mpesw.c | 4 +-- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 349e28a6dd8d..ef55674876cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -833,7 +833,7 @@ int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw, int max_slaves); void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); -int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); +int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw); bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev); void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev); @@ -925,7 +925,7 @@ mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) { return 0; } static inline int -mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 844d3e3a65dd..e8caf12f4c4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2502,6 +2502,16 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) esw_offloads_cleanup_reps(esw); } +static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, + REP_REGISTERED, REP_LOADED) == REP_REGISTERED) + return esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); + + return 0; +} + static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { @@ -2526,13 +2536,11 @@ static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) int err; rep = mlx5_eswitch_get_rep(esw, vport_num); - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - if (atomic_cmpxchg(&rep->rep_data[rep_type].state, - REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); - if (err) - goto err_reps; - } + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_reps; + } return 0; @@ -3277,7 +3285,7 @@ static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) esw_vport_destroy_offloads_acl_tables(esw, vport); } -int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw) { struct mlx5_eswitch_rep *rep; unsigned long i; @@ -3290,13 +3298,13 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) return 0; - ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK); + ret = __esw_offloads_load_rep(esw, rep, REP_IB); if (ret) return ret; mlx5_esw_for_each_rep(esw, i, rep) { if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED) - mlx5_esw_offloads_rep_load(esw, rep->vport); + __esw_offloads_load_rep(esw, rep, REP_IB); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 69d482f7c5a2..37598d116f3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -814,7 +814,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) if (shared_fdb) for (i = 0; i < ldev->ports; i++) if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) - mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); } static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) @@ -922,7 +922,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_rescan_drivers_locked(dev0); for (i = 0; i < ldev->ports; i++) { - err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); + err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); if (err) break; } @@ -933,7 +933,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_deactivate_lag(ldev); mlx5_lag_add_devices(ldev); for (i = 0; i < ldev->ports; i++) - mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); mlx5_core_err(dev0, "Failed to enable lag\n"); return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 82889f30506e..571ea26edd0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -99,7 +99,7 @@ static int enable_mpesw(struct mlx5_lag *ldev) dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); for (i = 0; i < ldev->ports; i++) { - err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); + err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); if (err) goto err_rescan_drivers; } @@ -113,7 +113,7 @@ err_rescan_drivers: err_add_devices: mlx5_lag_add_devices(ldev); for (i = 0; i < ldev->ports; i++) - mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); + mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); mlx5_mpesw_metadata_cleanup(ldev); return err; } From 485d65e1357123a697c591a5aeb773994b247ad7 Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Thu, 9 May 2024 14:29:50 +0300 Subject: [PATCH 20/31] net/mlx5: Add a timeout to acquire the command queue semaphore Prevent forced completion handling on an entry that has not yet been assigned an index, causing an out of bounds access on idx = -22. Instead of waiting indefinitely for the sem, blocking flow now waits for index to be allocated or a sem acquisition timeout before beginning the timer for FW completion. Kernel log example: mlx5_core 0000:06:00.0: wait_func_handle_exec_timeout:1128:(pid 185911): cmd[-22]: CREATE_UCTX(0xa04) No done completion Fixes: 8e715cd613a1 ("net/mlx5: Set command entry semaphore up once got index free") Signed-off-by: Akiva Goldberger Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240509112951.590184-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 43 ++++++++++++++----- include/linux/mlx5/driver.h | 1 + 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 4957412ff1f6..511e7fee39ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -969,19 +969,32 @@ static void cmd_work_handler(struct work_struct *work) bool poll_cmd = ent->polling; struct mlx5_cmd_layout *lay; struct mlx5_core_dev *dev; - unsigned long cb_timeout; - struct semaphore *sem; + unsigned long timeout; unsigned long flags; int alloc_ret; int cmd_mode; - dev = container_of(cmd, struct mlx5_core_dev, cmd); - cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); - complete(&ent->handling); - sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem; - down(sem); + + dev = container_of(cmd, struct mlx5_core_dev, cmd); + timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); + if (!ent->page_queue) { + if (down_timeout(&cmd->vars.sem, timeout)) { + mlx5_core_warn(dev, "%s(0x%x) timed out while waiting for a slot.\n", + mlx5_command_str(ent->op), ent->op); + if (ent->callback) { + ent->callback(-EBUSY, ent->context); + mlx5_free_cmd_msg(dev, ent->out); + free_msg(dev, ent->in); + cmd_ent_put(ent); + } else { + ent->ret = -EBUSY; + complete(&ent->done); + } + complete(&ent->slotted); + return; + } alloc_ret = cmd_alloc_index(cmd, ent); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); @@ -994,10 +1007,11 @@ static void cmd_work_handler(struct work_struct *work) ent->ret = -EAGAIN; complete(&ent->done); } - up(sem); + up(&cmd->vars.sem); return; } } else { + down(&cmd->vars.pages_sem); ent->idx = cmd->vars.max_reg_cmds; spin_lock_irqsave(&cmd->alloc_lock, flags); clear_bit(ent->idx, &cmd->vars.bitmask); @@ -1005,6 +1019,8 @@ static void cmd_work_handler(struct work_struct *work) spin_unlock_irqrestore(&cmd->alloc_lock, flags); } + complete(&ent->slotted); + lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); @@ -1023,7 +1039,7 @@ static void cmd_work_handler(struct work_struct *work) ent->ts1 = ktime_get_ns(); cmd_mode = cmd->mode; - if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout)) + if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, timeout)) cmd_ent_get(ent); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); @@ -1143,6 +1159,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) ent->ret = -ECANCELED; goto out_err; } + + wait_for_completion(&ent->slotted); + if (cmd->mode == CMD_MODE_POLLING || ent->polling) wait_for_completion(&ent->done); else if (!wait_for_completion_timeout(&ent->done, timeout)) @@ -1157,6 +1176,9 @@ out_err: } else if (err == -ECANCELED) { mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n", mlx5_command_str(ent->op), ent->op); + } else if (err == -EBUSY) { + mlx5_core_warn(dev, "%s(0x%x) timeout while waiting for command semaphore.\n", + mlx5_command_str(ent->op), ent->op); } mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err, deliv_status_to_str(ent->status), ent->status); @@ -1208,6 +1230,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, ent->polling = force_polling; init_completion(&ent->handling); + init_completion(&ent->slotted); if (!callback) init_completion(&ent->done); @@ -1225,7 +1248,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, return 0; /* mlx5_cmd_comp_handler() will put(ent) */ err = wait_func(dev, ent); - if (err == -ETIMEDOUT || err == -ECANCELED) + if (err == -ETIMEDOUT || err == -ECANCELED || err == -EBUSY) goto out_free; ds = ent->ts2 - ent->ts1; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bf9324a31ae9..80452bd98253 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -862,6 +862,7 @@ struct mlx5_cmd_work_ent { void *context; int idx; struct completion handling; + struct completion slotted; struct completion done; struct mlx5_cmd *cmd; struct work_struct work; From db9b31aa9bc56ff0d15b78f7e827d61c4a096e40 Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Thu, 9 May 2024 14:29:51 +0300 Subject: [PATCH 21/31] net/mlx5: Discard command completions in internal error Fix use after free when FW completion arrives while device is in internal error state. Avoid calling completion handler in this case, since the device will flush the command interface and trigger all completions manually. Kernel log: ------------[ cut here ]------------ refcount_t: underflow; use-after-free. ... RIP: 0010:refcount_warn_saturate+0xd8/0xe0 ... Call Trace: ? __warn+0x79/0x120 ? refcount_warn_saturate+0xd8/0xe0 ? report_bug+0x17c/0x190 ? handle_bug+0x3c/0x60 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? refcount_warn_saturate+0xd8/0xe0 cmd_ent_put+0x13b/0x160 [mlx5_core] mlx5_cmd_comp_handler+0x5f9/0x670 [mlx5_core] cmd_comp_notifier+0x1f/0x30 [mlx5_core] notifier_call_chain+0x35/0xb0 atomic_notifier_call_chain+0x16/0x20 mlx5_eq_async_int+0xf6/0x290 [mlx5_core] notifier_call_chain+0x35/0xb0 atomic_notifier_call_chain+0x16/0x20 irq_int_handler+0x19/0x30 [mlx5_core] __handle_irq_event_percpu+0x4b/0x160 handle_irq_event+0x2e/0x80 handle_edge_irq+0x98/0x230 __common_interrupt+0x3b/0xa0 common_interrupt+0x7b/0xa0 asm_common_interrupt+0x22/0x40 Fixes: 51d138c2610a ("net/mlx5: Fix health error state handling") Signed-off-by: Akiva Goldberger Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240509112951.590184-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 511e7fee39ac..20768ef2e9d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1634,6 +1634,9 @@ static int cmd_comp_notifier(struct notifier_block *nb, dev = container_of(cmd, struct mlx5_core_dev, cmd); eqe = data; + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return NOTIFY_DONE; + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); return NOTIFY_OK; From 1af7f88af269c4e06a4dc3bc920ff6cdf7471124 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 May 2024 07:29:32 +0000 Subject: [PATCH 22/31] inet: fix inet_fill_ifaddr() flags truncation I missed that (struct ifaddrmsg)->ifa_flags was only 8bits, while (struct in_ifaddr)->ifa_flags is 32bits. Use a temporary 32bit variable as I did in set_ifa_lifetime() and check_lifetime(). Fixes: 3ddc2231c810 ("inet: annotate data-races around ifa->ifa_flags") Reported-by: Yu Watanabe Dianosed-by: Yu Watanabe Closes: https://github.com/systemd/systemd/pull/32666#issuecomment-2103977928 Signed-off-by: Eric Dumazet Reviewed-by: Larysa Zaremba Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240510072932.2678952-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/devinet.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7a437f0d4190..7e45c34c8340 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1683,6 +1683,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, struct nlmsghdr *nlh; unsigned long tstamp; u32 preferred, valid; + u32 flags; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm), args->flags); @@ -1692,7 +1693,13 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = READ_ONCE(ifa->ifa_flags); + + flags = READ_ONCE(ifa->ifa_flags); + /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits. + * The 32bit value is given in IFA_FLAGS attribute. + */ + ifm->ifa_flags = (__u8)flags; + ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; @@ -1701,7 +1708,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, goto nla_put_failure; tstamp = READ_ONCE(ifa->ifa_tstamp); - if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { + if (!(flags & IFA_F_PERMANENT)) { preferred = READ_ONCE(ifa->ifa_preferred_lft); valid = READ_ONCE(ifa->ifa_valid_lft); if (preferred != INFINITY_LIFE_TIME) { @@ -1732,7 +1739,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || (ifa->ifa_proto && nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) || - nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) || + nla_put_u32(skb, IFA_FLAGS, flags) || (ifa->ifa_rt_priority && nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp, From 8ec9897ec2e93a412b9e3119e3137583a85969e0 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 10 May 2024 19:19:12 +0200 Subject: [PATCH 23/31] netlabel: fix RCU annotation for IPv4 options on socket creation Xiumei reports the following splat when netlabel and TCP socket are used: ============================= WARNING: suspicious RCU usage 6.9.0-rc2+ #637 Not tainted ----------------------------- net/ipv4/cipso_ipv4.c:1880 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by ncat/23333: #0: ffffffff906030c0 (rcu_read_lock){....}-{1:2}, at: netlbl_sock_setattr+0x25/0x1b0 stack backtrace: CPU: 11 PID: 23333 Comm: ncat Kdump: loaded Not tainted 6.9.0-rc2+ #637 Hardware name: Supermicro SYS-6027R-72RF/X9DRH-7TF/7F/iTF/iF, BIOS 3.0 07/26/2013 Call Trace: dump_stack_lvl+0xa9/0xc0 lockdep_rcu_suspicious+0x117/0x190 cipso_v4_sock_setattr+0x1ab/0x1b0 netlbl_sock_setattr+0x13e/0x1b0 selinux_netlbl_socket_post_create+0x3f/0x80 selinux_socket_post_create+0x1a0/0x460 security_socket_post_create+0x42/0x60 __sock_create+0x342/0x3a0 __sys_socket_create.part.22+0x42/0x70 __sys_socket+0x37/0xb0 __x64_sys_socket+0x16/0x20 do_syscall_64+0x96/0x180 ? do_user_addr_fault+0x68d/0xa30 ? exc_page_fault+0x171/0x280 ? asm_exc_page_fault+0x22/0x30 entry_SYSCALL_64_after_hwframe+0x71/0x79 RIP: 0033:0x7fbc0ca3fc1b Code: 73 01 c3 48 8b 0d 05 f2 1b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 29 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d d5 f1 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007fff18635208 EFLAGS: 00000246 ORIG_RAX: 0000000000000029 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fbc0ca3fc1b RDX: 0000000000000006 RSI: 0000000000000001 RDI: 0000000000000002 RBP: 000055d24f80f8a0 R08: 0000000000000003 R09: 0000000000000001 R10: 0000000000020000 R11: 0000000000000246 R12: 000055d24f80f8a0 R13: 0000000000000000 R14: 000055d24f80fb88 R15: 0000000000000000 The current implementation of cipso_v4_sock_setattr() replaces IP options under the assumption that the caller holds the socket lock; however, such assumption is not true, nor needed, in selinux_socket_post_create() hook. Let all callers of cipso_v4_sock_setattr() specify the "socket lock held" condition, except selinux_socket_post_create() _ where such condition can safely be set as true even without holding the socket lock. Fixes: f6d8bd051c39 ("inet: add RCU protection to inet->opt") Reported-by: Xiumei Mu Signed-off-by: Davide Caratti Acked-by: Casey Schaufler Acked-by: Paul Moore Link: https://lore.kernel.org/r/f4260d000a3a55b9e8b6a3b4e3fffc7da9f82d41.1715359817.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- include/net/cipso_ipv4.h | 6 ++++-- include/net/netlabel.h | 12 ++++++++++-- net/ipv4/cipso_ipv4.c | 7 ++++--- net/netlabel/netlabel_kapi.c | 31 ++++++++++++++++++++++++++++--- security/selinux/netlabel.c | 5 ++++- security/smack/smack_lsm.c | 3 ++- 6 files changed, 52 insertions(+), 12 deletions(-) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 53dd7d988a2d..c9111bb2f59b 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -183,7 +183,8 @@ int cipso_v4_getattr(const unsigned char *cipso, struct netlbl_lsm_secattr *secattr); int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr); + const struct netlbl_lsm_secattr *secattr, + bool sk_locked); void cipso_v4_sock_delattr(struct sock *sk); int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); int cipso_v4_req_setattr(struct request_sock *req, @@ -214,7 +215,8 @@ static inline int cipso_v4_getattr(const unsigned char *cipso, static inline int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) + const struct netlbl_lsm_secattr *secattr, + bool sk_locked) { return -ENOSYS; } diff --git a/include/net/netlabel.h b/include/net/netlabel.h index f3ab0b8a4b18..2133ad723fc1 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -470,7 +470,8 @@ void netlbl_bitmap_setbit(unsigned char *bitmap, u32 bit, u8 state); int netlbl_enabled(void); int netlbl_sock_setattr(struct sock *sk, u16 family, - const struct netlbl_lsm_secattr *secattr); + const struct netlbl_lsm_secattr *secattr, + bool sk_locked); void netlbl_sock_delattr(struct sock *sk); int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); @@ -487,6 +488,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr); void netlbl_skbuff_err(struct sk_buff *skb, u16 family, int error, int gateway); +bool netlbl_sk_lock_check(struct sock *sk); /* * LSM label mapping cache operations @@ -614,7 +616,8 @@ static inline int netlbl_enabled(void) } static inline int netlbl_sock_setattr(struct sock *sk, u16 family, - const struct netlbl_lsm_secattr *secattr) + const struct netlbl_lsm_secattr *secattr, + bool sk_locked) { return -ENOSYS; } @@ -673,6 +676,11 @@ static inline struct audit_buffer *netlbl_audit_start(int type, { return NULL; } + +static inline bool netlbl_sk_lock_check(struct sock *sk) +{ + return true; +} #endif /* CONFIG_NETLABEL */ const struct netlbl_calipso_ops * diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 8b17d83e5fde..dd6d46015058 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1815,6 +1815,7 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, * @sk: the socket * @doi_def: the CIPSO DOI to use * @secattr: the specific security attributes of the socket + * @sk_locked: true if caller holds the socket lock * * Description: * Set the CIPSO option on the given socket using the DOI definition and @@ -1826,7 +1827,8 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, */ int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) + const struct netlbl_lsm_secattr *secattr, + bool sk_locked) { int ret_val = -EPERM; unsigned char *buf = NULL; @@ -1876,8 +1878,7 @@ int cipso_v4_sock_setattr(struct sock *sk, sk_inet = inet_sk(sk); - old = rcu_dereference_protected(sk_inet->inet_opt, - lockdep_sock_is_held(sk)); + old = rcu_dereference_protected(sk_inet->inet_opt, sk_locked); if (inet_test_bit(IS_ICSK, sk)) { sk_conn = inet_csk(sk); if (old) diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 1ba4f58e1d35..cd9160bbc919 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -965,6 +965,7 @@ int netlbl_enabled(void) * @sk: the socket to label * @family: protocol family * @secattr: the security attributes + * @sk_locked: true if caller holds the socket lock * * Description: * Attach the correct label to the given socket using the security attributes @@ -977,7 +978,8 @@ int netlbl_enabled(void) */ int netlbl_sock_setattr(struct sock *sk, u16 family, - const struct netlbl_lsm_secattr *secattr) + const struct netlbl_lsm_secattr *secattr, + bool sk_locked) { int ret_val; struct netlbl_dom_map *dom_entry; @@ -997,7 +999,7 @@ int netlbl_sock_setattr(struct sock *sk, case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, dom_entry->def.cipso, - secattr); + secattr, sk_locked); break; case NETLBL_NLTYPE_UNLABELED: ret_val = 0; @@ -1090,6 +1092,28 @@ int netlbl_sock_getattr(struct sock *sk, return ret_val; } +/** + * netlbl_sk_lock_check - Check if the socket lock has been acquired. + * @sk: the socket to be checked + * + * Return: true if socket @sk is locked or if lock debugging is disabled at + * runtime or compile-time; false otherwise + * + */ +#ifdef CONFIG_LOCKDEP +bool netlbl_sk_lock_check(struct sock *sk) +{ + if (debug_locks) + return lockdep_sock_is_held(sk); + return true; +} +#else +bool netlbl_sk_lock_check(struct sock *sk) +{ + return true; +} +#endif + /** * netlbl_conn_setattr - Label a connected socket using the correct protocol * @sk: the socket to label @@ -1126,7 +1150,8 @@ int netlbl_conn_setattr(struct sock *sk, switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - entry->cipso, secattr); + entry->cipso, secattr, + netlbl_sk_lock_check(sk)); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 8f182800e412..55885634e880 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -402,7 +402,10 @@ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family) secattr = selinux_netlbl_sock_genattr(sk); if (secattr == NULL) return -ENOMEM; - rc = netlbl_sock_setattr(sk, family, secattr); + /* On socket creation, replacement of IP options is safe even if + * the caller does not hold the socket lock. + */ + rc = netlbl_sock_setattr(sk, family, secattr, true); switch (rc) { case 0: sksec->nlbl_state = NLBL_LABELED; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 146667937811..efeac8365ad0 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2565,7 +2565,8 @@ static int smack_netlbl_add(struct sock *sk) local_bh_disable(); bh_lock_sock_nested(sk); - rc = netlbl_sock_setattr(sk, sk->sk_family, &skp->smk_netlabel); + rc = netlbl_sock_setattr(sk, sk->sk_family, &skp->smk_netlabel, + netlbl_sk_lock_check(sk)); switch (rc) { case 0: ssp->smk_state = SMK_NETLBL_LABELED; From 5eefb477d21a26183bc3499aeefa991198315a2d Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 10 May 2024 13:30:55 +0200 Subject: [PATCH 24/31] net: smc91x: Fix m68k kernel compilation for ColdFire CPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling the m68k kernel with support for the ColdFire CPU family fails with the following error: In file included from drivers/net/ethernet/smsc/smc91x.c:80: drivers/net/ethernet/smsc/smc91x.c: In function ‘smc_reset’: drivers/net/ethernet/smsc/smc91x.h:160:40: error: implicit declaration of function ‘_swapw’; did you mean ‘swap’? [-Werror=implicit-function-declaration] 160 | #define SMC_outw(lp, v, a, r) writew(_swapw(v), (a) + (r)) | ^~~~~~ drivers/net/ethernet/smsc/smc91x.h:904:25: note: in expansion of macro ‘SMC_outw’ 904 | SMC_outw(lp, x, ioaddr, BANK_SELECT); \ | ^~~~~~~~ drivers/net/ethernet/smsc/smc91x.c:250:9: note: in expansion of macro ‘SMC_SELECT_BANK’ 250 | SMC_SELECT_BANK(lp, 2); | ^~~~~~~~~~~~~~~ cc1: some warnings being treated as errors The function _swapw() was removed in commit d97cf70af097 ("m68k: use asm-generic/io.h for non-MMU io access functions"), but is still used in drivers/net/ethernet/smsc/smc91x.h. Use ioread16be() and iowrite16be() to resolve the error. Cc: stable@vger.kernel.org Fixes: d97cf70af097 ("m68k: use asm-generic/io.h for non-MMU io access functions") Signed-off-by: Thorsten Blum Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240510113054.186648-2-thorsten.blum@toblux.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/smc91x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index 46eee747c699..45ef5ac0788a 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -156,8 +156,8 @@ static inline void mcf_outsw(void *a, unsigned char *p, int l) writew(*wp++, a); } -#define SMC_inw(a, r) _swapw(readw((a) + (r))) -#define SMC_outw(lp, v, a, r) writew(_swapw(v), (a) + (r)) +#define SMC_inw(a, r) ioread16be((a) + (r)) +#define SMC_outw(lp, v, a, r) iowrite16be(v, (a) + (r)) #define SMC_insw(a, r, p, l) mcf_insw(a + r, p, l) #define SMC_outsw(a, r, p, l) mcf_outsw(a + r, p, l) From ecf848eb934b03959918f5269f64c0e52bc23998 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Fri, 10 May 2024 11:08:28 +0200 Subject: [PATCH 25/31] net: usb: ax88179_178a: fix link status when link is set to down/up The idea was to keep only one reset at initialization stage in order to reduce the total delay, or the reset from usbnet_probe or the reset from usbnet_open. I have seen that restarting from usbnet_probe is necessary to avoid doing too complex things. But when the link is set to down/up (for example to configure a different mac address) the link is not correctly recovered unless a reset is commanded from usbnet_open. So, detect the initialization stage (first call) to not reset from usbnet_open after the reset from usbnet_probe and after this stage, always reset from usbnet_open too (when the link needs to be rechecked). Apply to all the possible devices, the behavior now is going to be the same. cc: stable@vger.kernel.org # 6.6+ Fixes: 56f78615bcb1 ("net: usb: ax88179_178a: avoid writing the mac address before first reading") Reported-by: Isaac Ganoung Reported-by: Yongqin Liu Signed-off-by: Jose Ignacio Tornos Martinez Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240510090846.328201-1-jtornosm@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/ax88179_178a.c | 37 ++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index df9d767cb524..56ede5fa0261 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -174,6 +174,7 @@ struct ax88179_data { u32 wol_supported; u32 wolopts; u8 disconnecting; + u8 initialized; }; struct ax88179_int_data { @@ -1673,6 +1674,18 @@ static int ax88179_reset(struct usbnet *dev) return 0; } +static int ax88179_net_reset(struct usbnet *dev) +{ + struct ax88179_data *ax179_data = dev->driver_priv; + + if (ax179_data->initialized) + ax88179_reset(dev); + else + ax179_data->initialized = 1; + + return 0; +} + static int ax88179_stop(struct usbnet *dev) { u16 tmp16; @@ -1692,6 +1705,7 @@ static const struct driver_info ax88179_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1704,6 +1718,7 @@ static const struct driver_info ax88178a_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1716,7 +1731,7 @@ static const struct driver_info cypress_GX3_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1729,7 +1744,7 @@ static const struct driver_info dlink_dub1312_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1742,7 +1757,7 @@ static const struct driver_info sitecom_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1755,7 +1770,7 @@ static const struct driver_info samsung_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1768,7 +1783,7 @@ static const struct driver_info lenovo_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1781,7 +1796,7 @@ static const struct driver_info belkin_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1794,7 +1809,7 @@ static const struct driver_info toshiba_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1807,7 +1822,7 @@ static const struct driver_info mct_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1820,7 +1835,7 @@ static const struct driver_info at_umc2000_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1833,7 +1848,7 @@ static const struct driver_info at_umc200_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1846,7 +1861,7 @@ static const struct driver_info at_umc2000sp_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, From a7d6e36b9ad052926ba2ecba3a59d8bb67dabcb4 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 9 May 2024 17:36:33 +0800 Subject: [PATCH 26/31] ax25: Use kernel universal linked list to implement ax25_dev_list The origin ax25_dev_list implements its own single linked list, which is complicated and error-prone. For example, when deleting the node of ax25_dev_list in ax25_dev_device_down(), we have to operate on the head node and other nodes separately. This patch uses kernel universal linked list to replace original ax25_dev_list, which make the operation of ax25_dev_list easier. We should do "dev->ax25_ptr = ax25_dev;" and "dev->ax25_ptr = NULL;" while holding the spinlock, otherwise the ax25_dev_device_up() and ax25_dev_device_down() could race. Suggested-by: Dan Carpenter Signed-off-by: Duoming Zhou Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/85bba3af651ca0e1a519da8d0d715b949891171c.1715247018.git.duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- include/net/ax25.h | 3 +-- net/ax25/ax25_dev.c | 40 +++++++++++++++------------------------- 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 0d939e5aee4e..c2a85fd3f5ea 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -216,7 +216,7 @@ typedef struct { struct ctl_table; typedef struct ax25_dev { - struct ax25_dev *next; + struct list_head list; struct net_device *dev; netdevice_tracker dev_tracker; @@ -330,7 +330,6 @@ int ax25_addr_size(const ax25_digi *); void ax25_digi_invert(const ax25_digi *, ax25_digi *); /* ax25_dev.c */ -extern ax25_dev *ax25_dev_list; extern spinlock_t ax25_dev_lock; #if IS_ENABLED(CONFIG_AX25) diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 282ec581c072..f16ee5c09d07 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -22,11 +22,12 @@ #include #include #include +#include #include #include #include -ax25_dev *ax25_dev_list; +static LIST_HEAD(ax25_dev_list); DEFINE_SPINLOCK(ax25_dev_lock); ax25_dev *ax25_addr_ax25dev(ax25_address *addr) @@ -34,7 +35,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) ax25_dev *ax25_dev, *res = NULL; spin_lock_bh(&ax25_dev_lock); - for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) + list_for_each_entry(ax25_dev, &ax25_dev_list, list) if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; ax25_dev_hold(ax25_dev); @@ -59,7 +60,6 @@ void ax25_dev_device_up(struct net_device *dev) } refcount_set(&ax25_dev->refcount, 1); - dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL); ax25_dev->forward = NULL; @@ -85,8 +85,8 @@ void ax25_dev_device_up(struct net_device *dev) #endif spin_lock_bh(&ax25_dev_lock); - ax25_dev->next = ax25_dev_list; - ax25_dev_list = ax25_dev; + list_add(&ax25_dev->list, &ax25_dev_list); + dev->ax25_ptr = ax25_dev; spin_unlock_bh(&ax25_dev_lock); ax25_dev_hold(ax25_dev); @@ -111,32 +111,25 @@ void ax25_dev_device_down(struct net_device *dev) /* * Remove any packet forwarding that points to this device. */ - for (s = ax25_dev_list; s != NULL; s = s->next) + list_for_each_entry(s, &ax25_dev_list, list) if (s->forward == dev) s->forward = NULL; - if ((s = ax25_dev_list) == ax25_dev) { - ax25_dev_list = s->next; - goto unlock_put; - } - - while (s != NULL && s->next != NULL) { - if (s->next == ax25_dev) { - s->next = ax25_dev->next; + list_for_each_entry(s, &ax25_dev_list, list) { + if (s == ax25_dev) { + list_del(&s->list); goto unlock_put; } - - s = s->next; } - spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; + spin_unlock_bh(&ax25_dev_lock); ax25_dev_put(ax25_dev); return; unlock_put: + dev->ax25_ptr = NULL; spin_unlock_bh(&ax25_dev_lock); ax25_dev_put(ax25_dev); - dev->ax25_ptr = NULL; netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } @@ -200,16 +193,13 @@ struct net_device *ax25_fwd_dev(struct net_device *dev) */ void __exit ax25_dev_free(void) { - ax25_dev *s, *ax25_dev; + ax25_dev *s, *n; spin_lock_bh(&ax25_dev_lock); - ax25_dev = ax25_dev_list; - while (ax25_dev != NULL) { - s = ax25_dev; - netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker); - ax25_dev = ax25_dev->next; + list_for_each_entry_safe(s, n, &ax25_dev_list, list) { + netdev_put(s->dev, &s->dev_tracker); + list_del(&s->list); kfree(s); } - ax25_dev_list = NULL; spin_unlock_bh(&ax25_dev_lock); } From b505e0319852b08a3a716b64620168eab21f4ced Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 9 May 2024 17:36:47 +0800 Subject: [PATCH 27/31] ax25: Fix reference count leak issues of ax25_dev The ax25_addr_ax25dev() and ax25_dev_device_down() exist a reference count leak issue of the object "ax25_dev". Memory leak issue in ax25_addr_ax25dev(): The reference count of the object "ax25_dev" can be increased multiple times in ax25_addr_ax25dev(). This will cause a memory leak. Memory leak issues in ax25_dev_device_down(): The reference count of ax25_dev is set to 1 in ax25_dev_device_up() and then increase the reference count when ax25_dev is added to ax25_dev_list. As a result, the reference count of ax25_dev is 2. But when the device is shutting down. The ax25_dev_device_down() drops the reference count once or twice depending on if we goto unlock_put or not, which will cause memory leak. As for the issue of ax25_addr_ax25dev(), it is impossible for one pointer to be on a list twice. So add a break in ax25_addr_ax25dev(). As for the issue of ax25_dev_device_down(), increase the reference count of ax25_dev once in ax25_dev_device_up() and decrease the reference count of ax25_dev after it is removed from the ax25_dev_list. Fixes: d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") Suggested-by: Dan Carpenter Signed-off-by: Duoming Zhou Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/361bbf2a4b091e120006279ec3b382d73c4a0c17.1715247018.git.duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/ax25/ax25_dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index f16ee5c09d07..52ccc37d5687 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -39,6 +39,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; ax25_dev_hold(ax25_dev); + break; } spin_unlock_bh(&ax25_dev_lock); @@ -88,7 +89,6 @@ void ax25_dev_device_up(struct net_device *dev) list_add(&ax25_dev->list, &ax25_dev_list); dev->ax25_ptr = ax25_dev; spin_unlock_bh(&ax25_dev_lock); - ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -129,7 +129,6 @@ void ax25_dev_device_down(struct net_device *dev) unlock_put: dev->ax25_ptr = NULL; spin_unlock_bh(&ax25_dev_lock); - ax25_dev_put(ax25_dev); netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } From 36e56b1b002bb26440403053f19f9e1a8bc075b2 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 9 May 2024 17:37:02 +0800 Subject: [PATCH 28/31] ax25: Fix reference count leak issue of net_device There is a reference count leak issue of the object "net_device" in ax25_dev_device_down(). When the ax25 device is shutting down, the ax25_dev_device_down() drops the reference count of net_device one or zero times depending on if we goto unlock_put or not, which will cause memory leak. In order to solve the above issue, decrease the reference count of net_device after dev->ax25_ptr is set to null. Fixes: d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") Suggested-by: Dan Carpenter Signed-off-by: Duoming Zhou Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/7ce3b23a40d9084657ba1125432f0ecc380cbc80.1715247018.git.duoming@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/ax25/ax25_dev.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 52ccc37d5687..c9d55b99a7a5 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -118,15 +118,10 @@ void ax25_dev_device_down(struct net_device *dev) list_for_each_entry(s, &ax25_dev_list, list) { if (s == ax25_dev) { list_del(&s->list); - goto unlock_put; + break; } } - dev->ax25_ptr = NULL; - spin_unlock_bh(&ax25_dev_lock); - ax25_dev_put(ax25_dev); - return; -unlock_put: dev->ax25_ptr = NULL; spin_unlock_bh(&ax25_dev_lock); netdev_put(dev, &ax25_dev->dev_tracker); From ad506586cb69292b6ac59ab95468aadd54b19ab7 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Mon, 13 May 2024 11:28:24 +0800 Subject: [PATCH 29/31] dpll: fix return value check for kmemdup The return value of kmemdup() is dst->freq_supported, not src->freq_supported. Update the check accordingly. Fixes: 830ead5fb0c5 ("dpll: fix pin dump crash for rebound module") Signed-off-by: Chen Ni Reviewed-by: Przemek Kitszel Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240513032824.2410459-1-nichen@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index d0f6693ca142..32019dc33cca 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -449,7 +449,7 @@ static int dpll_pin_prop_dup(const struct dpll_pin_properties *src, sizeof(*src->freq_supported); dst->freq_supported = kmemdup(src->freq_supported, freq_size, GFP_KERNEL); - if (!src->freq_supported) + if (!dst->freq_supported) return -ENOMEM; } if (src->board_label) { From c2e0c58b25a0a0c37ec643255558c5af4450c9f5 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Sat, 11 May 2024 14:20:09 +0800 Subject: [PATCH 30/31] net: fec: remove .ndo_poll_controller to avoid deadlocks There is a deadlock issue found in sungem driver, please refer to the commit ac0a230f719b ("eth: sungem: remove .ndo_poll_controller to avoid deadlocks"). The root cause of the issue is that netpoll is in atomic context and disable_irq() is called by .ndo_poll_controller interface of sungem driver, however, disable_irq() might sleep. After analyzing the implementation of fec_poll_controller(), the fec driver should have the same issue. Due to the fec driver uses NAPI for TX completions, the .ndo_poll_controller is unnecessary to be implemented in the fec driver, so fec_poll_controller() can be safely removed. Fixes: 7f5c6addcdc0 ("net/fec: add poll controller function for fec nic") Signed-off-by: Wei Fang Link: https://lore.kernel.org/r/20240511062009.652918-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 26 ----------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 8bd213da8fb6..a72d8a2eb0b3 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3674,29 +3674,6 @@ fec_set_mac_address(struct net_device *ndev, void *p) return 0; } -#ifdef CONFIG_NET_POLL_CONTROLLER -/** - * fec_poll_controller - FEC Poll controller function - * @dev: The FEC network adapter - * - * Polled functionality used by netconsole and others in non interrupt mode - * - */ -static void fec_poll_controller(struct net_device *dev) -{ - int i; - struct fec_enet_private *fep = netdev_priv(dev); - - for (i = 0; i < FEC_IRQ_NUM; i++) { - if (fep->irq[i] > 0) { - disable_irq(fep->irq[i]); - fec_enet_interrupt(fep->irq[i], dev); - enable_irq(fep->irq[i]); - } - } -} -#endif - static inline void fec_enet_set_netdev_features(struct net_device *netdev, netdev_features_t features) { @@ -4003,9 +3980,6 @@ static const struct net_device_ops fec_netdev_ops = { .ndo_tx_timeout = fec_timeout, .ndo_set_mac_address = fec_set_mac_address, .ndo_eth_ioctl = phy_do_ioctl_running, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = fec_poll_controller, -#endif .ndo_set_features = fec_set_features, .ndo_bpf = fec_enet_bpf, .ndo_xdp_xmit = fec_enet_xdp_xmit, From aea27a92a41dae14843f92c79e9e42d8f570105c Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 13 May 2024 21:21:57 +0200 Subject: [PATCH 31/31] net: micrel: Fix receiving the timestamp in the frame for lan8841 The blamed commit started to use the ptp workqueue to get the second part of the timestamp. And when the port was set down, then this workqueue is stopped. But if the config option NETWORK_PHY_TIMESTAMPING is not enabled, then the ptp_clock is not initialized so then it would crash when it would try to access the delayed work. So then basically by setting up and then down the port, it would crash. The fix consists in checking if the ptp_clock is initialized and only then cancel the delayed work. Fixes: cc7554954848 ("net: micrel: Change to receive timestamp in the frame for lan8841") Signed-off-by: Horatiu Vultur Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index ddb50a0e2bc8..87780465cd0d 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -4676,7 +4676,8 @@ static int lan8841_suspend(struct phy_device *phydev) struct kszphy_priv *priv = phydev->priv; struct kszphy_ptp_priv *ptp_priv = &priv->ptp_priv; - ptp_cancel_worker_sync(ptp_priv->ptp_clock); + if (ptp_priv->ptp_clock) + ptp_cancel_worker_sync(ptp_priv->ptp_clock); return genphy_suspend(phydev); }