From e96b2933152fd87b6a41765b2f58b158fde855b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cs=C3=B3k=C3=A1s=2C=20Bence?= Date: Wed, 5 Jun 2024 10:42:51 +0200 Subject: [PATCH 01/35] net: sfp: Always call `sfp_sm_mod_remove()` on remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the module is in SFP_MOD_ERROR, `sfp_sm_mod_remove()` will not be run. As a consequence, `sfp_hwmon_remove()` is not getting run either, leaving a stale `hwmon` device behind. `sfp_sm_mod_remove()` itself checks `sfp->sm_mod_state` anyways, so this check was not really needed in the first place. Fixes: d2e816c0293f ("net: sfp: handle module remove outside state machine") Signed-off-by: "Csókás, Bence" Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240605084251.63502-1-csokas.bence@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 3f9cbd797fd6..a5684ef5884b 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2429,8 +2429,7 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) /* Handle remove event globally, it resets this state machine */ if (event == SFP_E_REMOVE) { - if (sfp->sm_mod_state > SFP_MOD_PROBE) - sfp_sm_mod_remove(sfp); + sfp_sm_mod_remove(sfp); sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); return; } From b472b996a43404a912c5cb4f27050022fdbce10c Mon Sep 17 00:00:00 2001 From: Udit Kumar Date: Fri, 31 May 2024 22:27:25 +0530 Subject: [PATCH 02/35] dt-bindings: net: dp8386x: Add MIT license along with GPL-2.0 Modify license to include dual licensing as GPL-2.0-only OR MIT license for TI specific phy header files. This allows for Linux kernel files to be used in other Operating System ecosystems such as Zephyr or FreeBSD. While at this, update the GPL-2.0 to be GPL-2.0-only to be in sync with latest SPDX conventions (GPL-2.0 is deprecated). While at this, update the TI copyright year to sync with current year to indicate license change. Cc: Thomas Gleixner Cc: Trent Piepho Cc: Wadim Egorov Cc: Kip Broadhurst Signed-off-by: Udit Kumar Acked-by: Wadim Egorov Acked-by: Rob Herring Signed-off-by: David S. Miller --- include/dt-bindings/net/ti-dp83867.h | 4 ++-- include/dt-bindings/net/ti-dp83869.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h index 6fc4b445d3a1..b8a4f3ff4a3b 100644 --- a/include/dt-bindings/net/ti-dp83867.h +++ b/include/dt-bindings/net/ti-dp83867.h @@ -1,10 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ /* * Device Tree constants for the Texas Instruments DP83867 PHY * * Author: Dan Murphy * - * Copyright: (C) 2015 Texas Instruments, Inc. + * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef _DT_BINDINGS_TI_DP83867_H diff --git a/include/dt-bindings/net/ti-dp83869.h b/include/dt-bindings/net/ti-dp83869.h index 218b1a64e975..917114aad7d0 100644 --- a/include/dt-bindings/net/ti-dp83869.h +++ b/include/dt-bindings/net/ti-dp83869.h @@ -1,10 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ /* * Device Tree constants for the Texas Instruments DP83869 PHY * * Author: Dan Murphy * - * Copyright: (C) 2019 Texas Instruments, Inc. + * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef _DT_BINDINGS_TI_DP83869_H From 12cda920212a49fa22d9e8b9492ac4ea013310a4 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Wed, 5 Jun 2024 15:20:57 +0800 Subject: [PATCH 03/35] net: hns3: fix kernel crash problem in concurrent scenario When link status change, the nic driver need to notify the roce driver to handle this event, but at this time, the roce driver may uninit, then cause kernel crash. To fix the problem, when link status change, need to check whether the roce registered, and when uninit, need to wait link update finish. Fixes: 45e92b7e4e27 ("net: hns3: add calling roce callback function when link status change") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 43cc6ee4d87d..82574ce0194f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3086,9 +3086,7 @@ static void hclge_push_link_status(struct hclge_dev *hdev) static void hclge_update_link_status(struct hclge_dev *hdev) { - struct hnae3_handle *rhandle = &hdev->vport[0].roce; struct hnae3_handle *handle = &hdev->vport[0].nic; - struct hnae3_client *rclient = hdev->roce_client; struct hnae3_client *client = hdev->nic_client; int state; int ret; @@ -3112,8 +3110,15 @@ static void hclge_update_link_status(struct hclge_dev *hdev) client->ops->link_status_change(handle, state); hclge_config_mac_tnl_int(hdev, state); - if (rclient && rclient->ops->link_status_change) - rclient->ops->link_status_change(rhandle, state); + + if (test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state)) { + struct hnae3_handle *rhandle = &hdev->vport[0].roce; + struct hnae3_client *rclient = hdev->roce_client; + + if (rclient && rclient->ops->link_status_change) + rclient->ops->link_status_change(rhandle, + state); + } hclge_push_link_status(hdev); } @@ -11319,6 +11324,12 @@ clear_roce: return ret; } +static bool hclge_uninit_need_wait(struct hclge_dev *hdev) +{ + return test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); +} + static void hclge_uninit_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { @@ -11327,7 +11338,7 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, if (hdev->roce_client) { clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + while (hclge_uninit_need_wait(hdev)) msleep(HCLGE_WAIT_RESET_DONE); hdev->roce_client->ops->uninit_instance(&vport->roce, 0); From 968fde83841a8c23558dfbd0a0c69d636db52b55 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 5 Jun 2024 15:20:58 +0800 Subject: [PATCH 04/35] net: hns3: add cond_resched() to hns3 ring buffer init process Currently hns3 ring buffer init process would hold cpu too long with big Tx/Rx ring depth. This could cause soft lockup. So this patch adds cond_resched() to the process. Then cpu can break to run other tasks instead of busy looping. Fixes: a723fb8efe29 ("net: hns3: refine for set ring parameters") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++++ drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index ff71fb1eced9..a5fc0209d628 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3535,6 +3535,9 @@ static int hns3_alloc_ring_buffers(struct hns3_enet_ring *ring) ret = hns3_alloc_and_attach_buffer(ring, i); if (ret) goto out_buffer_fail; + + if (!(i % HNS3_RESCHED_BD_NUM)) + cond_resched(); } return 0; @@ -5107,6 +5110,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv) } u64_stats_init(&priv->ring[i].syncp); + cond_resched(); } return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index acd756b0c7c9..d36c4ed16d8d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -214,6 +214,8 @@ enum hns3_nic_state { #define HNS3_CQ_MODE_EQE 1U #define HNS3_CQ_MODE_CQE 0U +#define HNS3_RESCHED_BD_NUM 1024 + enum hns3_pkt_l2t_type { HNS3_L2_TYPE_UNICAST, HNS3_L2_TYPE_MULTICAST, From c44711b78608c98a3e6b49ce91678cd0917d5349 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Wed, 5 Jun 2024 13:11:35 +0300 Subject: [PATCH 05/35] liquidio: Adjust a NULL pointer handling path in lio_vf_rep_copy_packet In lio_vf_rep_copy_packet() pg_info->page is compared to a NULL value, but then it is unconditionally passed to skb_add_rx_frag() which looks strange and could lead to null pointer dereference. lio_vf_rep_copy_packet() call trace looks like: octeon_droq_process_packets octeon_droq_fast_process_packets octeon_droq_dispatch_pkt octeon_create_recv_info ...search in the dispatch_list... ->disp_fn(rdisp->rinfo, ...) lio_vf_rep_pkt_recv(struct octeon_recv_info *recv_info, ...) In this path there is no code which sets pg_info->page to NULL. So this check looks unneeded and doesn't solve potential problem. But I guess the author had reason to add a check and I have no such card and can't do real test. In addition, the code in the function liquidio_push_packet() in liquidio/lio_core.c does exactly the same. Based on this, I consider the most acceptable compromise solution to adjust this issue by moving skb_add_rx_frag() into conditional scope. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 1f233f327913 ("liquidio: switchdev support for LiquidIO NIC") Signed-off-by: Aleksandr Mishin Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index 96c6ea12279f..989b4ddae342 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -272,13 +272,12 @@ lio_vf_rep_copy_packet(struct octeon_device *oct, pg_info->page_offset; memcpy(skb->data, va, MIN_SKB_SIZE); skb_put(skb, MIN_SKB_SIZE); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + pg_info->page, + pg_info->page_offset + MIN_SKB_SIZE, + len - MIN_SKB_SIZE, + LIO_RXBUFFER_SZ); } - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - pg_info->page, - pg_info->page_offset + MIN_SKB_SIZE, - len - MIN_SKB_SIZE, - LIO_RXBUFFER_SZ); } else { struct octeon_skb_page_info *pg_info = ((struct octeon_skb_page_info *)(skb->cb)); From 0579f27249047006a818e463ee66a6c314d04cea Mon Sep 17 00:00:00 2001 From: Sagar Cheluvegowda Date: Wed, 5 Jun 2024 11:57:18 -0700 Subject: [PATCH 06/35] net: stmmac: dwmac-qcom-ethqos: Configure host DMA width Commit 070246e4674b ("net: stmmac: Fix for mismatched host/device DMA address width") added support in the stmmac driver for platform drivers to indicate the host DMA width, but left it up to authors of the specific platforms to indicate if their width differed from the addr64 register read from the MAC itself. Qualcomm's EMAC4 integration supports only up to 36 bit width (as opposed to the addr64 register indicating 40 bit width). Let's indicate that in the platform driver to avoid a scenario where the driver will allocate descriptors of size that is supported by the CPU which in our case is 36 bit, but as the addr64 register is still capable of 40 bits the device will use two descriptors as one address. Fixes: 8c4d92e82d50 ("net: stmmac: dwmac-qcom-ethqos: add support for emac4 on sa8775p platforms") Signed-off-by: Sagar Cheluvegowda Reviewed-by: Simon Horman Reviewed-by: Andrew Halaney Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index e254b21fdb59..65d7370b47d5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -93,6 +93,7 @@ struct ethqos_emac_driver_data { bool has_emac_ge_3; const char *link_clk_name; bool has_integrated_pcs; + u32 dma_addr_width; struct dwmac4_addrs dwmac4_addrs; }; @@ -276,6 +277,7 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = { .has_emac_ge_3 = true, .link_clk_name = "phyaux", .has_integrated_pcs = true, + .dma_addr_width = 36, .dwmac4_addrs = { .dma_chan = 0x00008100, .dma_chan_offset = 0x1000, @@ -845,6 +847,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->flags |= STMMAC_FLAG_RX_CLK_RUNS_IN_LPI; if (data->has_integrated_pcs) plat_dat->flags |= STMMAC_FLAG_HAS_INTEGRATED_PCS; + if (data->dma_addr_width) + plat_dat->host_dma_width = data->dma_addr_width; if (ethqos->serdes_phy) { plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; From 5add2f7288468f35a374620dabf126c13baaea9c Mon Sep 17 00:00:00 2001 From: David Wei Date: Thu, 6 Jun 2024 07:59:08 -0700 Subject: [PATCH 07/35] netdevsim: fix backwards compatibility in nsim_get_iflink() The default ndo_get_iflink() implementation returns the current ifindex of the netdev. But the overridden nsim_get_iflink() returns 0 if the current nsim is not linked, breaking backwards compatibility for userspace that depend on this behaviour. Fix the problem by returning the current ifindex if not linked to a peer. Fixes: 8debcf5832c3 ("netdevsim: add ndo_get_iflink() implementation") Reported-by: Yu Watanabe Suggested-by: Yu Watanabe Signed-off-by: David Wei Signed-off-by: David S. Miller --- drivers/net/netdevsim/netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index c22897bf5509..017a6102be0a 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -324,7 +324,8 @@ static int nsim_get_iflink(const struct net_device *dev) rcu_read_lock(); peer = rcu_dereference(nsim->peer); - iflink = peer ? READ_ONCE(peer->netdev->ifindex) : 0; + iflink = peer ? READ_ONCE(peer->netdev->ifindex) : + READ_ONCE(dev->ifindex); rcu_read_unlock(); return iflink; From d37fe4255abe8e7b419b90c5847e8ec2b8debb08 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2024 15:46:51 +0000 Subject: [PATCH 08/35] tcp: fix race in tcp_v6_syn_recv_sock() tcp_v6_syn_recv_sock() calls ip6_dst_store() before inet_sk(newsk)->pinet6 has been set up. This means ip6_dst_store() writes over the parent (listener) np->dst_cookie. This is racy because multiple threads could share the same parent and their final np->dst_cookie could be wrong. Move ip6_dst_store() call after inet_sk(newsk)->pinet6 has been changed and after the copy of parent ipv6_pinfo. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8c577b651bfc..729faf8bd366 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1439,7 +1439,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ newsk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(newsk, dst, NULL, NULL); inet6_sk_rx_dst_set(newsk, skb); inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); @@ -1450,6 +1449,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + ip6_dst_store(newsk, dst, NULL, NULL); + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; From d029edefed39647c797c2710aedd9d31f84c069e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 6 Jun 2024 19:13:03 +0300 Subject: [PATCH 09/35] net dsa: qca8k: fix usages of device_get_named_child_node() The documentation for device_get_named_child_node() mentions this important point: " The caller is responsible for calling fwnode_handle_put() on the returned fwnode pointer. " Add fwnode_handle_put() to avoid leaked references. Fixes: 1e264f9d2918 ("net: dsa: qca8k: add LEDs basic support") Reviewed-by: Simon Horman Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-leds.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-leds.c b/drivers/net/dsa/qca/qca8k-leds.c index 811ebeeff4ed..43ac68052baf 100644 --- a/drivers/net/dsa/qca/qca8k-leds.c +++ b/drivers/net/dsa/qca/qca8k-leds.c @@ -431,8 +431,11 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d", priv->internal_mdio_bus->id, port_num); - if (!init_data.devicename) + if (!init_data.devicename) { + fwnode_handle_put(led); + fwnode_handle_put(leds); return -ENOMEM; + } ret = devm_led_classdev_register_ext(priv->dev, &port_led->cdev, &init_data); if (ret) @@ -441,6 +444,7 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p kfree(init_data.devicename); } + fwnode_handle_put(leds); return 0; } @@ -471,9 +475,13 @@ qca8k_setup_led_ctrl(struct qca8k_priv *priv) * the correct port for LED setup. */ ret = qca8k_parse_port_leds(priv, port, qca8k_port_to_phy(port_num)); - if (ret) + if (ret) { + fwnode_handle_put(port); + fwnode_handle_put(ports); return ret; + } } + fwnode_handle_put(ports); return 0; } From c6ae073f5903f6c6439d0ac855836a4da5c0a701 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 6 Jun 2024 23:32:48 +0300 Subject: [PATCH 10/35] geneve: Fix incorrect inner network header offset when innerprotoinherit is set When innerprotoinherit is set, the tunneled packets do not have an inner Ethernet header. Change 'maclen' to not always assume the header length is ETH_HLEN, as there might not be a MAC header. This resolves issues with drivers (e.g. mlx5, in mlx5e_tx_tunnel_accel()) who rely on the skb inner network header offset to be correct, and use it for TX offloads. Fixes: d8a6213d70ac ("geneve: fix header validation in geneve[6]_xmit_skb") Signed-off-by: Gal Pressman Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- drivers/net/geneve.c | 10 ++++++---- include/net/ip_tunnels.h | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 51495cb4b9be..838e85ddec67 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -815,6 +815,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { + bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); const struct ip_tunnel_key *key = &info->key; @@ -826,7 +827,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb)) + if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs4) @@ -908,7 +909,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, } err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), - geneve->cfg.inner_proto_inherit); + inner_proto_inherit); if (unlikely(err)) return err; @@ -925,6 +926,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { + bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); const struct ip_tunnel_key *key = &info->key; @@ -935,7 +937,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb)) + if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs6) @@ -997,7 +999,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = ttl ? : ip6_dst_hoplimit(dst); } err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), - geneve->cfg.inner_proto_inherit); + inner_proto_inherit); if (unlikely(err)) return err; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a08ec7713..1db2417b8ff5 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -461,9 +461,10 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) /* Variant of pskb_inet_may_pull(). */ -static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, + bool inner_proto_inherit) { - int nhlen = 0, maclen = ETH_HLEN; + int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; __be16 type = skb->protocol; /* Essentially this is skb_protocol(skb, true) From 791b4089e326271424b78f2fae778b20e53d071b Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 6 Jun 2024 23:32:49 +0300 Subject: [PATCH 11/35] net/mlx5e: Fix features validation check for tunneled UDP (non-VXLAN) packets Move the vxlan_features_check() call to after we verified the packet is a tunneled VXLAN packet. Without this, tunneled UDP non-VXLAN packets (for ex. GENENVE) might wrongly not get offloaded. In some cases, it worked by chance as GENEVE header is the same size as VXLAN, but it is obviously incorrect. Fixes: e3cfc7e6b7bd ("net/mlx5e: TX, Add geneve tunnel stateless offload support") Signed-off-by: Gal Pressman Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c53c99dde558..a605eae56685 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4875,7 +4875,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, /* Verify if UDP port is being offloaded by HW */ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) - return features; + return vxlan_features_check(skb, features); #if IS_ENABLED(CONFIG_GENEVE) /* Support Geneve offload for default UDP port */ @@ -4901,7 +4901,6 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, struct mlx5e_priv *priv = netdev_priv(netdev); features = vlan_features_check(skb, features); - features = vxlan_features_check(skb, features); /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && From 86fbd9f63a6b42b8f158361334f5a25762aea358 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 15 May 2024 10:32:01 -0400 Subject: [PATCH 12/35] Bluetooth: hci_sync: Fix not using correct handle When setting up an advertisement the code shall always attempt to use the handle set by the instance since it may not be equal to the instance ID. Fixes: e77f43d531af ("Bluetooth: hci_core: Fix not handling hdev->le_num_of_adv_sets=1") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 16daa79b7981..a8a7d2b36870 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1194,7 +1194,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp.own_addr_type = own_addr_type; cp.channel_map = hdev->le_adv_channel_map; - cp.handle = instance; + cp.handle = adv ? adv->handle : instance; if (flags & MGMT_ADV_FLAG_SEC_2M) { cp.primary_phy = HCI_ADV_PHY_1M; From 806a5198c05987b748b50f3d0c0cfb3d417381a4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 20 May 2024 16:03:07 -0400 Subject: [PATCH 13/35] Bluetooth: L2CAP: Fix rejecting L2CAP_CONN_PARAM_UPDATE_REQ This removes the bogus check for max > hcon->le_conn_max_interval since the later is just the initial maximum conn interval not the maximum the stack could support which is really 3200=4000ms. In order to pass GAP/CONN/CPUP/BV-05-C one shall probably enter values of the following fields in IXIT that would cause hci_check_conn_params to fail: TSPX_conn_update_int_min TSPX_conn_update_int_max TSPX_conn_update_peripheral_latency TSPX_conn_update_supervision_timeout Link: https://github.com/bluez/bluez/issues/847 Fixes: e4b019515f95 ("Bluetooth: Enforce validation on max value of connection interval") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 36 ++++++++++++++++++++++++++++---- net/bluetooth/l2cap_core.c | 8 +------ 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9231396fe96f..c43716edf205 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2113,18 +2113,46 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, { u16 max_latency; - if (min > max || min < 6 || max > 3200) + if (min > max) { + BT_WARN("min %d > max %d", min, max); return -EINVAL; + } - if (to_multiplier < 10 || to_multiplier > 3200) + if (min < 6) { + BT_WARN("min %d < 6", min); return -EINVAL; + } - if (max >= to_multiplier * 8) + if (max > 3200) { + BT_WARN("max %d > 3200", max); return -EINVAL; + } + + if (to_multiplier < 10) { + BT_WARN("to_multiplier %d < 10", to_multiplier); + return -EINVAL; + } + + if (to_multiplier > 3200) { + BT_WARN("to_multiplier %d > 3200", to_multiplier); + return -EINVAL; + } + + if (max >= to_multiplier * 8) { + BT_WARN("max %d >= to_multiplier %d * 8", max, to_multiplier); + return -EINVAL; + } max_latency = (to_multiplier * 4 / max) - 1; - if (latency > 499 || latency > max_latency) + if (latency > 499) { + BT_WARN("latency %d > 499", latency); return -EINVAL; + } + + if (latency > max_latency) { + BT_WARN("latency %d > max_latency %d", latency, max_latency); + return -EINVAL; + } return 0; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5b509b767557..c49e0d4b3c0d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4647,13 +4647,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - if (max > hcon->le_conn_max_interval) { - BT_DBG("requested connection interval exceeds current bounds."); - err = -EINVAL; - } else { - err = hci_check_conn_params(min, max, latency, to_multiplier); - } - + err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else From c695439d198d30e10553a3b98360c5efe77b6903 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 9 Jun 2024 18:06:20 +0300 Subject: [PATCH 14/35] Bluetooth: fix connection setup in l2cap_connect The amp_id argument of l2cap_connect() was removed in commit 84a4bb6548a2 ("Bluetooth: HCI: Remove HCI_AMP support") It was always called with amp_id == 0, i.e. AMP_ID_BREDR == 0x00 (ie. non-AMP controller). In the above commit, the code path for amp_id != 0 was preserved, although it should have used the amp_id == 0 one. Restore the previous behavior of the non-AMP code path, to fix problems with L2CAP connections. Fixes: 84a4bb6548a2 ("Bluetooth: HCI: Remove HCI_AMP support") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c49e0d4b3c0d..aed025734d04 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4011,8 +4011,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, status = L2CAP_CS_AUTHOR_PEND; chan->ops->defer(chan); } else { - l2cap_state_change(chan, BT_CONNECT2); - result = L2CAP_CR_PEND; + l2cap_state_change(chan, BT_CONFIG); + result = L2CAP_CR_SUCCESS; status = L2CAP_CS_NO_INFO; } } else { From 44180feaccf266d9b0b28cc4ceaac019817deb5c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Jun 2024 17:53:32 +0200 Subject: [PATCH 15/35] net/sched: initialize noop_qdisc owner When the noop_qdisc owner isn't initialized, then it will be 0, so packets will erroneously be regarded as having been subject to recursion as long as only CPU 0 queues them. For non-SMP, that's all packets, of course. This causes a change in what's reported to userspace, normally noop_qdisc would drop packets silently, but with this change the syscall returns -ENOBUFS if RECVERR is also set on the socket. Fix this by initializing the owner field to -1, just like it would be for dynamically allocated qdiscs by qdisc_alloc(). Fixes: 0f022d32c3ec ("net/sched: Fix mirred deadlock on device recursion") Signed-off-by: Johannes Berg Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240607175340.786bfb938803.I493bf8422e36be4454c08880a8d3703cea8e421a@changeid Signed-off-by: Jakub Kicinski --- net/sched/sch_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2a637a17061b..e22ff003d52e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -676,6 +676,7 @@ struct Qdisc noop_qdisc = { .qlen = 0, .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock), }, + .owner = -1, }; EXPORT_SYMBOL(noop_qdisc); From 8031b58c3a9b1db3ef68b3bd749fbee2e1e1aaa3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Jun 2024 17:01:48 +0200 Subject: [PATCH 16/35] mptcp: ensure snd_una is properly initialized on connect This is strictly related to commit fb7a0d334894 ("mptcp: ensure snd_nxt is properly initialized on connect"). It turns out that syzkaller can trigger the retransmit after fallback and before processing any other incoming packet - so that snd_una is still left uninitialized. Address the issue explicitly initializing snd_una together with snd_nxt and write_seq. Suggested-by: Mat Martineau Fixes: 8fd738049ac3 ("mptcp: fallback in case of simultaneous connect") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/485 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240607-upstream-net-20240607-misc-fixes-v1-1-1ab9ddfa3d00@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 96b113854bd3..bb7dca8aa2d9 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3740,6 +3740,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) WRITE_ONCE(msk->write_seq, subflow->idsn); WRITE_ONCE(msk->snd_nxt, subflow->idsn); + WRITE_ONCE(msk->snd_una, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); From 6a09788c1a66e3d8b04b3b3e7618cc817bb60ae9 Mon Sep 17 00:00:00 2001 From: YonglongLi Date: Fri, 7 Jun 2024 17:01:49 +0200 Subject: [PATCH 17/35] mptcp: pm: inc RmAddr MIB counter once per RM_ADDR ID The RmAddr MIB counter is supposed to be incremented once when a valid RM_ADDR has been received. Before this patch, it could have been incremented as many times as the number of subflows connected to the linked address ID, so it could have been 0, 1 or more than 1. The "RmSubflow" is incremented after a local operation. In this case, it is normal to tied it with the number of subflows that have been actually removed. The "remove invalid addresses" MP Join subtest has been modified to validate this case. A broadcast IP address is now used instead: the client will not be able to create a subflow to this address. The consequence is that when receiving the RM_ADDR with the ID attached to this broadcast IP address, no subflow linked to this ID will be found. Fixes: 7a7e52e38a40 ("mptcp: add RM_ADDR related mibs") Cc: stable@vger.kernel.org Co-developed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: YonglongLi Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240607-upstream-net-20240607-misc-fixes-v1-2-1ab9ddfa3d00@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 5 ++++- tools/testing/selftests/net/mptcp/mptcp_join.sh | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7f53e022e27e..766a8409fa67 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -814,10 +814,13 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); removed = true; - __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (rm_type == MPTCP_MIB_RMSUBFLOW) + __MPTCP_INC_STATS(sock_net(sk), rm_type); } if (rm_type == MPTCP_MIB_RMSUBFLOW) __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); + else if (rm_type == MPTCP_MIB_RMADDR) + __MPTCP_INC_STATS(sock_net(sk), rm_type); if (!removed) continue; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2b66c5fa71eb..aea314d140c9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2250,7 +2250,8 @@ remove_tests() pm_nl_set_limits $ns1 3 3 pm_nl_add_endpoint $ns1 10.0.12.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 From 40eec1795cc27b076d49236649a29507c7ed8c2d Mon Sep 17 00:00:00 2001 From: YonglongLi Date: Fri, 7 Jun 2024 17:01:50 +0200 Subject: [PATCH 18/35] mptcp: pm: update add_addr counters after connect The creation of new subflows can fail for different reasons. If no subflow have been created using the received ADD_ADDR, the related counters should not be updated, otherwise they will never be decremented for events related to this ID later on. For the moment, the number of accepted ADD_ADDR is only decremented upon the reception of a related RM_ADDR, and only if the remote address ID is currently being used by at least one subflow. In other words, if no subflow can be created with the received address, the counter will not be decremented. In this case, it is then important not to increment pm.add_addr_accepted counter, and not to modify pm.accept_addr bit. Note that this patch does not modify the behaviour in case of failures later on, e.g. if the MP Join is dropped or rejected. The "remove invalid addresses" MP Join subtest has been modified to validate this case. The broadcast IP address is added before the "valid" address that will be used to successfully create a subflow, and the limit is decreased by one: without this patch, it was not possible to create the last subflow, because: - the broadcast address would have been accepted even if it was not usable: the creation of a subflow to this address results in an error, - the limit of 2 accepted ADD_ADDR would have then been reached. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Co-developed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: YonglongLi Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240607-upstream-net-20240607-misc-fixes-v1-3-1ab9ddfa3d00@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 16 ++++++++++------ tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 766a8409fa67..ea9e5817b9e9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -677,6 +677,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) unsigned int add_addr_accept_max; struct mptcp_addr_info remote; unsigned int subflows_max; + bool sf_created = false; int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); @@ -704,15 +705,18 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) if (nr == 0) return; - msk->pm.add_addr_accepted++; - if (msk->pm.add_addr_accepted >= add_addr_accept_max || - msk->pm.subflows >= subflows_max) - WRITE_ONCE(msk->pm.accept_addr, false); - spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &addrs[i], &remote); + if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0) + sf_created = true; spin_lock_bh(&msk->pm.lock); + + if (sf_created) { + msk->pm.add_addr_accepted++; + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) + WRITE_ONCE(msk->pm.accept_addr, false); + } } void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index aea314d140c9..108aeeb84ef1 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2249,10 +2249,10 @@ remove_tests() if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 pm_nl_add_endpoint $ns1 10.0.12.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 flags signal - pm_nl_set_limits $ns2 3 3 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 From 74acb250e103f42be372177628f9272b6e888c49 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 7 Jun 2024 17:01:51 +0200 Subject: [PATCH 19/35] mailmap: map Geliang's new email address Just like my other email addresses, map my new one to kernel.org account too. My new email address uses "last name, first name" format, which is different from my other email addresses. This mailmap is also used to indicate that it is actually the same person. Suggested-by: Mat Martineau Suggested-by: Matthieu Baerts Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240607-upstream-net-20240607-misc-fixes-v1-4-1ab9ddfa3d00@kernel.org Signed-off-by: Jakub Kicinski --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index efd9fa867a8e..9af91bd3584b 100644 --- a/.mailmap +++ b/.mailmap @@ -217,6 +217,7 @@ Geliang Tang Geliang Tang Geliang Tang Geliang Tang +Geliang Tang Georgi Djakov Gerald Schaefer Gerald Schaefer From 36534d3c54537bf098224a32dc31397793d4594d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Jun 2024 12:56:52 +0000 Subject: [PATCH 20/35] tcp: use signed arithmetic in tcp_rtx_probe0_timed_out() Due to timer wheel implementation, a timer will usually fire after its schedule. For instance, for HZ=1000, a timeout between 512ms and 4s has a granularity of 64ms. For this range of values, the extra delay could be up to 63ms. For TCP, this means that tp->rcv_tstamp may be after inet_csk(sk)->icsk_timeout whenever the timer interrupt finally triggers, if one packet came during the extra delay. We need to make sure tcp_rtx_probe0_timed_out() handles this case. Fixes: e89688e3e978 ("net: tcp: fix unexcepted socket die when snd_wnd is 0") Signed-off-by: Eric Dumazet Cc: Menglong Dong Acked-by: Neal Cardwell Reviewed-by: Jason Xing Link: https://lore.kernel.org/r/20240607125652.1472540-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_timer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 83fe7f62f7f1..5bfd76a31af6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -485,8 +485,12 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, { const struct tcp_sock *tp = tcp_sk(sk); const int timeout = TCP_RTO_MAX * 2; - u32 rcv_delta; + s32 rcv_delta; + /* Note: timer interrupt might have been delayed by at least one jiffy, + * and tp->rcv_tstamp might very well have been written recently. + * rcv_delta can thus be negative. + */ rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; From c4ab9da85b9df3692f861512fe6c9812f38b7471 Mon Sep 17 00:00:00 2001 From: Davide Ornaghi Date: Wed, 5 Jun 2024 13:03:45 +0200 Subject: [PATCH 21/35] netfilter: nft_inner: validate mandatory meta and payload Check for mandatory netlink attributes in payload and meta expression when used embedded from the inner expression, otherwise NULL pointer dereference is possible from userspace. Fixes: a150d122b6bd ("netfilter: nft_meta: add inner match support") Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Signed-off-by: Davide Ornaghi Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 3 +++ net/netfilter/nft_payload.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index ba0d3683a45d..9139ce38ea7b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -839,6 +839,9 @@ static int nft_meta_inner_init(const struct nft_ctx *ctx, struct nft_meta *priv = nft_expr_priv(expr); unsigned int len; + if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG]) + return -EINVAL; + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_PROTOCOL: diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 0c43d748e23a..50429cbd42da 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -650,6 +650,10 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx, struct nft_payload *priv = nft_expr_priv(expr); u32 base; + if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] || + !tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG]) + return -EINVAL; + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); switch (base) { case NFT_PAYLOAD_TUN_HEADER: From 4e7aaa6b82d63e8ddcbfb56b4fd3d014ca586f10 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 4 Jun 2024 15:58:03 +0200 Subject: [PATCH 22/35] netfilter: ipset: Fix race between namespace cleanup and gc in the list:set type Lion Ackermann reported that there is a race condition between namespace cleanup in ipset and the garbage collection of the list:set type. The namespace cleanup can destroy the list:set type of sets while the gc of the set type is waiting to run in rcu cleanup. The latter uses data from the destroyed set which thus leads use after free. The patch contains the following parts: - When destroying all sets, first remove the garbage collectors, then wait if needed and then destroy the sets. - Fix the badly ordered "wait then remove gc" for the destroy a single set case. - Fix the missing rcu locking in the list:set type in the userspace test case. - Use proper RCU list handlings in the list:set type. The patch depends on c1193d9bbbd3 (netfilter: ipset: Add list flush to cancel_gc). Fixes: 97f7cf1cd80e (netfilter: ipset: fix performance regression in swap operation) Reported-by: Lion Ackermann Tested-by: Lion Ackermann Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 93 +++++++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 30 ++++----- 2 files changed, 66 insertions(+), 57 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 3184cc6be4c9..c7ae4d9bf3d2 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1172,23 +1172,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, }; -static void -ip_set_destroy_set(struct ip_set *set) -{ - pr_debug("set: %s\n", set->name); - - /* Must call it without holding any lock */ - set->variant->destroy(set); - module_put(set->type->me); - kfree(set); -} +/* In order to return quickly when destroying a single set, it is split + * into two stages: + * - Cancel garbage collector + * - Destroy the set itself via call_rcu() + */ static void ip_set_destroy_set_rcu(struct rcu_head *head) { struct ip_set *set = container_of(head, struct ip_set, rcu); - ip_set_destroy_set(set); + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); +} + +static void +_destroy_all_sets(struct ip_set_net *inst) +{ + struct ip_set *set; + ip_set_id_t i; + bool need_wait = false; + + /* First cancel gc's: set:list sets are flushed as well */ + for (i = 0; i < inst->ip_set_max; i++) { + set = ip_set(inst, i); + if (set) { + set->variant->cancel_gc(set); + if (set->type->features & IPSET_TYPE_NAME) + need_wait = true; + } + } + /* Must wait for flush to be really finished */ + if (need_wait) + rcu_barrier(); + for (i = 0; i < inst->ip_set_max; i++) { + set = ip_set(inst, i); + if (set) { + ip_set(inst, i) = NULL; + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); + } + } } static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, @@ -1202,11 +1229,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Commands are serialized and references are * protected by the ip_set_ref_lock. * External systems (i.e. xt_set) must call - * ip_set_put|get_nfnl_* functions, that way we + * ip_set_nfnl_get_* functions, that way we * can safely check references here. * * list:set timer can only decrement the reference @@ -1214,8 +1240,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * without holding the lock. */ if (!attr[IPSET_ATTR_SETNAME]) { - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); @@ -1226,15 +1250,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, } inst->is_destroyed = true; read_unlock_bh(&ip_set_ref_lock); - for (i = 0; i < inst->ip_set_max; i++) { - s = ip_set(inst, i); - if (s) { - ip_set(inst, i) = NULL; - /* Must cancel garbage collectors */ - s->variant->cancel_gc(s); - ip_set_destroy_set(s); - } - } + _destroy_all_sets(inst); /* Modified by ip_set_destroy() only, which is serialized */ inst->is_destroyed = false; } else { @@ -1255,12 +1271,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); if (features & IPSET_TYPE_NAME) { /* Must wait for flush to be really finished */ rcu_barrier(); } - /* Must cancel garbage collectors */ - s->variant->cancel_gc(s); call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; @@ -2364,31 +2380,26 @@ ip_set_net_init(struct net *net) return 0; } +static void __net_exit +ip_set_net_pre_exit(struct net *net) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + inst->is_deleted = true; /* flag for ip_set_nfnl_put */ +} + static void __net_exit ip_set_net_exit(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set *set = NULL; - ip_set_id_t i; - - inst->is_deleted = true; /* flag for ip_set_nfnl_put */ - - nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < inst->ip_set_max; i++) { - set = ip_set(inst, i); - if (set) { - ip_set(inst, i) = NULL; - set->variant->cancel_gc(set); - ip_set_destroy_set(set); - } - } - nfnl_unlock(NFNL_SUBSYS_IPSET); + _destroy_all_sets(inst); kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); } static struct pernet_operations ip_set_net_ops = { .init = ip_set_net_init, + .pre_exit = ip_set_net_pre_exit, .exit = ip_set_net_exit, .id = &ip_set_net_id, .size = sizeof(struct ip_set_net), diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 54e2a1dd7f5f..bfae7066936b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, struct set_elem *e; int ret; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, struct set_elem *e; int ret; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct list_set *map = set->data; struct set_adt_elem *d = value; struct set_elem *e, *next, *prev = NULL; - int ret; + int ret = 0; - list_for_each_entry(e, &map->members, list) { + rcu_read_lock(); + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -201,6 +202,7 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (d->before == 0) { ret = 1; + goto out; } else if (d->before > 0) { next = list_next_entry(e, list); ret = !list_is_last(&e->list, &map->members) && @@ -208,9 +210,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, } else { ret = prev && prev->id == d->refid; } - return ret; + goto out; } - return 0; +out: + rcu_read_unlock(); + return ret; } static void @@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Find where to add the new entry */ n = prev = next = NULL; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e, *next, *prev = NULL; + struct set_elem *e, *n, *next, *prev = NULL; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_safe(e, n, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -424,14 +428,8 @@ static void list_set_destroy(struct ip_set *set) { struct list_set *map = set->data; - struct set_elem *e, *n; - list_for_each_entry_safe(e, n, &map->members, list) { - list_del(&e->list); - ip_set_put_byindex(map->net, e->id); - ip_set_ext_destroy(set, e); - kfree(e); - } + WARN_ON_ONCE(!list_empty(&map->members)); kfree(map); set->data = NULL; From 6f8f132cc7bac2ac76911e47d5baa378aafda4cb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Jun 2024 12:23:31 +0200 Subject: [PATCH 23/35] netfilter: Use flowlabel flow key when re-routing mangled packets 'ip6 dscp set $v' in an nftables outpute route chain has no effect. While nftables does detect the dscp change and calls the reroute hook. But ip6_route_me_harder never sets the dscp/flowlabel: flowlabel/dsfield routing rules are ignored and no reroute takes place. Thanks to Yi Chen for an excellent reproducer script that I used to validate this change. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Yi Chen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 53d255838e6a..5d989d803009 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -36,6 +36,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), }; int err; From 144ba8580bcb82b2686c3d1a043299d844b9a682 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 10 Jun 2024 10:34:26 +0200 Subject: [PATCH 24/35] net: pse-pd: Use EOPNOTSUPP error code instead of ENOTSUPP ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP as reported by checkpatch script. Fixes: 18ff0bcda6d1 ("ethtool: add interface to interact with Ethernet Power Equipment") Reviewed-by: Andrew Lunn Acked-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240610083426.740660-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/pse-pd/pse.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 6d07c95dabb9..6eec24ffa866 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -167,14 +167,14 @@ static inline int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, struct pse_control_status *status) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline bool pse_has_podl(struct pse_control *psec) From 1b9f756344416e02b41439bf2324b26aa25e141c Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Mon, 10 Jun 2024 15:57:18 -0700 Subject: [PATCH 25/35] gve: ignore nonrelevant GSO type bits when processing TSO headers TSO currently fails when the skb's gso_type field has more than one bit set. TSO packets can be passed from userspace using PF_PACKET, TUNTAP and a few others, using virtio_net_hdr (e.g., PACKET_VNET_HDR). This includes virtualization, such as QEMU, a real use-case. The gso_type and gso_size fields as passed from userspace in virtio_net_hdr are not trusted blindly by the kernel. It adds gso_type |= SKB_GSO_DODGY to force the packet to enter the software GSO stack for verification. This issue might similarly come up when the CWR bit is set in the TCP header for congestion control, causing the SKB_GSO_TCP_ECN gso_type bit to be set. Fixes: a57e5de476be ("gve: DQO: Add TX path") Signed-off-by: Joshua Washington Reviewed-by: Praveen Kaligineedi Reviewed-by: Harshitha Ramamurthy Reviewed-by: Willem de Bruijn Suggested-by: Eric Dumazet Acked-by: Andrei Vagin v2 - Remove unnecessary comments, remove line break between fixes tag and signoffs. v3 - Add back unrelated empty line removal. Link: https://lore.kernel.org/r/20240610225729.2985343-1-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index fe1b26a4d736..0b3cca3fc792 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -555,28 +555,18 @@ static int gve_prep_tso(struct sk_buff *skb) if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) return -1; + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + return -EINVAL; + /* Needed because we will modify header. */ err = skb_cow_head(skb, 0); if (err < 0) return err; tcp = tcp_hdr(skb); - - /* Remove payload length from checksum. */ paylen = skb->len - skb_transport_offset(skb); - - switch (skb_shinfo(skb)->gso_type) { - case SKB_GSO_TCPV4: - case SKB_GSO_TCPV6: - csum_replace_by_diff(&tcp->check, - (__force __wsum)htonl(paylen)); - - /* Compute length of segmentation header. */ - header_len = skb_tcp_all_headers(skb); - break; - default: - return -EINVAL; - } + csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); + header_len = skb_tcp_all_headers(skb); if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO)) return -EINVAL; From be27b896529787e23a35ae4befb6337ce73fcca0 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Sat, 8 Jun 2024 22:35:24 +0800 Subject: [PATCH 26/35] net: stmmac: replace priv->speed with the portTransmitRate from the tc-cbs parameters The current cbs parameter depends on speed after uplinking, which is not needed and will report a configuration error if the port is not initially connected. The UAPI exposed by tc-cbs requires userspace to recalculate the send slope anyway, because the formula depends on port_transmit_rate (see man tc-cbs), which is not an invariant from tc's perspective. Therefore, we use offload->sendslope and offload->idleslope to derive the original port_transmit_rate from the CBS formula. Fixes: 1f705bc61aee ("net: stmmac: Add support for CBS QDISC") Signed-off-by: Xiaolei Wang Reviewed-by: Wojciech Drewek Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20240608143524.2065736-1-xiaolei.wang@windriver.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 222540b55480..1562fbdd0a04 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -343,10 +343,11 @@ static int tc_setup_cbs(struct stmmac_priv *priv, struct tc_cbs_qopt_offload *qopt) { u32 tx_queues_count = priv->plat->tx_queues_to_use; + s64 port_transmit_rate_kbps; u32 queue = qopt->queue; - u32 ptr, speed_div; u32 mode_to_use; u64 value; + u32 ptr; int ret; /* Queue 0 is not AVB capable */ @@ -355,30 +356,26 @@ static int tc_setup_cbs(struct stmmac_priv *priv, if (!priv->dma_cap.av) return -EOPNOTSUPP; + port_transmit_rate_kbps = qopt->idleslope - qopt->sendslope; + /* Port Transmit Rate and Speed Divider */ - switch (priv->speed) { + switch (div_s64(port_transmit_rate_kbps, 1000)) { case SPEED_10000: - ptr = 32; - speed_div = 10000000; - break; case SPEED_5000: ptr = 32; - speed_div = 5000000; break; case SPEED_2500: - ptr = 8; - speed_div = 2500000; - break; case SPEED_1000: ptr = 8; - speed_div = 1000000; break; case SPEED_100: ptr = 4; - speed_div = 100000; break; default: - return -EOPNOTSUPP; + netdev_err(priv->dev, + "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n", + port_transmit_rate_kbps); + return -EINVAL; } mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; @@ -398,10 +395,10 @@ static int tc_setup_cbs(struct stmmac_priv *priv, } /* Final adjustments for HW */ - value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div); + value = div_s64(qopt->idleslope * 1024ll * ptr, port_transmit_rate_kbps); priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0); - value = div_s64(-qopt->sendslope * 1024ll * ptr, speed_div); + value = div_s64(-qopt->sendslope * 1024ll * ptr, port_transmit_rate_kbps); priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0); value = qopt->hicredit * 1024ll * 8; From 14a20e5b4ad998793c5f43b0330d9e1388446cf3 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Fri, 7 Jun 2024 13:28:28 +0200 Subject: [PATCH 27/35] net/ipv6: Fix the RT cache flush via sysctl using a previous delay The net.ipv6.route.flush system parameter takes a value which specifies a delay used during the flush operation for aging exception routes. The written value is however not used in the currently requested flush and instead utilized only in the next one. A problem is that ipv6_sysctl_rtcache_flush() first reads the old value of net->ipv6.sysctl.flush_delay into a local delay variable and then calls proc_dointvec() which actually updates the sysctl based on the provided input. Fix the problem by switching the order of the two operations. Fixes: 4990509f19e8 ("[NETNS][IPV6]: Make sysctls route per namespace.") Signed-off-by: Petr Pavlu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607112828.30285-1-petr.pavlu@suse.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f083d9faba6b..952c2bf11709 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6343,12 +6343,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, if (!write) return -EINVAL; - net = (struct net *)ctl->extra1; - delay = net->ipv6.sysctl.flush_delay; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (ret) return ret; + net = (struct net *)ctl->extra1; + delay = net->ipv6.sysctl.flush_delay; fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } From 36c92936e868601fa1f43da6758cf55805043509 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 9 Jun 2024 13:36:53 +0300 Subject: [PATCH 28/35] net: bridge: mst: pass vlan group directly to br_mst_vlan_set_state Pass the already obtained vlan group pointer to br_mst_vlan_set_state() instead of dereferencing it again. Each caller has already correctly dereferenced it for their context. This change is required for the following suspicious RCU dereference fix. No functional changes intended. Fixes: 3a7c1661ae13 ("net: bridge: mst: fix vlan use-after-free") Reported-by: syzbot+9bbe2de1bc9d470eb5fe@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9bbe2de1bc9d470eb5fe Signed-off-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240609103654.914987-2-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_mst.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 3c66141d34d6..1de72816b0fb 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -73,11 +73,10 @@ int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state) } EXPORT_SYMBOL_GPL(br_mst_get_state); -static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v, +static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *v, u8 state) { - struct net_bridge_vlan_group *vg = nbp_vlan_group(p); - if (br_vlan_get_state(v) == state) return; @@ -121,7 +120,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, if (v->brvlan->msti != msti) continue; - br_mst_vlan_set_state(p, v, state); + br_mst_vlan_set_state(vg, v, state); } out: @@ -140,13 +139,13 @@ static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti) * it. */ if (v != pv && v->brvlan->msti == msti) { - br_mst_vlan_set_state(pv->port, pv, v->state); + br_mst_vlan_set_state(vg, pv, v->state); return; } } /* Otherwise, start out in a new MSTI with all ports disabled. */ - return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED); + return br_mst_vlan_set_state(vg, pv, BR_STATE_DISABLED); } int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti) From 546ceb1dfdac866648ec959cbc71d9525bd73462 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 9 Jun 2024 13:36:54 +0300 Subject: [PATCH 29/35] net: bridge: mst: fix suspicious rcu usage in br_mst_set_state I converted br_mst_set_state to RCU to avoid a vlan use-after-free but forgot to change the vlan group dereference helper. Switch to vlan group RCU deref helper to fix the suspicious rcu usage warning. Fixes: 3a7c1661ae13 ("net: bridge: mst: fix vlan use-after-free") Reported-by: syzbot+9bbe2de1bc9d470eb5fe@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9bbe2de1bc9d470eb5fe Signed-off-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240609103654.914987-3-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 1de72816b0fb..1820f09ff59c 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -102,7 +102,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, int err = 0; rcu_read_lock(); - vg = nbp_vlan_group(p); + vg = nbp_vlan_group_rcu(p); if (!vg) goto out; From 8eef5c3cea65f248c99cd9dcb3f84c6509b78162 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 11 Jun 2024 09:24:55 -0700 Subject: [PATCH 30/35] Revert "igc: fix a log entry using uninitialized netdev" This reverts commit 86167183a17e03ec77198897975e9fdfbd53cb0b. igc_ptp_init() needs to be called before igc_reset(), otherwise kernel crash could be observed. Following the corresponding discussion [1] and [2] revert this commit. Link: https://lore.kernel.org/all/8fb634f8-7330-4cf4-a8ce-485af9c0a61a@intel.com/ [1] Link: https://lore.kernel.org/all/87o78rmkhu.fsf@intel.com/ [2] Fixes: 86167183a17e ("igc: fix a log entry using uninitialized netdev") Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240611162456.961631-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 305e05294a26..87b655b839c1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7032,6 +7032,8 @@ static int igc_probe(struct pci_dev *pdev, device_set_wakeup_enable(&adapter->pdev->dev, adapter->flags & IGC_FLAG_WOL_SUPPORTED); + igc_ptp_init(adapter); + igc_tsn_clear_schedule(adapter); /* reset the hardware with the new settings */ @@ -7053,9 +7055,6 @@ static int igc_probe(struct pci_dev *pdev, /* Check if Media Autosense is enabled */ adapter->ei = *ei; - /* do hw tstamp init after resetting */ - igc_ptp_init(adapter); - /* print pcie link status and MAC address */ pcie_print_link_status(pdev); netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); From 79f18a41dd056115d685f3b0a419c7cd40055e13 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 12 Jun 2024 06:04:46 +0000 Subject: [PATCH 31/35] ionic: fix use after netif_napi_del() When queues are started, netif_napi_add() and napi_enable() are called. If there are 4 queues and only 3 queues are used for the current configuration, only 3 queues' napi should be registered and enabled. The ionic_qcq_enable() checks whether the .poll pointer is not NULL for enabling only the using queue' napi. Unused queues' napi will not be registered by netif_napi_add(), so the .poll pointer indicates NULL. But it couldn't distinguish whether the napi was unregistered or not because netif_napi_del() doesn't reset the .poll pointer to NULL. So, ionic_qcq_enable() calls napi_enable() for the queue, which was unregistered by netif_napi_del(). Reproducer: ethtool -L rx 1 tx 1 combined 0 ethtool -L rx 0 tx 0 combined 1 ethtool -L rx 0 tx 0 combined 4 Splat looks like: kernel BUG at net/core/dev.c:6666! Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 1057 Comm: kworker/3:3 Not tainted 6.10.0-rc2+ #16 Workqueue: events ionic_lif_deferred_work [ionic] RIP: 0010:napi_enable+0x3b/0x40 Code: 48 89 c2 48 83 e2 f6 80 b9 61 09 00 00 00 74 0d 48 83 bf 60 01 00 00 00 74 03 80 ce 01 f0 4f RSP: 0018:ffffb6ed83227d48 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff97560cda0828 RCX: 0000000000000029 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff97560cda0a28 RBP: ffffb6ed83227d50 R08: 0000000000000400 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: ffff97560ce3c1a0 R14: 0000000000000000 R15: ffff975613ba0a20 FS: 0000000000000000(0000) GS:ffff975d5f780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8f734ee200 CR3: 0000000103e50000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ? die+0x33/0x90 ? do_trap+0xd9/0x100 ? napi_enable+0x3b/0x40 ? do_error_trap+0x83/0xb0 ? napi_enable+0x3b/0x40 ? napi_enable+0x3b/0x40 ? exc_invalid_op+0x4e/0x70 ? napi_enable+0x3b/0x40 ? asm_exc_invalid_op+0x16/0x20 ? napi_enable+0x3b/0x40 ionic_qcq_enable+0xb7/0x180 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_start_queues+0xc4/0x290 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_link_status_check+0x11c/0x170 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] ionic_lif_deferred_work+0x129/0x280 [ionic 59bdfc8a035436e1c4224ff7d10789e3f14643f8] process_one_work+0x145/0x360 worker_thread+0x2bb/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0xcc/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2d/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Taehee Yoo Reviewed-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240612060446.1754392-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 24870da3f484..1934e9d6d9e4 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -304,10 +304,8 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) if (ret) return ret; - if (qcq->napi.poll) - napi_enable(&qcq->napi); - if (qcq->flags & IONIC_QCQ_F_INTR) { + napi_enable(&qcq->napi); irq_set_affinity_hint(qcq->intr.vector, &qcq->intr.affinity_mask); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, From 6f4d93b78ade0a4c2cafd587f7b429ce95abb02e Mon Sep 17 00:00:00 2001 From: Ziwei Xiao Date: Wed, 12 Jun 2024 00:16:54 +0000 Subject: [PATCH 32/35] gve: Clear napi->skb before dev_kfree_skb_any() gve_rx_free_skb incorrectly leaves napi->skb referencing an skb after it is freed with dev_kfree_skb_any(). This can result in a subsequent call to napi_get_frags returning a dangling pointer. Fix this by clearing napi->skb before the skb is freed. Fixes: 9b8dd5e5ea48 ("gve: DQO: Add RX path") Cc: stable@vger.kernel.org Reported-by: Shailend Chand Signed-off-by: Ziwei Xiao Reviewed-by: Harshitha Ramamurthy Reviewed-by: Shailend Chand Reviewed-by: Praveen Kaligineedi Link: https://lore.kernel.org/r/20240612001654.923887-1-ziweixiao@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index c1c912de59c7..1154c1d8f66f 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -647,11 +647,13 @@ static void gve_rx_skb_hash(struct sk_buff *skb, skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type); } -static void gve_rx_free_skb(struct gve_rx_ring *rx) +static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) { if (!rx->ctx.skb_head) return; + if (rx->ctx.skb_head == napi->skb) + napi->skb = NULL; dev_kfree_skb_any(rx->ctx.skb_head); rx->ctx.skb_head = NULL; rx->ctx.skb_tail = NULL; @@ -950,7 +952,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num); if (err < 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); if (err == -ENOMEM) rx->rx_skb_alloc_fail++; @@ -993,7 +995,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) /* gve_rx_complete_skb() will consume skb if successful */ if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); rx->rx_desc_err_dropped_pkt++; u64_stats_update_end(&rx->statss); From 7d9df38c9c037ab84502ce7eeae9f1e1e7e72603 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 12 Jun 2024 16:17:36 -0700 Subject: [PATCH 33/35] bnxt_en: Cap the size of HWRM_PORT_PHY_QCFG forwarded response Firmware interface 1.10.2.118 has increased the size of HWRM_PORT_PHY_QCFG response beyond the maximum size that can be forwarded. When the VF's link state is not the default auto state, the PF will need to forward the response back to the VF to indicate the forced state. This regression may cause the VF to fail to initialize. Fix it by capping the HWRM_PORT_PHY_QCFG response to the maximum 96 bytes. The SPEEDS2_SUPPORTED flag needs to be cleared because the new speeds2 fields are beyond the legacy structure. Also modify bnxt_hwrm_fwd_resp() to print a warning if the message size exceeds 96 bytes to make this failure more obvious. Fixes: 84a911db8305 ("bnxt_en: Update firmware interface to 1.10.2.118") Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20240612231736.57823-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 51 +++++++++++++++++++ .../net/ethernet/broadcom/bnxt/bnxt_sriov.c | 12 ++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 656ab81c0272..bbc7edccd5a4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1434,6 +1434,57 @@ struct bnxt_l2_filter { atomic_t refcnt; }; +/* Compat version of hwrm_port_phy_qcfg_output capped at 96 bytes. The + * first 95 bytes are identical to hwrm_port_phy_qcfg_output in bnxt_hsi.h. + * The last valid byte in the compat version is different. + */ +struct hwrm_port_phy_qcfg_output_compat { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 link; + u8 active_fec_signal_mode; + __le16 link_speed; + u8 duplex_cfg; + u8 pause; + __le16 support_speeds; + __le16 force_link_speed; + u8 auto_mode; + u8 auto_pause; + __le16 auto_link_speed; + __le16 auto_link_speed_mask; + u8 wirespeed; + u8 lpbk; + u8 force_pause; + u8 module_status; + __le32 preemphasis; + u8 phy_maj; + u8 phy_min; + u8 phy_bld; + u8 phy_type; + u8 media_type; + u8 xcvr_pkg_type; + u8 eee_config_phy_addr; + u8 parallel_detect; + __le16 link_partner_adv_speeds; + u8 link_partner_adv_auto_mode; + u8 link_partner_adv_pause; + __le16 adv_eee_link_speed_mask; + __le16 link_partner_adv_eee_link_speed_mask; + __le32 xcvr_identifier_type_tx_lpi_timer; + __le16 fec_cfg; + u8 duplex_state; + u8 option_flags; + char phy_vendor_name[16]; + char phy_vendor_partnumber[16]; + __le16 support_pam4_speeds; + __le16 force_pam4_link_speed; + __le16 auto_pam4_link_speed_mask; + u8 link_partner_pam4_adv_speeds; + u8 valid; +}; + struct bnxt_link_info { u8 phy_type; u8 media_type; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 175192ebaa77..22898d3d088b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -950,8 +950,11 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf, struct hwrm_fwd_resp_input *req; int rc; - if (BNXT_FWD_RESP_SIZE_ERR(msg_size)) + if (BNXT_FWD_RESP_SIZE_ERR(msg_size)) { + netdev_warn_once(bp->dev, "HWRM fwd response too big (%d bytes)\n", + msg_size); return -EINVAL; + } rc = hwrm_req_init(bp, req, HWRM_FWD_RESP); if (!rc) { @@ -1085,7 +1088,7 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) rc = bnxt_hwrm_exec_fwd_resp( bp, vf, sizeof(struct hwrm_port_phy_qcfg_input)); } else { - struct hwrm_port_phy_qcfg_output phy_qcfg_resp = {0}; + struct hwrm_port_phy_qcfg_output_compat phy_qcfg_resp = {}; struct hwrm_port_phy_qcfg_input *phy_qcfg_req; phy_qcfg_req = @@ -1096,6 +1099,11 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) mutex_unlock(&bp->link_lock); phy_qcfg_resp.resp_len = cpu_to_le16(sizeof(phy_qcfg_resp)); phy_qcfg_resp.seq_id = phy_qcfg_req->seq_id; + /* New SPEEDS2 fields are beyond the legacy structure, so + * clear the SPEEDS2_SUPPORTED flag. + */ + phy_qcfg_resp.option_flags &= + ~PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED; phy_qcfg_resp.valid = 1; if (vf->flags & BNXT_VF_LINK_UP) { From a6736a0addd60fccc3a3508461d72314cc609772 Mon Sep 17 00:00:00 2001 From: Rao Shoaib Date: Tue, 11 Jun 2024 01:46:39 -0700 Subject: [PATCH 34/35] af_unix: Read with MSG_PEEK loops if the first unread byte is OOB Read with MSG_PEEK flag loops if the first byte to read is an OOB byte. commit 22dd70eb2c3d ("af_unix: Don't peek OOB data without MSG_OOB.") addresses the loop issue but does not address the issue that no data beyond OOB byte can be read. >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c1.send(b'a', MSG_OOB) 1 >>> c1.send(b'b') 1 >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'b' >>> from socket import * >>> c1, c2 = socketpair(AF_UNIX, SOCK_STREAM) >>> c2.setsockopt(SOL_SOCKET, SO_OOBINLINE, 1) >>> c1.send(b'a', MSG_OOB) 1 >>> c1.send(b'b') 1 >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'a' >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'a' >>> c2.recv(1, MSG_DONTWAIT) b'a' >>> c2.recv(1, MSG_PEEK | MSG_DONTWAIT) b'b' >>> Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Rao Shoaib Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240611084639.2248934-1-Rao.Shoaib@oracle.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 80846279de9f..5e695a9a609c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2625,18 +2625,18 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, if (skb == u->oob_skb) { if (copied) { skb = NULL; - } else if (sock_flag(sk, SOCK_URGINLINE)) { - if (!(flags & MSG_PEEK)) { + } else if (!(flags & MSG_PEEK)) { + if (sock_flag(sk, SOCK_URGINLINE)) { WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); + } else { + __skb_unlink(skb, &sk->sk_receive_queue); + WRITE_ONCE(u->oob_skb, NULL); + unlinked_skb = skb; + skb = skb_peek(&sk->sk_receive_queue); } - } else if (flags & MSG_PEEK) { - skb = NULL; - } else { - __skb_unlink(skb, &sk->sk_receive_queue); - WRITE_ONCE(u->oob_skb, NULL); - unlinked_skb = skb; - skb = skb_peek(&sk->sk_receive_queue); + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + skb = skb_peek_next(skb, &sk->sk_receive_queue); } } From a9b9741854a9fe9df948af49ca5514e0ed0429df Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Tue, 11 Jun 2024 11:25:46 +0300 Subject: [PATCH 35/35] bnxt_en: Adjust logging of firmware messages in case of released token in __hwrm_send() In case of token is released due to token->state == BNXT_HWRM_DEFERRED, released token (set to NULL) is used in log messages. This issue is expected to be prevented by HWRM_ERR_CODE_PF_UNAVAILABLE error code. But this error code is returned by recent firmware. So some firmware may not return it. This may lead to NULL pointer dereference. Adjust this issue by adding token pointer check. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8fa4219dba8e ("bnxt_en: add dynamic debug support for HWRM messages") Suggested-by: Michael Chan Signed-off-by: Aleksandr Mishin Reviewed-by: Wojciech Drewek Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20240611082547.12178-1-amishin@t-argos.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c index 1df3d56cc4b5..d2fd2d04ed47 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c @@ -680,7 +680,7 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx) req_type); else if (rc && rc != HWRM_ERR_CODE_PF_UNAVAILABLE) hwrm_err(bp, ctx, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n", - req_type, token->seq_id, rc); + req_type, le16_to_cpu(ctx->req->seq_id), rc); rc = __hwrm_to_stderr(rc); exit: if (token)