From 8c702a53bb0a79bfa203ba21ef1caba43673c5b7 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 30 Mar 2020 10:21:49 +0300 Subject: [PATCH 1/8] net/mlx5: Fix frequent ioread PCI access during recovery High frequency of PCI ioread calls during recovery flow may cause the following trace on powerpc: [ 248.670288] EEH: 2100000 reads ignored for recovering device at location=Slot1 driver=mlx5_core pci addr=0000:01:00.1 [ 248.670331] EEH: Might be infinite loop in mlx5_core driver [ 248.670361] CPU: 2 PID: 35247 Comm: kworker/u192:11 Kdump: loaded Tainted: G OE ------------ 4.14.0-115.14.1.el7a.ppc64le #1 [ 248.670425] Workqueue: mlx5_health0000:01:00.1 health_recover_work [mlx5_core] [ 248.670471] Call Trace: [ 248.670492] [c00020391c11b960] [c000000000c217ac] dump_stack+0xb0/0xf4 (unreliable) [ 248.670548] [c00020391c11b9a0] [c000000000045818] eeh_check_failure+0x5c8/0x630 [ 248.670631] [c00020391c11ba50] [c00000000068fce4] ioread32be+0x114/0x1c0 [ 248.670692] [c00020391c11bac0] [c00800000dd8b400] mlx5_error_sw_reset+0x160/0x510 [mlx5_core] [ 248.670752] [c00020391c11bb60] [c00800000dd75824] mlx5_disable_device+0x34/0x1d0 [mlx5_core] [ 248.670822] [c00020391c11bbe0] [c00800000dd8affc] health_recover_work+0x11c/0x3c0 [mlx5_core] [ 248.670891] [c00020391c11bc80] [c000000000164fcc] process_one_work+0x1bc/0x5f0 [ 248.670955] [c00020391c11bd20] [c000000000167f8c] worker_thread+0xac/0x6b0 [ 248.671015] [c00020391c11bdc0] [c000000000171618] kthread+0x168/0x1b0 [ 248.671067] [c00020391c11be30] [c00000000000b65c] ret_from_kernel_thread+0x5c/0x80 Reduce the PCI ioread frequency during recovery by using msleep() instead of cond_resched() Fixes: 3e5b72ac2f29 ("net/mlx5: Issue SW reset on FW assert") Signed-off-by: Moshe Shemesh Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index fa1665caac46..f99e1752d4e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -243,7 +243,7 @@ recover_from_sw_reset: if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) break; - cond_resched(); + msleep(20); } while (!time_after(jiffies, end)); if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { From 84be2fdae4f80b7388f754fe49149374a41725f2 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 29 Mar 2020 07:10:43 +0300 Subject: [PATCH 2/8] net/mlx5: Fix condition for termination table cleanup When we destroy rules from slow path we need to avoid destroying termination tables since termination tables are never created in slow path. By doing so we avoid destroying the termination table created for the slow path. Fixes: d8a2034f152a ("net/mlx5: Don't use termination tables in slow path") Signed-off-by: Eli Cohen Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 - .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 12 +++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 39f42f985fbd..c1848b57f61c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -403,7 +403,6 @@ enum { MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), - MLX5_ESW_ATTR_FLAG_HAIRPIN = BIT(3), }; struct mlx5_esw_flow_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f171eb2234b0..b2e38e0cde97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -300,7 +300,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, bool split = !!(attr->split_count); struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; - bool hairpin = false; int j, i = 0; if (esw->mode != MLX5_ESWITCH_OFFLOADS) @@ -398,21 +397,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } - if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) { + if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, &flow_act, dest, i); - hairpin = true; - } else { + else rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); - } if (IS_ERR(rule)) goto err_add_rule; else atomic64_inc(&esw->offloads.num_flows); - if (hairpin) - attr->flags |= MLX5_ESW_ATTR_FLAG_HAIRPIN; - return rule; err_add_rule: @@ -501,7 +495,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - if (attr->flags & MLX5_ESW_ATTR_FLAG_HAIRPIN) { + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) { /* unref the term table */ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { if (attr->dests[i].termtbl) From d19987ccf57501894fdd8fadc2e55e4a3dd57239 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 24 Mar 2020 15:04:26 +0200 Subject: [PATCH 3/8] net/mlx5e: Add missing release firmware call Once driver finishes flashing the firmware image, it should release it. Fixes: 9c8bca2637b8 ("mlx5: Move firmware flash implementation to devlink") Signed-off-by: Eran Ben Elisha Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index bdeb291f6b67..e94f0c4d74a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -23,7 +23,10 @@ static int mlx5_devlink_flash_update(struct devlink *devlink, if (err) return err; - return mlx5_firmware_flash(dev, fw, extack); + err = mlx5_firmware_flash(dev, fw, extack); + release_firmware(fw); + + return err; } static u8 mlx5_fw_ver_major(u32 version) From 70f478ca085deec4d6c1f187f773f5827ddce7e8 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 1 Apr 2020 14:41:27 +0300 Subject: [PATCH 4/8] net/mlx5e: Fix nest_level for vlan pop action Current value of nest_level, assigned from net_device lower_level value, does not reflect the actual number of vlan headers, needed to pop. For ex., if we have untagged ingress traffic sended over vlan devices, instead of one pop action, driver will perform two pop actions. To fix that, calculate nest_level as difference between vlan device and parent device lower_levels. Fixes: f3b0a18bb6cb ("net: remove unnecessary variables and callback") Signed-off-by: Dmytro Linkin Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 438128dde187..e3fee837c7a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3558,12 +3558,13 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr, u32 *action) { - int nest_level = attr->parse_attr->filter_dev->lower_level; struct flow_action_entry vlan_act = { .id = FLOW_ACTION_VLAN_POP, }; - int err = 0; + int nest_level, err = 0; + nest_level = attr->parse_attr->filter_dev->lower_level - + priv->netdev->lower_level; while (nest_level--) { err = parse_tc_vlan_action(priv, &vlan_act, attr, action); if (err) From d5a3c2b640093c8a4bb5d76170a8f6c8c2eacc17 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 29 Mar 2020 18:54:10 +0300 Subject: [PATCH 5/8] net/mlx5e: Fix missing pedit action after ct clear action With ct clear action we should not allocate the action in hw and not release the mod_acts parsed in advance. It will be done when handling the ct clear action. Fixes: 1ef3018f5af3 ("net/mlx5e: CT: Support clear action") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e3fee837c7a3..a574c588269a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1343,7 +1343,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (err) return err; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) From 7482d9cb5b974b7ad1a58fa8714f7a8c05b5d278 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 3 Apr 2020 03:57:30 -0500 Subject: [PATCH 6/8] net/mlx5e: Fix pfnum in devlink port attribute Cited patch missed to extract PCI pf number accurately for PF and VF port flavour. It considered PCI device + function number. Due to this, device having non zero device number shown large pfnum. Hence, use only PCI function number; to avoid similar errors, derive pfnum one time for all port flavours. Fixes: f60f315d339e ("net/mlx5e: Register devlink ports for physical link, PCI PF, VFs") Reviewed-by: Jiri Pirko Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2a0243e4af75..55457f268495 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -2050,29 +2050,30 @@ static int register_devlink_port(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep = rpriv->rep; struct netdev_phys_item_id ppid = {}; unsigned int dl_port_index = 0; + u16 pfnum; if (!is_devlink_port_supported(dev, rpriv)) return 0; mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); + pfnum = PCI_FUNC(dev->pdev->devfn); if (rep->vport == MLX5_VPORT_UPLINK) { devlink_port_attrs_set(&rpriv->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - PCI_FUNC(dev->pdev->devfn), false, 0, + pfnum, false, 0, &ppid.id[0], ppid.id_len); dl_port_index = vport_to_devlink_port_index(dev, rep->vport); } else if (rep->vport == MLX5_VPORT_PF) { devlink_port_attrs_pci_pf_set(&rpriv->dl_port, &ppid.id[0], ppid.id_len, - dev->pdev->devfn); + pfnum); dl_port_index = rep->vport; } else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) { devlink_port_attrs_pci_vf_set(&rpriv->dl_port, &ppid.id[0], ppid.id_len, - dev->pdev->devfn, - rep->vport - 1); + pfnum, rep->vport - 1); dl_port_index = vport_to_devlink_port_index(dev, rep->vport); } From 230a1bc2470c5554a8c2bfe14774863897dc9386 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 3 Apr 2020 02:35:46 -0500 Subject: [PATCH 7/8] net/mlx5e: Fix devlink port netdev unregistration sequence In cited commit netdevice is registered after devlink port. Unregistration flow should be mirror sequence of registration flow. Hence, unregister netdevice before devlink port. Fixes: 31e87b39ba9d ("net/mlx5e: Fix devlink port register sequence") Reviewed-by: Jiri Pirko Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index dd7f338425eb..f02150a97ac8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5526,8 +5526,8 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); #endif - mlx5e_devlink_port_unregister(priv); unregister_netdev(priv->netdev); + mlx5e_devlink_port_unregister(priv); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); } From 9808dd0a2aeebcb72239a3b082159b0186d9ac3d Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Fri, 27 Mar 2020 12:12:31 +0300 Subject: [PATCH 8/8] net/mlx5e: CT: Use rhashtable's ct entries instead of a separate list Fixes CT entries list corruption. After allowing parallel insertion/removals in upper nf flow table layer, unprotected ct entries list can be corrupted by parallel add/del on the same flow table. CT entries list is only used while freeing a ct zone flow table to go over all the ct entries offloaded on that zone/table, and flush the table. As rhashtable already provides an api to go over all the inserted entries, fix the race by using the rhashtable iteration instead, and remove the list. Fixes: 7da182a998d6 ("netfilter: flowtable: Use work entry per offload command") Reviewed-by: Oz Shlomo Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index ad3e3a65d403..16416eaac39e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -67,11 +67,9 @@ struct mlx5_ct_ft { struct nf_flowtable *nf_ft; struct mlx5_tc_ct_priv *ct_priv; struct rhashtable ct_entries_ht; - struct list_head ct_entries_list; }; struct mlx5_ct_entry { - struct list_head list; u16 zone; struct rhash_head node; struct flow_rule *flow_rule; @@ -617,8 +615,6 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (err) goto err_insert; - list_add(&entry->list, &ft->ct_entries_list); - return 0; err_insert: @@ -646,7 +642,6 @@ mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params)); - list_del(&entry->list); kfree(entry); return 0; @@ -818,7 +813,6 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, ft->zone = zone; ft->nf_ft = nf_ft; ft->ct_priv = ct_priv; - INIT_LIST_HEAD(&ft->ct_entries_list); refcount_set(&ft->refcount, 1); err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); @@ -847,12 +841,12 @@ err_init: } static void -mlx5_tc_ct_flush_ft(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) { - struct mlx5_ct_entry *entry; + struct mlx5_tc_ct_priv *ct_priv = arg; + struct mlx5_ct_entry *entry = ptr; - list_for_each_entry(entry, &ft->ct_entries_list, list) - mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry); + mlx5_tc_ct_entry_del_rules(ct_priv, entry); } static void @@ -863,9 +857,10 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) nf_flow_table_offload_del_cb(ft->nf_ft, mlx5_tc_ct_block_flow_offload, ft); - mlx5_tc_ct_flush_ft(ct_priv, ft); rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); - rhashtable_destroy(&ft->ct_entries_ht); + rhashtable_free_and_destroy(&ft->ct_entries_ht, + mlx5_tc_ct_flush_ft_entry, + ct_priv); kfree(ft); }